aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShinichiro Hamaji <shinichiro.hamaji@gmail.com>2016-02-15 18:24:42 +0900
committerShinichiro Hamaji <shinichiro.hamaji@gmail.com>2016-02-15 19:11:52 +0900
commit2d353a0c5c1e5eee1e986334445063fd78cf1c97 (patch)
treea1f0a3e3ebd2273da0af4d78f918ab9720e9e140
parentdeb9290e423961a945ad7695bb291b8ede6960de (diff)
downloadplatform_build_kati-2d353a0c5c1e5eee1e986334445063fd78cf1c97.tar.gz
platform_build_kati-2d353a0c5c1e5eee1e986334445063fd78cf1c97.tar.bz2
platform_build_kati-2d353a0c5c1e5eee1e986334445063fd78cf1c97.zip
[C++] Always use std::stable_sort
It seems this is a fairly good choice even if we compare this against string-specific algorithms, probably because our strings are not usually very long.
-rw-r--r--Makefile.ckati1
-rw-r--r--func.cc9
-rw-r--r--lcp_msort.cc210
-rw-r--r--lcp_msort.h25
4 files changed, 2 insertions, 243 deletions
diff --git a/Makefile.ckati b/Makefile.ckati
index 4e41d51..cf5bab8 100644
--- a/Makefile.ckati
+++ b/Makefile.ckati
@@ -35,7 +35,6 @@ KATI_CXX_SRCS := \
flags.cc \
func.cc \
io.cc \
- lcp_msort.cc \
log.cc \
main.cc \
mutex.cc \
diff --git a/func.cc b/func.cc
index 5c5cf30..2ea8afa 100644
--- a/func.cc
+++ b/func.cc
@@ -30,7 +30,6 @@
#include "eval.h"
#include "fileutil.h"
#include "find.h"
-#include "lcp_msort.h"
#include "log.h"
#include "parser.h"
#include "stats.h"
@@ -186,9 +185,8 @@ void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) {
string list;
args[0]->Eval(ev, &list);
COLLECT_STATS("func sort time");
- // TODO(hamaji): Probably we could make LCP merge sort faster than
- // stable_sort.
-#if __APPLE___
+ // TODO(hamaji): Probably we could use a faster string-specific sort
+ // algorithm.
vector<StringPiece> toks;
WordScanner(list).Split(&toks);
stable_sort(toks.begin(), toks.end());
@@ -200,9 +198,6 @@ void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) {
prev = tok;
}
}
-#else
- StringSortByLcpMsort(&list, s);
-#endif
}
static int GetNumericValueForFunc(const string& buf) {
diff --git a/lcp_msort.cc b/lcp_msort.cc
deleted file mode 100644
index 30439c6..0000000
--- a/lcp_msort.cc
+++ /dev/null
@@ -1,210 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// http://ci.nii.ac.jp/naid/110006613087
-
-#include "lcp_msort.h"
-
-#include <stdlib.h>
-
-#include "strutil.h"
-
-#define DO_SORT_AND_UNIQ_AT_ONCE
-
-namespace {
-
-struct AnnotatedString {
- const unsigned char* s;
- int l;
-};
-
-inline int LcpCompare(const unsigned char* s1,
- const unsigned char* s2,
- int b,
- int& i) {
- for (i = b; s1[i] || s2[i]; i++) {
- unsigned char c1 = s1[i];
- unsigned char c2 = s2[i];
- if (c1 < c2) {
- return -1;
- }
- else if (c1 > c2) {
- return 1;
- }
- }
- return 0;
-}
-
-#ifdef DO_SORT_AND_UNIQ_AT_ONCE
-
-int StringMergeSortAndUniq(const vector<const unsigned char*>& data,
- int begin,
- int end,
- AnnotatedString* work,
- AnnotatedString* out) {
- if (begin + 1 == end) {
- out[begin] = AnnotatedString{ data[begin], 0 };
- return 1;
- }
-
- int mid = (begin + end) / 2;
- int s1_len = StringMergeSortAndUniq(data, begin, mid, work, out);
- int s2_len = StringMergeSortAndUniq(data, mid, end, work, out);
-
- memcpy(work, out + begin, s1_len * sizeof(AnnotatedString));
-
- AnnotatedString* s1 = work;
- AnnotatedString* s2 = out + mid;
- int i = 0, j = 0, k = 0;
- AnnotatedString* d = out + begin;
- for (; i < s1_len && j < s2_len; k++) {
- if (s1[i].l > s2[j].l) {
- d[k] = s1[i];
- i++;
- } else if (s1[i].l < s2[j].l) {
- d[k] = s2[j];
- j++;
- } else {
- int m;
- int r = LcpCompare(s1[i].s, s2[j].s, s1[i].l, m);
- if (r < 0) {
- d[k] = s1[i];
- i++;
- s2[j].l = m;
- } else if (r > 0) {
- d[k] = s2[j];
- j++;
- s1[i].l = m;
- } else {
- // Discard a unique string.
- d[k] = s1[i];
- i++;
- j++;
- }
- }
- }
-
- if (i < s1_len) {
- memcpy(&d[k], &s1[i], (s1_len - i) * sizeof(AnnotatedString));
- k += s1_len - i;
- }
- else if (j < s2_len) {
- memmove(&d[k], &s2[j], (s2_len - j) * sizeof(AnnotatedString));
- k += s2_len - j;
- }
- return k;
-}
-
-#else
-
-void StringMergeSort(const vector<const unsigned char*>& data,
- int begin,
- int end,
- AnnotatedString* work,
- AnnotatedString* out) {
- if (begin + 1 == end) {
- out[begin] = AnnotatedString{ data[begin], 0 };
- return;
- }
-
- int mid = (begin + end) / 2;
- StringMergeSort(data, begin, mid, work, out);
- StringMergeSort(data, mid, end, work, out);
-
- memcpy(work, out + begin, (mid - begin) * sizeof(AnnotatedString));
-
- AnnotatedString* s1 = work;
- int s1_len = mid - begin;
- AnnotatedString* s2 = out + mid;
- int s2_len = end - mid;
- int i = 0, j = 0, k = 0;
- AnnotatedString* d = out + begin;
- for (; i < s1_len && j < s2_len; k++) {
- if (s1[i].l > s2[j].l) {
- d[k] = s1[i];
- i++;
- } else if (s1[i].l < s2[j].l) {
- d[k] = s2[j];
- j++;
- } else {
- int m;
- int r = LcpCompare(s1[i].s, s2[j].s, s1[i].l, m);
- if (r < 0) {
- d[k] = s1[i];
- i++;
- s2[j].l = m;
- } else {
- d[k] = s2[j];
- j++;
- s1[i].l = m;
- }
- }
- }
-
- if (i < s1_len)
- memcpy(&d[k], &s1[i], (s1_len - i) * sizeof(AnnotatedString));
- else if (j < s2_len)
- memmove(&d[k], &s2[j], (s2_len - j) * sizeof(AnnotatedString));
-}
-
-#endif
-
-} // namespace
-
-void StringSortByLcpMsort(string* buf, string* out) {
- vector<const unsigned char*> toks;
- for (StringPiece tok : WordScanner(*buf)) {
- const_cast<char*>(tok.data())[tok.size()] = 0;
- toks.push_back(reinterpret_cast<const unsigned char*>(tok.data()));
- }
-
- if (toks.empty())
- return;
- if (toks.size() == 1) {
- *out += reinterpret_cast<const char*>(toks[0]);
- return;
- }
-
- AnnotatedString* as = static_cast<AnnotatedString*>(
- malloc(toks.size() * sizeof(AnnotatedString)));
- AnnotatedString* work = static_cast<AnnotatedString*>(
- malloc((toks.size() / 2 + 1) * sizeof(AnnotatedString)));
-
-#ifdef DO_SORT_AND_UNIQ_AT_ONCE
- int len = StringMergeSortAndUniq(toks, 0, toks.size(), work, as);
-#else
- int len = static_cast<int>(toks.size());
- StringMergeSort(toks, 0, toks.size(), work, as);
-#endif
-
- WordWriter ww(out);
-#ifdef DO_SORT_AND_UNIQ_AT_ONCE
- for (int i = 0; i < len; i++) {
- ww.MaybeAddWhitespace();
- *out += reinterpret_cast<const char*>(as[i].s);
- }
-#else
- StringPiece prev;
- for (int i = 0; i < len; i++) {
- StringPiece tok = reinterpret_cast<const char*>(as[i].s);
- if (prev != tok) {
- ww.Write(tok);
- prev = tok;
- }
- }
-#endif
-
- free(as);
- free(work);
-}
diff --git a/lcp_msort.h b/lcp_msort.h
deleted file mode 100644
index a74b095..0000000
--- a/lcp_msort.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef LCP_MSORT_H_
-#define LCP_MSORT_H_
-
-#include <string>
-
-using namespace std;
-
-// Split |buf|, sort and unique tokens, and append results to |out|.
-void StringSortByLcpMsort(string* buf, string* out);
-
-#endif // LCP_MSORT_H_