diff options
| author | Shinichiro Hamaji <shinichiro.hamaji@gmail.com> | 2016-02-15 18:24:42 +0900 |
|---|---|---|
| committer | Shinichiro Hamaji <shinichiro.hamaji@gmail.com> | 2016-02-15 19:11:52 +0900 |
| commit | 2d353a0c5c1e5eee1e986334445063fd78cf1c97 (patch) | |
| tree | a1f0a3e3ebd2273da0af4d78f918ab9720e9e140 | |
| parent | deb9290e423961a945ad7695bb291b8ede6960de (diff) | |
| download | platform_build_kati-2d353a0c5c1e5eee1e986334445063fd78cf1c97.tar.gz platform_build_kati-2d353a0c5c1e5eee1e986334445063fd78cf1c97.tar.bz2 platform_build_kati-2d353a0c5c1e5eee1e986334445063fd78cf1c97.zip | |
[C++] Always use std::stable_sort
It seems this is a fairly good choice even if we compare this
against string-specific algorithms, probably because our
strings are not usually very long.
| -rw-r--r-- | Makefile.ckati | 1 | ||||
| -rw-r--r-- | func.cc | 9 | ||||
| -rw-r--r-- | lcp_msort.cc | 210 | ||||
| -rw-r--r-- | lcp_msort.h | 25 |
4 files changed, 2 insertions, 243 deletions
diff --git a/Makefile.ckati b/Makefile.ckati index 4e41d51..cf5bab8 100644 --- a/Makefile.ckati +++ b/Makefile.ckati @@ -35,7 +35,6 @@ KATI_CXX_SRCS := \ flags.cc \ func.cc \ io.cc \ - lcp_msort.cc \ log.cc \ main.cc \ mutex.cc \ @@ -30,7 +30,6 @@ #include "eval.h" #include "fileutil.h" #include "find.h" -#include "lcp_msort.h" #include "log.h" #include "parser.h" #include "stats.h" @@ -186,9 +185,8 @@ void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) { string list; args[0]->Eval(ev, &list); COLLECT_STATS("func sort time"); - // TODO(hamaji): Probably we could make LCP merge sort faster than - // stable_sort. -#if __APPLE___ + // TODO(hamaji): Probably we could use a faster string-specific sort + // algorithm. vector<StringPiece> toks; WordScanner(list).Split(&toks); stable_sort(toks.begin(), toks.end()); @@ -200,9 +198,6 @@ void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) { prev = tok; } } -#else - StringSortByLcpMsort(&list, s); -#endif } static int GetNumericValueForFunc(const string& buf) { diff --git a/lcp_msort.cc b/lcp_msort.cc deleted file mode 100644 index 30439c6..0000000 --- a/lcp_msort.cc +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2016 Google Inc. All rights reserved -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// http://ci.nii.ac.jp/naid/110006613087 - -#include "lcp_msort.h" - -#include <stdlib.h> - -#include "strutil.h" - -#define DO_SORT_AND_UNIQ_AT_ONCE - -namespace { - -struct AnnotatedString { - const unsigned char* s; - int l; -}; - -inline int LcpCompare(const unsigned char* s1, - const unsigned char* s2, - int b, - int& i) { - for (i = b; s1[i] || s2[i]; i++) { - unsigned char c1 = s1[i]; - unsigned char c2 = s2[i]; - if (c1 < c2) { - return -1; - } - else if (c1 > c2) { - return 1; - } - } - return 0; -} - -#ifdef DO_SORT_AND_UNIQ_AT_ONCE - -int StringMergeSortAndUniq(const vector<const unsigned char*>& data, - int begin, - int end, - AnnotatedString* work, - AnnotatedString* out) { - if (begin + 1 == end) { - out[begin] = AnnotatedString{ data[begin], 0 }; - return 1; - } - - int mid = (begin + end) / 2; - int s1_len = StringMergeSortAndUniq(data, begin, mid, work, out); - int s2_len = StringMergeSortAndUniq(data, mid, end, work, out); - - memcpy(work, out + begin, s1_len * sizeof(AnnotatedString)); - - AnnotatedString* s1 = work; - AnnotatedString* s2 = out + mid; - int i = 0, j = 0, k = 0; - AnnotatedString* d = out + begin; - for (; i < s1_len && j < s2_len; k++) { - if (s1[i].l > s2[j].l) { - d[k] = s1[i]; - i++; - } else if (s1[i].l < s2[j].l) { - d[k] = s2[j]; - j++; - } else { - int m; - int r = LcpCompare(s1[i].s, s2[j].s, s1[i].l, m); - if (r < 0) { - d[k] = s1[i]; - i++; - s2[j].l = m; - } else if (r > 0) { - d[k] = s2[j]; - j++; - s1[i].l = m; - } else { - // Discard a unique string. - d[k] = s1[i]; - i++; - j++; - } - } - } - - if (i < s1_len) { - memcpy(&d[k], &s1[i], (s1_len - i) * sizeof(AnnotatedString)); - k += s1_len - i; - } - else if (j < s2_len) { - memmove(&d[k], &s2[j], (s2_len - j) * sizeof(AnnotatedString)); - k += s2_len - j; - } - return k; -} - -#else - -void StringMergeSort(const vector<const unsigned char*>& data, - int begin, - int end, - AnnotatedString* work, - AnnotatedString* out) { - if (begin + 1 == end) { - out[begin] = AnnotatedString{ data[begin], 0 }; - return; - } - - int mid = (begin + end) / 2; - StringMergeSort(data, begin, mid, work, out); - StringMergeSort(data, mid, end, work, out); - - memcpy(work, out + begin, (mid - begin) * sizeof(AnnotatedString)); - - AnnotatedString* s1 = work; - int s1_len = mid - begin; - AnnotatedString* s2 = out + mid; - int s2_len = end - mid; - int i = 0, j = 0, k = 0; - AnnotatedString* d = out + begin; - for (; i < s1_len && j < s2_len; k++) { - if (s1[i].l > s2[j].l) { - d[k] = s1[i]; - i++; - } else if (s1[i].l < s2[j].l) { - d[k] = s2[j]; - j++; - } else { - int m; - int r = LcpCompare(s1[i].s, s2[j].s, s1[i].l, m); - if (r < 0) { - d[k] = s1[i]; - i++; - s2[j].l = m; - } else { - d[k] = s2[j]; - j++; - s1[i].l = m; - } - } - } - - if (i < s1_len) - memcpy(&d[k], &s1[i], (s1_len - i) * sizeof(AnnotatedString)); - else if (j < s2_len) - memmove(&d[k], &s2[j], (s2_len - j) * sizeof(AnnotatedString)); -} - -#endif - -} // namespace - -void StringSortByLcpMsort(string* buf, string* out) { - vector<const unsigned char*> toks; - for (StringPiece tok : WordScanner(*buf)) { - const_cast<char*>(tok.data())[tok.size()] = 0; - toks.push_back(reinterpret_cast<const unsigned char*>(tok.data())); - } - - if (toks.empty()) - return; - if (toks.size() == 1) { - *out += reinterpret_cast<const char*>(toks[0]); - return; - } - - AnnotatedString* as = static_cast<AnnotatedString*>( - malloc(toks.size() * sizeof(AnnotatedString))); - AnnotatedString* work = static_cast<AnnotatedString*>( - malloc((toks.size() / 2 + 1) * sizeof(AnnotatedString))); - -#ifdef DO_SORT_AND_UNIQ_AT_ONCE - int len = StringMergeSortAndUniq(toks, 0, toks.size(), work, as); -#else - int len = static_cast<int>(toks.size()); - StringMergeSort(toks, 0, toks.size(), work, as); -#endif - - WordWriter ww(out); -#ifdef DO_SORT_AND_UNIQ_AT_ONCE - for (int i = 0; i < len; i++) { - ww.MaybeAddWhitespace(); - *out += reinterpret_cast<const char*>(as[i].s); - } -#else - StringPiece prev; - for (int i = 0; i < len; i++) { - StringPiece tok = reinterpret_cast<const char*>(as[i].s); - if (prev != tok) { - ww.Write(tok); - prev = tok; - } - } -#endif - - free(as); - free(work); -} diff --git a/lcp_msort.h b/lcp_msort.h deleted file mode 100644 index a74b095..0000000 --- a/lcp_msort.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2016 Google Inc. All rights reserved -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef LCP_MSORT_H_ -#define LCP_MSORT_H_ - -#include <string> - -using namespace std; - -// Split |buf|, sort and unique tokens, and append results to |out|. -void StringSortByLcpMsort(string* buf, string* out); - -#endif // LCP_MSORT_H_ |
