diff options
author | Shinichiro Hamaji <shinichiro.hamaji@gmail.com> | 2016-02-12 16:32:42 +0900 |
---|---|---|
committer | Shinichiro Hamaji <shinichiro.hamaji@gmail.com> | 2016-02-12 19:38:51 +0900 |
commit | 1443689e8d54961e5288e144ba3a051e9e3bac8a (patch) | |
tree | 23d08212658f86bcd90e6c1079f17333516e788f | |
parent | 5d17287c243bfb9ecd750af3039f8650263e1788 (diff) | |
download | android_build_kati-1443689e8d54961e5288e144ba3a051e9e3bac8a.tar.gz android_build_kati-1443689e8d54961e5288e144ba3a051e9e3bac8a.tar.bz2 android_build_kati-1443689e8d54961e5288e144ba3a051e9e3bac8a.zip |
[C++] Use LCP merge sort for $(sort)
and use stable_sort on Mac.
On Linux:
LCPMS: 0.627s, sort: 3.37s, stable_sort: 1.79s, qsort: 1.95s
On Mac:
LCPMS: 1.583s, sort: 1.33s, stable_sort: 1.19s, qsort: 1.80s
-rw-r--r-- | Makefile.ckati | 1 | ||||
-rw-r--r-- | func.cc | 13 | ||||
-rw-r--r-- | lcp_msort.cc | 210 | ||||
-rw-r--r-- | lcp_msort.h | 25 | ||||
-rw-r--r-- | testcase/sort.mk | 6 |
5 files changed, 253 insertions, 2 deletions
diff --git a/Makefile.ckati b/Makefile.ckati index cf5bab8..4e41d51 100644 --- a/Makefile.ckati +++ b/Makefile.ckati @@ -35,6 +35,7 @@ KATI_CXX_SRCS := \ flags.cc \ func.cc \ io.cc \ + lcp_msort.cc \ log.cc \ main.cc \ mutex.cc \ @@ -30,6 +30,7 @@ #include "eval.h" #include "fileutil.h" #include "find.h" +#include "lcp_msort.h" #include "log.h" #include "parser.h" #include "stats.h" @@ -182,10 +183,15 @@ void FilterOutFunc(const vector<Value*>& args, Evaluator* ev, string* s) { } void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) { - const string&& list = args[0]->Eval(ev); + string list; + args[0]->Eval(ev, &list); + COLLECT_STATS("func sort time"); + // TODO(hamaji): Probably we could make LCP merge sort faster than + // stable_sort. +#if __APPLE___ vector<StringPiece> toks; WordScanner(list).Split(&toks); - sort(toks.begin(), toks.end()); + stable_sort(toks.begin(), toks.end()); WordWriter ww(s); StringPiece prev; for (StringPiece tok : toks) { @@ -194,6 +200,9 @@ void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) { prev = tok; } } +#else + StringSortByLcpMsort(&list, s); +#endif } static int GetNumericValueForFunc(const string& buf) { diff --git a/lcp_msort.cc b/lcp_msort.cc new file mode 100644 index 0000000..c062ea5 --- /dev/null +++ b/lcp_msort.cc @@ -0,0 +1,210 @@ +// Copyright 2016 Google Inc. All rights reserved +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// http://ci.nii.ac.jp/naid/110006613087 + +#include "lcp_msort.h" + +#include <stdlib.h> + +#include "strutil.h" + +#define DO_SORT_AND_UNIQ_AT_ONCE + +namespace { + +struct AnnotatedString { + const unsigned char* s; + int l; +}; + +inline int LcpCompare(const unsigned char* s1, + const unsigned char* s2, + int b, + int& i) { + for (i = b; s1[i] || s2[i]; i++) { + unsigned char c1 = s1[i]; + unsigned char c2 = s2[i]; + if (c1 < c2) { + return -1; + } + else if (c1 > c2) { + return 1; + } + } + return 0; +} + +#ifdef DO_SORT_AND_UNIQ_AT_ONCE + +int StringMergeSortAndUniq(const vector<const unsigned char*>& data, + int begin, + int end, + AnnotatedString* work, + AnnotatedString* out) { + if (begin + 1 == end) { + out[begin] = AnnotatedString{ data[begin], 0 }; + return 1; + } + + int mid = (begin + end) / 2; + int s1_len = StringMergeSortAndUniq(data, begin, mid, work, out); + int s2_len = StringMergeSortAndUniq(data, mid, end, work, out); + + memcpy(work, out + begin, s1_len * sizeof(AnnotatedString)); + + AnnotatedString* s1 = work; + AnnotatedString* s2 = out + mid; + int i = 0, j = 0, k = 0; + AnnotatedString* d = out + begin; + for (; i < s1_len && j < s2_len; k++) { + if (s1[i].l > s2[j].l) { + d[k] = s1[i]; + i++; + } else if (s1[i].l < s2[j].l) { + d[k] = s2[j]; + j++; + } else { + int m; + int r = LcpCompare(s1[i].s, s2[j].s, s1[i].l, m); + if (r < 0) { + d[k] = s1[i]; + i++; + s2[j].l = m; + } else if (r > 0) { + d[k] = s2[j]; + j++; + s1[i].l = m; + } else { + // Discard a unique string. + d[k] = s1[i]; + i++; + j++; + } + } + } + + if (i < s1_len) { + memcpy(&d[k], &s1[i], (s1_len - i) * sizeof(AnnotatedString)); + k += s1_len - i; + } + else if (j < s2_len) { + memcpy(&d[k], &s2[j], (s2_len - j) * sizeof(AnnotatedString)); + k += s2_len - j; + } + return k; +} + +#else + +void StringMergeSort(const vector<const unsigned char*>& data, + int begin, + int end, + AnnotatedString* work, + AnnotatedString* out) { + if (begin + 1 == end) { + out[begin] = AnnotatedString{ data[begin], 0 }; + return; + } + + int mid = (begin + end) / 2; + StringMergeSort(data, begin, mid, work, out); + StringMergeSort(data, mid, end, work, out); + + memcpy(work, out + begin, (mid - begin) * sizeof(AnnotatedString)); + + AnnotatedString* s1 = work; + int s1_len = mid - begin; + AnnotatedString* s2 = out + mid; + int s2_len = end - mid; + int i = 0, j = 0, k = 0; + AnnotatedString* d = out + begin; + for (; i < s1_len && j < s2_len; k++) { + if (s1[i].l > s2[j].l) { + d[k] = s1[i]; + i++; + } else if (s1[i].l < s2[j].l) { + d[k] = s2[j]; + j++; + } else { + int m; + int r = LcpCompare(s1[i].s, s2[j].s, s1[i].l, m); + if (r < 0) { + d[k] = s1[i]; + i++; + s2[j].l = m; + } else { + d[k] = s2[j]; + j++; + s1[i].l = m; + } + } + } + + if (i < s1_len) + memcpy(&d[k], &s1[i], (s1_len - i) * sizeof(AnnotatedString)); + else if (j < s2_len) + memcpy(&d[k], &s2[j], (s2_len - j) * sizeof(AnnotatedString)); +} + +#endif + +} // namespace + +void StringSortByLcpMsort(string* buf, string* out) { + vector<const unsigned char*> toks; + for (StringPiece tok : WordScanner(*buf)) { + const_cast<char*>(tok.data())[tok.size()] = 0; + toks.push_back(reinterpret_cast<const unsigned char*>(tok.data())); + } + + if (toks.empty()) + return; + if (toks.size() == 1) { + *out += reinterpret_cast<const char*>(toks[0]); + return; + } + + AnnotatedString* as = static_cast<AnnotatedString*>( + malloc(toks.size() * sizeof(AnnotatedString))); + AnnotatedString* work = static_cast<AnnotatedString*>( + malloc((toks.size() / 2 + 1) * sizeof(AnnotatedString))); + +#ifdef DO_SORT_AND_UNIQ_AT_ONCE + int len = StringMergeSortAndUniq(toks, 0, toks.size(), work, as); +#else + int len = static_cast<int>(toks.size()); + StringMergeSort(toks, 0, toks.size(), work, as); +#endif + + WordWriter ww(out); +#ifdef DO_SORT_AND_UNIQ_AT_ONCE + for (int i = 0; i < len; i++) { + ww.MaybeAddWhitespace(); + *out += reinterpret_cast<const char*>(as[i].s); + } +#else + StringPiece prev; + for (int i = 0; i < len; i++) { + StringPiece tok = reinterpret_cast<const char*>(as[i].s); + if (prev != tok) { + ww.Write(tok); + prev = tok; + } + } +#endif + + free(as); + free(work); +} diff --git a/lcp_msort.h b/lcp_msort.h new file mode 100644 index 0000000..a74b095 --- /dev/null +++ b/lcp_msort.h @@ -0,0 +1,25 @@ +// Copyright 2016 Google Inc. All rights reserved +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LCP_MSORT_H_ +#define LCP_MSORT_H_ + +#include <string> + +using namespace std; + +// Split |buf|, sort and unique tokens, and append results to |out|. +void StringSortByLcpMsort(string* buf, string* out); + +#endif // LCP_MSORT_H_ diff --git a/testcase/sort.mk b/testcase/sort.mk index c521289..03d0375 100644 --- a/testcase/sort.mk +++ b/testcase/sort.mk @@ -1,5 +1,11 @@ +sp := $(subst S, ,S) + test: echo $(sort foo bar lose) echo $(sort foo bar aaaa) echo $(sort foo bar lose lose foo bar bar) + echo $(sort baz bar) + echo $(sort single) + echo $(sort $(sp)foo$(sp)) echo $(sort ) + echo $(sort device/sample/products/AndroidProducts.mk device/moto/shamu/AndroidProducts.mk device/asus/fugu/AndroidProducts.mk device/asus/deb/AndroidProducts.mk device/asus/flo/AndroidProducts.mk device/generic/arm64/AndroidProducts.mk device/generic/qemu/AndroidProducts.mk device/generic/mini-emulator-x86_64/AndroidProducts.mk device/generic/x86/AndroidProducts.mk device/generic/mips/AndroidProducts.mk device/generic/mini-emulator-x86/AndroidProducts.mk device/generic/mini-emulator-mips/AndroidProducts.mk device/generic/mini-emulator-arm64/AndroidProducts.mk device/generic/mini-emulator-armv7-a-neon/AndroidProducts.mk device/generic/x86_64/AndroidProducts.mk device/generic/armv7-a-neon/AndroidProducts.mk device/htc/flounder/AndroidProducts.mk device/lge/bullhead/AndroidProducts.mk device/lge/hammerhead/AndroidProducts.mk device/huawei/angler/AndroidProducts.mk) |