aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShinichiro Hamaji <shinichiro.hamaji@gmail.com>2016-02-12 16:32:42 +0900
committerShinichiro Hamaji <shinichiro.hamaji@gmail.com>2016-02-12 19:38:51 +0900
commit1443689e8d54961e5288e144ba3a051e9e3bac8a (patch)
tree23d08212658f86bcd90e6c1079f17333516e788f
parent5d17287c243bfb9ecd750af3039f8650263e1788 (diff)
downloadandroid_build_kati-1443689e8d54961e5288e144ba3a051e9e3bac8a.tar.gz
android_build_kati-1443689e8d54961e5288e144ba3a051e9e3bac8a.tar.bz2
android_build_kati-1443689e8d54961e5288e144ba3a051e9e3bac8a.zip
[C++] Use LCP merge sort for $(sort)
and use stable_sort on Mac. On Linux: LCPMS: 0.627s, sort: 3.37s, stable_sort: 1.79s, qsort: 1.95s On Mac: LCPMS: 1.583s, sort: 1.33s, stable_sort: 1.19s, qsort: 1.80s
-rw-r--r--Makefile.ckati1
-rw-r--r--func.cc13
-rw-r--r--lcp_msort.cc210
-rw-r--r--lcp_msort.h25
-rw-r--r--testcase/sort.mk6
5 files changed, 253 insertions, 2 deletions
diff --git a/Makefile.ckati b/Makefile.ckati
index cf5bab8..4e41d51 100644
--- a/Makefile.ckati
+++ b/Makefile.ckati
@@ -35,6 +35,7 @@ KATI_CXX_SRCS := \
flags.cc \
func.cc \
io.cc \
+ lcp_msort.cc \
log.cc \
main.cc \
mutex.cc \
diff --git a/func.cc b/func.cc
index 49115d4..5c5cf30 100644
--- a/func.cc
+++ b/func.cc
@@ -30,6 +30,7 @@
#include "eval.h"
#include "fileutil.h"
#include "find.h"
+#include "lcp_msort.h"
#include "log.h"
#include "parser.h"
#include "stats.h"
@@ -182,10 +183,15 @@ void FilterOutFunc(const vector<Value*>& args, Evaluator* ev, string* s) {
}
void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) {
- const string&& list = args[0]->Eval(ev);
+ string list;
+ args[0]->Eval(ev, &list);
+ COLLECT_STATS("func sort time");
+ // TODO(hamaji): Probably we could make LCP merge sort faster than
+ // stable_sort.
+#if __APPLE___
vector<StringPiece> toks;
WordScanner(list).Split(&toks);
- sort(toks.begin(), toks.end());
+ stable_sort(toks.begin(), toks.end());
WordWriter ww(s);
StringPiece prev;
for (StringPiece tok : toks) {
@@ -194,6 +200,9 @@ void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) {
prev = tok;
}
}
+#else
+ StringSortByLcpMsort(&list, s);
+#endif
}
static int GetNumericValueForFunc(const string& buf) {
diff --git a/lcp_msort.cc b/lcp_msort.cc
new file mode 100644
index 0000000..c062ea5
--- /dev/null
+++ b/lcp_msort.cc
@@ -0,0 +1,210 @@
+// Copyright 2016 Google Inc. All rights reserved
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// http://ci.nii.ac.jp/naid/110006613087
+
+#include "lcp_msort.h"
+
+#include <stdlib.h>
+
+#include "strutil.h"
+
+#define DO_SORT_AND_UNIQ_AT_ONCE
+
+namespace {
+
+struct AnnotatedString {
+ const unsigned char* s;
+ int l;
+};
+
+inline int LcpCompare(const unsigned char* s1,
+ const unsigned char* s2,
+ int b,
+ int& i) {
+ for (i = b; s1[i] || s2[i]; i++) {
+ unsigned char c1 = s1[i];
+ unsigned char c2 = s2[i];
+ if (c1 < c2) {
+ return -1;
+ }
+ else if (c1 > c2) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+#ifdef DO_SORT_AND_UNIQ_AT_ONCE
+
+int StringMergeSortAndUniq(const vector<const unsigned char*>& data,
+ int begin,
+ int end,
+ AnnotatedString* work,
+ AnnotatedString* out) {
+ if (begin + 1 == end) {
+ out[begin] = AnnotatedString{ data[begin], 0 };
+ return 1;
+ }
+
+ int mid = (begin + end) / 2;
+ int s1_len = StringMergeSortAndUniq(data, begin, mid, work, out);
+ int s2_len = StringMergeSortAndUniq(data, mid, end, work, out);
+
+ memcpy(work, out + begin, s1_len * sizeof(AnnotatedString));
+
+ AnnotatedString* s1 = work;
+ AnnotatedString* s2 = out + mid;
+ int i = 0, j = 0, k = 0;
+ AnnotatedString* d = out + begin;
+ for (; i < s1_len && j < s2_len; k++) {
+ if (s1[i].l > s2[j].l) {
+ d[k] = s1[i];
+ i++;
+ } else if (s1[i].l < s2[j].l) {
+ d[k] = s2[j];
+ j++;
+ } else {
+ int m;
+ int r = LcpCompare(s1[i].s, s2[j].s, s1[i].l, m);
+ if (r < 0) {
+ d[k] = s1[i];
+ i++;
+ s2[j].l = m;
+ } else if (r > 0) {
+ d[k] = s2[j];
+ j++;
+ s1[i].l = m;
+ } else {
+ // Discard a unique string.
+ d[k] = s1[i];
+ i++;
+ j++;
+ }
+ }
+ }
+
+ if (i < s1_len) {
+ memcpy(&d[k], &s1[i], (s1_len - i) * sizeof(AnnotatedString));
+ k += s1_len - i;
+ }
+ else if (j < s2_len) {
+ memcpy(&d[k], &s2[j], (s2_len - j) * sizeof(AnnotatedString));
+ k += s2_len - j;
+ }
+ return k;
+}
+
+#else
+
+void StringMergeSort(const vector<const unsigned char*>& data,
+ int begin,
+ int end,
+ AnnotatedString* work,
+ AnnotatedString* out) {
+ if (begin + 1 == end) {
+ out[begin] = AnnotatedString{ data[begin], 0 };
+ return;
+ }
+
+ int mid = (begin + end) / 2;
+ StringMergeSort(data, begin, mid, work, out);
+ StringMergeSort(data, mid, end, work, out);
+
+ memcpy(work, out + begin, (mid - begin) * sizeof(AnnotatedString));
+
+ AnnotatedString* s1 = work;
+ int s1_len = mid - begin;
+ AnnotatedString* s2 = out + mid;
+ int s2_len = end - mid;
+ int i = 0, j = 0, k = 0;
+ AnnotatedString* d = out + begin;
+ for (; i < s1_len && j < s2_len; k++) {
+ if (s1[i].l > s2[j].l) {
+ d[k] = s1[i];
+ i++;
+ } else if (s1[i].l < s2[j].l) {
+ d[k] = s2[j];
+ j++;
+ } else {
+ int m;
+ int r = LcpCompare(s1[i].s, s2[j].s, s1[i].l, m);
+ if (r < 0) {
+ d[k] = s1[i];
+ i++;
+ s2[j].l = m;
+ } else {
+ d[k] = s2[j];
+ j++;
+ s1[i].l = m;
+ }
+ }
+ }
+
+ if (i < s1_len)
+ memcpy(&d[k], &s1[i], (s1_len - i) * sizeof(AnnotatedString));
+ else if (j < s2_len)
+ memcpy(&d[k], &s2[j], (s2_len - j) * sizeof(AnnotatedString));
+}
+
+#endif
+
+} // namespace
+
+void StringSortByLcpMsort(string* buf, string* out) {
+ vector<const unsigned char*> toks;
+ for (StringPiece tok : WordScanner(*buf)) {
+ const_cast<char*>(tok.data())[tok.size()] = 0;
+ toks.push_back(reinterpret_cast<const unsigned char*>(tok.data()));
+ }
+
+ if (toks.empty())
+ return;
+ if (toks.size() == 1) {
+ *out += reinterpret_cast<const char*>(toks[0]);
+ return;
+ }
+
+ AnnotatedString* as = static_cast<AnnotatedString*>(
+ malloc(toks.size() * sizeof(AnnotatedString)));
+ AnnotatedString* work = static_cast<AnnotatedString*>(
+ malloc((toks.size() / 2 + 1) * sizeof(AnnotatedString)));
+
+#ifdef DO_SORT_AND_UNIQ_AT_ONCE
+ int len = StringMergeSortAndUniq(toks, 0, toks.size(), work, as);
+#else
+ int len = static_cast<int>(toks.size());
+ StringMergeSort(toks, 0, toks.size(), work, as);
+#endif
+
+ WordWriter ww(out);
+#ifdef DO_SORT_AND_UNIQ_AT_ONCE
+ for (int i = 0; i < len; i++) {
+ ww.MaybeAddWhitespace();
+ *out += reinterpret_cast<const char*>(as[i].s);
+ }
+#else
+ StringPiece prev;
+ for (int i = 0; i < len; i++) {
+ StringPiece tok = reinterpret_cast<const char*>(as[i].s);
+ if (prev != tok) {
+ ww.Write(tok);
+ prev = tok;
+ }
+ }
+#endif
+
+ free(as);
+ free(work);
+}
diff --git a/lcp_msort.h b/lcp_msort.h
new file mode 100644
index 0000000..a74b095
--- /dev/null
+++ b/lcp_msort.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Google Inc. All rights reserved
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LCP_MSORT_H_
+#define LCP_MSORT_H_
+
+#include <string>
+
+using namespace std;
+
+// Split |buf|, sort and unique tokens, and append results to |out|.
+void StringSortByLcpMsort(string* buf, string* out);
+
+#endif // LCP_MSORT_H_
diff --git a/testcase/sort.mk b/testcase/sort.mk
index c521289..03d0375 100644
--- a/testcase/sort.mk
+++ b/testcase/sort.mk
@@ -1,5 +1,11 @@
+sp := $(subst S, ,S)
+
test:
echo $(sort foo bar lose)
echo $(sort foo bar aaaa)
echo $(sort foo bar lose lose foo bar bar)
+ echo $(sort baz bar)
+ echo $(sort single)
+ echo $(sort $(sp)foo$(sp))
echo $(sort )
+ echo $(sort device/sample/products/AndroidProducts.mk device/moto/shamu/AndroidProducts.mk device/asus/fugu/AndroidProducts.mk device/asus/deb/AndroidProducts.mk device/asus/flo/AndroidProducts.mk device/generic/arm64/AndroidProducts.mk device/generic/qemu/AndroidProducts.mk device/generic/mini-emulator-x86_64/AndroidProducts.mk device/generic/x86/AndroidProducts.mk device/generic/mips/AndroidProducts.mk device/generic/mini-emulator-x86/AndroidProducts.mk device/generic/mini-emulator-mips/AndroidProducts.mk device/generic/mini-emulator-arm64/AndroidProducts.mk device/generic/mini-emulator-armv7-a-neon/AndroidProducts.mk device/generic/x86_64/AndroidProducts.mk device/generic/armv7-a-neon/AndroidProducts.mk device/htc/flounder/AndroidProducts.mk device/lge/bullhead/AndroidProducts.mk device/lge/hammerhead/AndroidProducts.mk device/huawei/angler/AndroidProducts.mk)