author    Hans Boehm <hboehm@google.com>    2017-02-09 02:03:22 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com>    2017-02-09 02:03:23 +0000
commit    2bfb8c9180603bac97dca2d8e359f71bb11f7126 (patch)
tree      fa30aac07279d62c93d751318a654aa997fcd296
parent    f201c704d64ea40432123a90489d5dc88d613213 (diff)
parent    3f5578708d02ef9a14bfaf5b169e0ecb672c10fd (diff)
download  android_bionic-2bfb8c9180603bac97dca2d8e359f71bb11f7126.tar.gz
          android_bionic-2bfb8c9180603bac97dca2d8e359f71bb11f7126.tar.bz2
          android_bionic-2bfb8c9180603bac97dca2d8e359f71bb11f7126.zip
Merge "Add "benchmark" to time atomic operations"
-rw-r--r--  benchmarks/Android.bp            |   1
-rw-r--r--  benchmarks/atomic_benchmark.cpp  | 159
2 files changed, 160 insertions(+), 0 deletions(-)
diff --git a/benchmarks/Android.bp b/benchmarks/Android.bp
index 12f494062..3f95aa1f0 100644
--- a/benchmarks/Android.bp
+++ b/benchmarks/Android.bp
@@ -25,6 +25,7 @@ cc_defaults {
"-Wunused",
],
srcs: [
+ "atomic_benchmark.cpp",
"math_benchmark.cpp",
"property_benchmark.cpp",
"pthread_benchmark.cpp",
diff --git a/benchmarks/atomic_benchmark.cpp b/benchmarks/atomic_benchmark.cpp
new file mode 100644
index 000000000..66a0120a1
--- /dev/null
+++ b/benchmarks/atomic_benchmark.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Our goal is to measure the cost of various C++ atomic operations. Android
+// doesn't control those costs, but since some of these operations can be quite
+// expensive, the results may be useful input for developing higher-level code.
+// Expected mappings from C++ atomics to hardware primitives can be found at
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .
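+// For example, on 64-bit ARMv8 an acquire load is expected to compile to an
+// ldar instruction and a release store to stlr, while the fences used below
+// map to dmb ish variants (examples taken from the table cited above).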
+
+#include <benchmark/benchmark.h>
+#include <atomic>
+#include <mutex>
+
+// We time atomic operations separated by a volatile (not atomic!) increment. This ensures
+// that the compiler emits memory instructions (e.g. load or store) prior to any fence or the
+// like. That in turn ensures that the CPU has outstanding memory operations when the fence
+// is executed.
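+// Without the intervening volatile accesses, a sufficiently clever compiler
+// could coalesce adjacent atomic operations or hoist them out of the loop,
+// leaving little or nothing to measure.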
+
+// In most respects, we compute best case values. Since there is only one thread, there are no
+// coherence misses.
+
+// We assume that the compiler is not smart enough to optimize away fences in a single-threaded
+// program. If that changes, we'll need to add a second thread.
+
+volatile unsigned counter;
+
+std::atomic<int> test_loc(0);
+
+volatile unsigned sink;
+
+std::mutex mtx;
+
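+// Baseline: just the loop and the volatile increment; the other results can
+// be compared against this raw loop overhead.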
+static void BM_empty(benchmark::State& state) {
+ while (state.KeepRunning()) {
+ ++counter;
+ }
+}
+BENCHMARK(BM_empty);
+
+static void BM_load_relaxed(benchmark::State& state) {
+ unsigned result = 0;
+ while (state.KeepRunning()) {
+ result += test_loc.load(std::memory_order_relaxed);
+ ++counter;
+ }
+ sink = result;
+}
+BENCHMARK(BM_load_relaxed);
+
+static void BM_load_acquire(benchmark::State& state) {
+ unsigned result = 0;
+ while (state.KeepRunning()) {
+ result += test_loc.load(std::memory_order_acquire);
+ ++counter;
+ }
+ sink = result;
+}
+BENCHMARK(BM_load_acquire);
+
+static void BM_store_release(benchmark::State& state) {
+ int i = counter;
+ while (state.KeepRunning()) {
+ test_loc.store(++i, std::memory_order_release);
+ ++counter;
+ }
+}
+BENCHMARK(BM_store_release);
+
+static void BM_store_seq_cst(benchmark::State& state) {
+ int i = counter;
+ while (state.KeepRunning()) {
+ test_loc.store(++i, std::memory_order_seq_cst);
+ ++counter;
+ }
+}
+BENCHMARK(BM_store_seq_cst);
+
+static void BM_fetch_add_relaxed(benchmark::State& state) {
+ unsigned result = 0;
+ while (state.KeepRunning()) {
+ result += test_loc.fetch_add(1, std::memory_order_relaxed);
+ ++counter;
+ }
+ sink = result;
+}
+BENCHMARK(BM_fetch_add_relaxed);
+
+static void BM_fetch_add_seq_cst(benchmark::State& state) {
+ unsigned result = 0;
+ while (state.KeepRunning()) {
+ result += test_loc.fetch_add(1, std::memory_order_seq_cst);
+ ++counter;
+ }
+ sink = result;
+}
+BENCHMARK(BM_fetch_add_seq_cst);
+
+// The fence benchmarks include a relaxed load to make it much harder to optimize away
+// the fence.
+
+static void BM_acquire_fence(benchmark::State& state) {
+ unsigned result = 0;
+ while (state.KeepRunning()) {
+ result += test_loc.load(std::memory_order_relaxed);
+ std::atomic_thread_fence(std::memory_order_acquire);
+ ++counter;
+ }
+ sink = result;
+}
+BENCHMARK(BM_acquire_fence);
+
+static void BM_seq_cst_fence(benchmark::State& state) {
+ unsigned result = 0;
+ while (state.KeepRunning()) {
+ result += test_loc.load(std::memory_order_relaxed);
+ std::atomic_thread_fence(std::memory_order_seq_cst);
+ ++counter;
+ }
+ sink = result;
+}
+BENCHMARK(BM_seq_cst_fence);
+
+// For comparison, also throw in a critical section version:
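+// An uncontended lock/unlock pair typically costs about one atomic
+// read-modify-write to acquire the mutex plus a release store or RMW to
+// unlock it, which makes this a useful upper-bound comparison.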
+
+static void BM_fetch_add_cs(benchmark::State& state) {
+ unsigned result = 0;
+ while (state.KeepRunning()) {
+ {
+ std::lock_guard<std::mutex> _(mtx);
+ result += ++counter;
+ }
+ }
+ sink = result;
+}
+BENCHMARK(BM_fetch_add_cs);
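
To try the new cases, a single benchmark can be selected with the standard
Google Benchmark filter flag; the binary name below is assumed from the usual
bionic benchmarks build, not stated in this change:

    bionic-benchmarks --benchmark_filter=BM_load_acquire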