From 94c32c5f01c7d44781317bf23933ed0a5bc4b796 Mon Sep 17 00:00:00 2001 From: Mathieu Chartier Date: Fri, 9 Aug 2013 11:14:04 -0700 Subject: More parallel GC, rewritten parallel mark stack processing. Card scanning may now be done in parallel. This speeds up sticky and reduces pause times for all GC types. Speedup on my mako (ritz perf): Average pause time for sticky GC (~250 samples): Without parallel cards scanning enabled: 2.524904215ms Parallel card scanning (num_gc_threads_): 1.552123552ms Throughput (~250 samples): Sticky GC throughput with parallel card scanning: 69MB/s Sticky GC throughput without parallel card scanning: 51MB/s Rewrote the mark stack processing to be LIFO and use a prefetch queue like the non parallel version. Cleaned up some of the logcat printing for the activity manager process state listening. Added unlikely hints to object scanning since arrays and classes are scanned much less often than normal objects. Fixed a bug where the number of GC threads was clamped to 1 due to a bool instead of a size_t. Fixed a race condition when we added references to the reference queues. Sharded the reference queue lock into one lock for each reference type (weak, soft, phantom, finalizer). Changed timing splits to be different for processing gray objects with and without mutators paused since sticky GC does both. Mask out the class bit when visiting fields as an optimization, this is valid since classes are held live by the class linker. Partially completed: Parallel recursive mark + finger. 
Bug: 10245302 Bug: 9969166 Bug: 9986532 Bug: 9961698 Change-Id: I142d09718c4609b7c2387cb28f517a6983c73288 --- runtime/thread_pool.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'runtime/thread_pool.cc') diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc index 067ef2d5d8..f7fdcfb25c 100644 --- a/runtime/thread_pool.cc +++ b/runtime/thread_pool.cc @@ -23,6 +23,8 @@ namespace art { +static const bool kMeasureWaitTime = false; + ThreadPoolWorker::ThreadPoolWorker(ThreadPool* thread_pool, const std::string& name, size_t stack_size) : thread_pool_(thread_pool), @@ -64,7 +66,7 @@ void ThreadPool::AddTask(Thread* self, Task* task) { MutexLock mu(self, task_queue_lock_); tasks_.push_back(task); // If we have any waiters, signal one. - if (waiting_count_ != 0) { + if (started_ && waiting_count_ != 0) { task_queue_condition_.Signal(self); } } @@ -129,11 +131,13 @@ Task* ThreadPool::GetTask(Thread* self) { // We may be done, lets broadcast to the completion condition. completion_condition_.Broadcast(self); } - const uint64_t wait_start = NanoTime(); + const uint64_t wait_start = kMeasureWaitTime ? NanoTime() : 0; task_queue_condition_.Wait(self); - const uint64_t wait_end = NanoTime(); - total_wait_time_ += wait_end - std::max(wait_start, start_time_); - waiting_count_--; + if (kMeasureWaitTime) { + const uint64_t wait_end = NanoTime(); + total_wait_time_ += wait_end - std::max(wait_start, start_time_); + } + --waiting_count_; } // We are shutting down, return NULL to tell the worker thread to stop looping. -- cgit v1.2.3