-rw-r--r--  build/Android.gtest.mk                      |   1
-rw-r--r--  runtime/Android.mk                          |   1
-rw-r--r--  runtime/gc/heap.cc                          | 282
-rw-r--r--  runtime/gc/heap.h                           |  70
-rw-r--r--  runtime/gc/reference_processor.cc           |  43
-rw-r--r--  runtime/gc/task_processor.cc                | 125
-rw-r--r--  runtime/gc/task_processor.h                 |  84
-rw-r--r--  runtime/gc/task_processor_test.cc           | 149
-rw-r--r--  runtime/native/dalvik_system_VMRuntime.cc   |  31
-rw-r--r--  runtime/thread_pool.h                       |  10
10 files changed, 606 insertions(+), 190 deletions(-)
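
For orientation before the hunks below: the change removes the hand-rolled trim/transition signalling (heap_trim_request_lock_, gc_request_cond_, SignalHeapTrimDaemon) and instead queues timed HeapTasks on a new gc::TaskProcessor that a daemon thread drains. A minimal sketch of the producer side, using only the API added in this diff; ExampleTask and ScheduleExampleTask are illustrative names, not code from the change.

#include "base/macros.h"
#include "gc/heap.h"
#include "gc/task_processor.h"
#include "runtime.h"
#include "thread-inl.h"
#include "utils.h"  // NanoTime(), MsToNs()

namespace art {
namespace gc {

// Illustrative one-shot task. The real tasks added by this change are Heap::HeapTrimTask,
// Heap::CollectorTransitionTask and Heap::ConcurrentGCTask. HeapTask derives from
// SelfDeletingTask, so instances must be heap-allocated; Finalize() deletes them after Run().
class ExampleTask : public HeapTask {
 public:
  explicit ExampleTask(uint64_t delay_ns) : HeapTask(NanoTime() + delay_ns) {}
  virtual void Run(Thread* self) OVERRIDE {
    Runtime::Current()->GetHeap()->Trim(self);  // Same work HeapTrimTask performs.
  }
};

// Hypothetical helper showing how a request is queued.
static void ScheduleExampleTask() {
  Thread* self = Thread::Current();
  TaskProcessor* processor = Runtime::Current()->GetHeap()->GetTaskProcessor();
  // Tasks are ordered by target run time; the daemon will not pop this one earlier
  // than roughly five seconds from now.
  processor->AddTask(self, new ExampleTask(MsToNs(5000)));
}

}  // namespace gc
}  // namespace art

Heap::RequestTrim(), RequestConcurrentGC() and RequestCollectorTransition() in the diff follow this pattern, each ensuring that at most one instance of its task is queued (via the pending_* fields or the concurrent_gc_pending_ flag).
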
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 4c19ba0b4c..cf703a03da 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -115,6 +115,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/gc/space/rosalloc_space_static_test.cc \ runtime/gc/space/rosalloc_space_random_test.cc \ runtime/gc/space/large_object_space_test.cc \ + runtime/gc/task_processor_test.cc \ runtime/gtest_test.cc \ runtime/handle_scope_test.cc \ runtime/indenter_test.cc \ diff --git a/runtime/Android.mk b/runtime/Android.mk index ca29eba4ee..13a216c48b 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -67,6 +67,7 @@ LIBART_COMMON_SRC_FILES := \ gc/space/rosalloc_space.cc \ gc/space/space.cc \ gc/space/zygote_space.cc \ + gc/task_processor.cc \ hprof/hprof.cc \ image.cc \ indirect_reference_table.cc \ diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 8f09e074f7..26d6117122 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -52,6 +52,7 @@ #include "gc/space/rosalloc_space-inl.h" #include "gc/space/space-inl.h" #include "gc/space/zygote_space.h" +#include "gc/task_processor.h" #include "entrypoints/quick/quick_alloc_entrypoints.h" #include "heap-inl.h" #include "image.h" @@ -129,10 +130,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max foreground_collector_type_(foreground_collector_type), background_collector_type_(background_collector_type), desired_collector_type_(foreground_collector_type_), - heap_trim_request_lock_(nullptr), - last_trim_time_(0), - heap_transition_or_trim_target_time_(0), - heap_trim_request_pending_(false), + pending_task_lock_(nullptr), parallel_gc_threads_(parallel_gc_threads), conc_gc_threads_(conc_gc_threads), low_memory_mode_(low_memory_mode), @@ -142,8 +140,6 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max zygote_creation_lock_("zygote creation lock", kZygoteCreationLock), zygote_space_(nullptr), large_object_threshold_(large_object_threshold), - gc_request_pending_(false), - conc_gc_running_(false), collector_type_running_(kCollectorTypeNone), last_gc_type_(collector::kGcTypeNone), next_gc_type_(collector::kGcTypePartial), @@ -194,6 +190,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max min_interval_homogeneous_space_compaction_by_oom_( min_interval_homogeneous_space_compaction_by_oom), last_time_homogeneous_space_compaction_by_oom_(NanoTime()), + pending_collector_transition_(nullptr), + pending_heap_trim_(nullptr), use_homogeneous_space_compaction_for_oom_(use_homogeneous_space_compaction_for_oom) { if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() entering"; @@ -409,9 +407,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max gc_complete_lock_ = new Mutex("GC complete lock"); gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable", *gc_complete_lock_)); - gc_request_lock_ = new Mutex("GC request lock"); - gc_request_cond_.reset(new ConditionVariable("GC request condition variable", *gc_request_lock_)); - heap_trim_request_lock_ = new Mutex("Heap trim request lock"); + task_processor_.reset(new TaskProcessor()); + pending_task_lock_ = new Mutex("Pending task lock"); if (ignore_max_footprint_) { SetIdealFootprint(std::numeric_limits<size_t>::max()); concurrent_start_bytes_ = std::numeric_limits<size_t>::max(); @@ -719,8 +716,8 @@ void Heap::VisitObjects(ObjectCallback callback, void* arg) { mirror::Object* obj = *it; if (obj != nullptr && obj->GetClass() != 
nullptr) { // Avoid the race condition caused by the object not yet being written into the allocation - // stack or the class not yet being written in the object. Or, if kUseThreadLocalAllocationStack, - // there can be nulls on the allocation stack. + // stack or the class not yet being written in the object. Or, if + // kUseThreadLocalAllocationStack, there can be nulls on the allocation stack. callback(obj, arg); } } @@ -872,8 +869,7 @@ Heap::~Heap() { STLDeleteElements(&continuous_spaces_); STLDeleteElements(&discontinuous_spaces_); delete gc_complete_lock_; - delete gc_request_lock_; - delete heap_trim_request_lock_; + delete pending_task_lock_; VLOG(heap) << "Finished ~Heap()"; } @@ -944,37 +940,23 @@ void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType self->ThrowOutOfMemoryError(oss.str().c_str()); } -void Heap::DoPendingTransitionOrTrim() { - Thread* self = Thread::Current(); - CollectorType desired_collector_type; - // Wait until we reach the desired transition time. - while (true) { - uint64_t wait_time; - { - MutexLock mu(self, *heap_trim_request_lock_); - desired_collector_type = desired_collector_type_; - uint64_t current_time = NanoTime(); - if (current_time >= heap_transition_or_trim_target_time_) { - break; - } - wait_time = heap_transition_or_trim_target_time_ - current_time; - } - ScopedThreadStateChange tsc(self, kSleeping); - usleep(wait_time / 1000); // Usleep takes microseconds. - } +void Heap::DoPendingCollectorTransition() { + CollectorType desired_collector_type = desired_collector_type_; // Launch homogeneous space compaction if it is desired. if (desired_collector_type == kCollectorTypeHomogeneousSpaceCompact) { if (!CareAboutPauseTimes()) { PerformHomogeneousSpaceCompact(); + } else { + VLOG(gc) << "Homogeneous compaction ignored due to jank perceptible process state"; } - // No need to Trim(). Homogeneous space compaction may free more virtual and physical memory. - desired_collector_type = collector_type_; - return; + } else { + TransitionCollector(desired_collector_type); } - // Transition the collector if the desired collector type is not the same as the current - // collector type. - TransitionCollector(desired_collector_type); +} + +void Heap::Trim(Thread* self) { if (!CareAboutPauseTimes()) { + ATRACE_BEGIN("Deflating monitors"); // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care // about pauses. Runtime* runtime = Runtime::Current(); @@ -984,9 +966,10 @@ void Heap::DoPendingTransitionOrTrim() { VLOG(heap) << "Deflating " << count << " monitors took " << PrettyDuration(NanoTime() - start_time); runtime->GetThreadList()->ResumeAll(); + ATRACE_END(); } - // Do a heap trim if it is needed. - Trim(); + TrimIndirectReferenceTables(self); + TrimSpaces(self); } class TrimIndirectReferenceTableClosure : public Closure { @@ -1004,17 +987,22 @@ class TrimIndirectReferenceTableClosure : public Closure { Barrier* const barrier_; }; - -void Heap::Trim() { - Thread* self = Thread::Current(); - { - MutexLock mu(self, *heap_trim_request_lock_); - if (!heap_trim_request_pending_ || last_trim_time_ + kHeapTrimWait >= NanoTime()) { - return; - } - last_trim_time_ = NanoTime(); - heap_trim_request_pending_ = false; - } +void Heap::TrimIndirectReferenceTables(Thread* self) { + ScopedObjectAccess soa(self); + ATRACE_BEGIN(__FUNCTION__); + JavaVMExt* vm = soa.Vm(); + // Trim globals indirect reference table. + vm->TrimGlobals(); + // Trim locals indirect reference tables. 
+ Barrier barrier(0); + TrimIndirectReferenceTableClosure closure(&barrier); + ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); + size_t barrier_count = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); + barrier.Increment(self, barrier_count); + ATRACE_END(); +} + +void Heap::TrimSpaces(Thread* self) { { // Need to do this before acquiring the locks since we don't want to get suspended while // holding any locks. @@ -1026,20 +1014,8 @@ void Heap::Trim() { WaitForGcToCompleteLocked(kGcCauseTrim, self); collector_type_running_ = kCollectorTypeHeapTrim; } - // Trim reference tables. - { - ScopedObjectAccess soa(self); - JavaVMExt* vm = soa.Vm(); - // Trim globals indirect reference table. - vm->TrimGlobals(); - // Trim locals indirect reference tables. - Barrier barrier(0); - TrimIndirectReferenceTableClosure closure(&barrier); - ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); - size_t barrier_count = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); - barrier.Increment(self, barrier_count); - } - uint64_t start_ns = NanoTime(); + ATRACE_BEGIN(__FUNCTION__); + const uint64_t start_ns = NanoTime(); // Trim the managed spaces. uint64_t total_alloc_space_allocated = 0; uint64_t total_alloc_space_size = 0; @@ -1089,6 +1065,7 @@ void Heap::Trim() { << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed) << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization) << "%."; + ATRACE_END(); } bool Heap::IsValidObjectAddress(const mirror::Object* obj) const { @@ -1639,7 +1616,6 @@ HomogeneousSpaceCompactResult Heap::PerformHomogeneousSpaceCompact() { return HomogeneousSpaceCompactResult::kSuccess; } - void Heap::TransitionCollector(CollectorType collector_type) { if (collector_type == collector_type_) { return; @@ -2207,7 +2183,7 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus collector->Run(gc_cause, clear_soft_references || runtime->IsZygote()); total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects(); total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes(); - RequestHeapTrim(); + RequestTrim(self); // Enqueue cleared references. reference_processor_.EnqueueClearedReferences(self); // Grow the heap so that we know when to perform the next GC. @@ -3032,52 +3008,109 @@ void Heap::RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) RequestConcurrentGC(self); } -void Heap::RequestConcurrentGC(Thread* self) { - // Make sure that we can do a concurrent GC. 
+class Heap::ConcurrentGCTask : public HeapTask { + public: + explicit ConcurrentGCTask(uint64_t target_time) : HeapTask(target_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->ConcurrentGC(self); + heap->ClearConcurrentGCRequest(); + } +}; + +static bool CanAddHeapTask(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_) { Runtime* runtime = Runtime::Current(); - if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) || - self->IsHandlingStackOverflow()) { - return; + return runtime != nullptr && runtime->IsFinishedStarting() && !runtime->IsShuttingDown(self) && + !self->IsHandlingStackOverflow(); +} + +void Heap::ClearConcurrentGCRequest() { + concurrent_gc_pending_.StoreRelaxed(false); +} + +void Heap::RequestConcurrentGC(Thread* self) { + if (CanAddHeapTask(self) && + concurrent_gc_pending_.CompareExchangeStrongSequentiallyConsistent(false, true)) { + task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime())); // Start straight away. } - NotifyConcurrentGCRequest(self); } void Heap::ConcurrentGC(Thread* self) { - if (Runtime::Current()->IsShuttingDown(self)) { - return; - } - // Wait for any GCs currently running to finish. - if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) { - // If the we can't run the GC type we wanted to run, find the next appropriate one and try that - // instead. E.g. can't do partial, so do full instead. - if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) == - collector::kGcTypeNone) { - for (collector::GcType gc_type : gc_plan_) { - // Attempt to run the collector, if we succeed, we are done. - if (gc_type > next_gc_type_ && - CollectGarbageInternal(gc_type, kGcCauseBackground, false) != collector::kGcTypeNone) { - break; + if (!Runtime::Current()->IsShuttingDown(self)) { + // Wait for any GCs currently running to finish. + if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) { + // If the we can't run the GC type we wanted to run, find the next appropriate one and try that + // instead. E.g. can't do partial, so do full instead. + if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) == + collector::kGcTypeNone) { + for (collector::GcType gc_type : gc_plan_) { + // Attempt to run the collector, if we succeed, we are done. 
+ if (gc_type > next_gc_type_ && + CollectGarbageInternal(gc_type, kGcCauseBackground, false) != + collector::kGcTypeNone) { + break; + } } } } } } +class Heap::CollectorTransitionTask : public HeapTask { + public: + explicit CollectorTransitionTask(uint64_t target_time) : HeapTask(target_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->DoPendingCollectorTransition(); + heap->ClearPendingCollectorTransition(self); + } +}; + +void Heap::ClearPendingCollectorTransition(Thread* self) { + MutexLock mu(self, *pending_task_lock_); + pending_collector_transition_ = nullptr; +} + void Heap::RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time) { Thread* self = Thread::Current(); + desired_collector_type_ = desired_collector_type; + if (desired_collector_type_ == collector_type_ || !CanAddHeapTask(self)) { + return; + } + CollectorTransitionTask* added_task = nullptr; + const uint64_t target_time = NanoTime() + delta_time; { - MutexLock mu(self, *heap_trim_request_lock_); - if (desired_collector_type_ == desired_collector_type) { + MutexLock mu(self, *pending_task_lock_); + // If we have an existing collector transition, update the targe time to be the new target. + if (pending_collector_transition_ != nullptr) { + task_processor_->UpdateTargetRunTime(self, pending_collector_transition_, target_time); return; } - heap_transition_or_trim_target_time_ = - std::max(heap_transition_or_trim_target_time_, NanoTime() + delta_time); - desired_collector_type_ = desired_collector_type; + added_task = new CollectorTransitionTask(target_time); + pending_collector_transition_ = added_task; + } + task_processor_->AddTask(self, added_task); +} + +class Heap::HeapTrimTask : public HeapTask { + public: + explicit HeapTrimTask(uint64_t delta_time) : HeapTask(NanoTime() + delta_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->Trim(self); + heap->ClearPendingTrim(self); } - SignalHeapTrimDaemon(self); +}; + +void Heap::ClearPendingTrim(Thread* self) { + MutexLock mu(self, *pending_task_lock_); + pending_heap_trim_ = nullptr; } -void Heap::RequestHeapTrim() { +void Heap::RequestTrim(Thread* self) { + if (!CanAddHeapTask(self)) { + return; + } // GC completed and now we must decide whether to request a heap trim (advising pages back to the // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans // a space it will hold its lock and can become a cause of jank. @@ -3090,42 +3123,17 @@ void Heap::RequestHeapTrim() { // to utilization (which is probably inversely proportional to how much benefit we can expect). // We could try mincore(2) but that's only a measure of how many pages we haven't given away, // not how much use we're making of those pages. - - Thread* self = Thread::Current(); - Runtime* runtime = Runtime::Current(); - if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) || - runtime->IsZygote()) { - // Ignore the request if we are the zygote to prevent app launching lag due to sleep in heap - // trimmer daemon. b/17310019 - // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time) - // Also: we do not wish to start a heap trim if the runtime is shutting down (a racy check - // as we don't hold the lock while requesting the trim). 
- return; - } + HeapTrimTask* added_task = nullptr; { - MutexLock mu(self, *heap_trim_request_lock_); - if (last_trim_time_ + kHeapTrimWait >= NanoTime()) { - // We have done a heap trim in the last kHeapTrimWait nanosecs, don't request another one - // just yet. + MutexLock mu(self, *pending_task_lock_); + if (pending_heap_trim_ != nullptr) { + // Already have a heap trim request in task processor, ignore this request. return; } - heap_trim_request_pending_ = true; - uint64_t current_time = NanoTime(); - if (heap_transition_or_trim_target_time_ < current_time) { - heap_transition_or_trim_target_time_ = current_time + kHeapTrimWait; - } + added_task = new HeapTrimTask(kHeapTrimWait); + pending_heap_trim_ = added_task; } - // Notify the daemon thread which will actually do the heap trim. - SignalHeapTrimDaemon(self); -} - -void Heap::SignalHeapTrimDaemon(Thread* self) { - JNIEnv* env = self->GetJniEnv(); - DCHECK(WellKnownClasses::java_lang_Daemons != nullptr); - DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != nullptr); - env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, - WellKnownClasses::java_lang_Daemons_requestHeapTrim); - CHECK(!env->ExceptionCheck()); + task_processor_->AddTask(self, added_task); } void Heap::RevokeThreadLocalBuffers(Thread* thread) { @@ -3153,7 +3161,7 @@ void Heap::RevokeAllThreadLocalBuffers() { } bool Heap::IsGCRequestPending() const { - return concurrent_start_bytes_ != std::numeric_limits<size_t>::max(); + return concurrent_gc_pending_.LoadRelaxed(); } void Heap::RunFinalization(JNIEnv* env) { @@ -3235,7 +3243,7 @@ void Heap::AddModUnionTable(accounting::ModUnionTable* mod_union_table) { } void Heap::CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) { - CHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) || + CHECK(c == nullptr || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) || (c->IsVariableSize() || c->GetObjectSize() == byte_count)); CHECK_GE(byte_count, sizeof(mirror::Object)); } @@ -3272,25 +3280,5 @@ void Heap::ClearMarkedObjects() { } } -void Heap::WaitForConcurrentGCRequest(Thread* self) { - ScopedThreadStateChange tsc(self, kBlocked); - MutexLock mu(self, *gc_request_lock_); - conc_gc_running_ = false; - while (!gc_request_pending_) { - gc_request_cond_->Wait(self); - } - gc_request_pending_ = false; - conc_gc_running_ = true; -} - -void Heap::NotifyConcurrentGCRequest(Thread* self) { - ScopedThreadStateChange tsc(self, kBlocked); - MutexLock mu(self, *gc_request_lock_); - if (!conc_gc_running_) { - gc_request_pending_ = true; - gc_request_cond_->Signal(self); - } -} - } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index cf94eb6a9d..1738124c0c 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -57,6 +57,7 @@ namespace mirror { namespace gc { class ReferenceProcessor; +class TaskProcessor; namespace accounting { class HeapBitmap; @@ -470,11 +471,11 @@ class Heap { void DumpForSigQuit(std::ostream& os); - // Do a pending heap transition or trim. - void DoPendingTransitionOrTrim() LOCKS_EXCLUDED(heap_trim_request_lock_); + // Do a pending collector transition. + void DoPendingCollectorTransition(); - // Trim the managed and native heaps by releasing unused memory back to the OS. - void Trim() LOCKS_EXCLUDED(heap_trim_request_lock_); + // Deflate monitors, ... and trim the spaces. 
+ void Trim(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_); void RevokeThreadLocalBuffers(Thread* thread); void RevokeRosAllocThreadLocalBuffers(Thread* thread); @@ -606,15 +607,25 @@ class Heap { ReferenceProcessor* GetReferenceProcessor() { return &reference_processor_; } + TaskProcessor* GetTaskProcessor() { + return task_processor_.get(); + } bool HasZygoteSpace() const { return zygote_space_ != nullptr; } - void WaitForConcurrentGCRequest(Thread* self) LOCKS_EXCLUDED(gc_request_lock_); - void NotifyConcurrentGCRequest(Thread* self) LOCKS_EXCLUDED(gc_request_lock_); + // Request an asynchronous trim. + void RequestTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + + // Request asynchronous GC. + void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); private: + class ConcurrentGCTask; + class CollectorTransitionTask; + class HeapTrimTask; + // Compact source space to target space. void Compact(space::ContinuousMemMapAllocSpace* target_space, space::ContinuousMemMapAllocSpace* source_space, @@ -705,12 +716,10 @@ class Heap { EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_); void RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time) - LOCKS_EXCLUDED(heap_trim_request_lock_); - void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); + LOCKS_EXCLUDED(pending_task_lock_); + void RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void RequestConcurrentGC(Thread* self) - LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); bool IsGCRequestPending() const; // Sometimes CollectGarbageInternal decides to run a different Gc than you requested. Returns @@ -771,10 +780,6 @@ class Heap { // Clear cards and update the mod union table. void ProcessCards(TimingLogger* timings, bool use_rem_sets); - // Signal the heap trim daemon that there is something to do, either a heap transition or heap - // trim. - void SignalHeapTrimDaemon(Thread* self); - // Push an object onto the allocation stack. void PushOnAllocationStack(Thread* self, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -783,12 +788,22 @@ class Heap { void PushOnThreadLocalAllocationStackWithInternalGC(Thread* thread, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void ClearConcurrentGCRequest(); + void ClearPendingTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + void ClearPendingCollectorTransition(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark // sweep GC, false for other GC types. bool IsGcConcurrent() const ALWAYS_INLINE { return collector_type_ == kCollectorTypeCMS || collector_type_ == kCollectorTypeCC; } + // Trim the managed and native spaces by releasing unused memory back to the OS. + void TrimSpaces(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_); + + // Trim 0 pages at the end of reference tables. + void TrimIndirectReferenceTables(Thread* self); + // All-known continuous spaces, where objects lie within fixed bounds. std::vector<space::ContinuousSpace*> continuous_spaces_; @@ -835,14 +850,8 @@ class Heap { // Desired collector type, heap trimming daemon transitions the heap if it is != collector_type_. CollectorType desired_collector_type_; - // Lock which guards heap trim requests. - Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - // When we want to perform the next heap trim (nano seconds). 
- uint64_t last_trim_time_ GUARDED_BY(heap_trim_request_lock_); - // When we want to perform the next heap transition (nano seconds) or heap trim. - uint64_t heap_transition_or_trim_target_time_ GUARDED_BY(heap_trim_request_lock_); - // If we have a heap trim request pending. - bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_); + // Lock which guards pending tasks. + Mutex* pending_task_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; // How many GC threads we may use for paused parts of garbage collection. const size_t parallel_gc_threads_; @@ -879,15 +888,12 @@ class Heap { Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; std::unique_ptr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_); - // Guards concurrent GC requests. - Mutex* gc_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::unique_ptr<ConditionVariable> gc_request_cond_ GUARDED_BY(gc_request_lock_); - bool gc_request_pending_ GUARDED_BY(gc_request_lock_); - bool conc_gc_running_ GUARDED_BY(gc_request_lock_); - // Reference processor; ReferenceProcessor reference_processor_; + // Task processor, proxies heap trim requests to the daemon threads. + std::unique_ptr<TaskProcessor> task_processor_; + // True while the garbage collector is running. volatile CollectorType collector_type_running_ GUARDED_BY(gc_complete_lock_); @@ -1060,9 +1066,17 @@ class Heap { // Count for performed homogeneous space compaction. Atomic<size_t> count_performed_homogeneous_space_compaction_; + // Whether or not a concurrent GC is pending. + Atomic<bool> concurrent_gc_pending_; + + // Active tasks which we can modify (change target time, desired collector type, etc..). + CollectorTransitionTask* pending_collector_transition_ GUARDED_BY(pending_task_lock_); + HeapTrimTask* pending_heap_trim_ GUARDED_BY(pending_task_lock_); + // Whether or not we use homogeneous space compaction to avoid OOM errors. bool use_homogeneous_space_compaction_for_oom_; + friend class CollectorTransitionTask; friend class collector::GarbageCollector; friend class collector::MarkCompact; friend class collector::MarkSweep; diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index 99bd63fa8a..01e8795669 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -23,11 +23,14 @@ #include "reflection.h" #include "ScopedLocalRef.h" #include "scoped_thread_state_change.h" +#include "task_processor.h" #include "well_known_classes.h" namespace art { namespace gc { +static constexpr bool kAsyncReferenceQueueAdd = false; + ReferenceProcessor::ReferenceProcessor() : process_references_args_(nullptr, nullptr, nullptr), preserving_references_(false), @@ -213,17 +216,43 @@ void ReferenceProcessor::UpdateRoots(IsMarkedCallback* callback, void* arg) { cleared_references_.UpdateRoots(callback, arg); } +class ClearedReferenceTask : public HeapTask { + public: + explicit ClearedReferenceTask(jobject cleared_references) + : HeapTask(NanoTime()), cleared_references_(cleared_references) { + } + virtual void Run(Thread* thread) { + ScopedObjectAccess soa(thread); + jvalue args[1]; + args[0].l = cleared_references_; + InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args); + soa.Env()->DeleteGlobalRef(cleared_references_); + } + + private: + const jobject cleared_references_; +}; + void ReferenceProcessor::EnqueueClearedReferences(Thread* self) { Locks::mutator_lock_->AssertNotHeld(self); + // When a runtime isn't started there are no reference queues to care about so ignore. 
if (!cleared_references_.IsEmpty()) { - // When a runtime isn't started there are no reference queues to care about so ignore. if (LIKELY(Runtime::Current()->IsStarted())) { - ScopedObjectAccess soa(self); - ScopedLocalRef<jobject> arg(self->GetJniEnv(), - soa.AddLocalReference<jobject>(cleared_references_.GetList())); - jvalue args[1]; - args[0].l = arg.get(); - InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args); + jobject cleared_references; + { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + cleared_references = self->GetJniEnv()->vm->AddGlobalRef( + self, cleared_references_.GetList()); + } + if (kAsyncReferenceQueueAdd) { + // TODO: This can cause RunFinalization to terminate before newly freed objects are + // finalized since they may not be enqueued by the time RunFinalization starts. + Runtime::Current()->GetHeap()->GetTaskProcessor()->AddTask( + self, new ClearedReferenceTask(cleared_references)); + } else { + ClearedReferenceTask task(cleared_references); + task.Run(self); + } } cleared_references_.Clear(); } diff --git a/runtime/gc/task_processor.cc b/runtime/gc/task_processor.cc new file mode 100644 index 0000000000..1a3c6f5399 --- /dev/null +++ b/runtime/gc/task_processor.cc @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "task_processor.h" + +#include "scoped_thread_state_change.h" + +namespace art { +namespace gc { + +TaskProcessor::TaskProcessor() + : lock_(new Mutex("Task processor lock", kReferenceProcessorLock)), is_running_(false) { + // Piggyback off the reference processor lock level. + cond_.reset(new ConditionVariable("Task processor condition", *lock_)); +} + +TaskProcessor::~TaskProcessor() { + delete lock_; +} + +void TaskProcessor::AddTask(Thread* self, HeapTask* task) { + ScopedThreadStateChange tsc(self, kBlocked); + MutexLock mu(self, *lock_); + tasks_.insert(task); + cond_->Signal(self); +} + +HeapTask* TaskProcessor::GetTask(Thread* self) { + ScopedThreadStateChange tsc(self, kBlocked); + MutexLock mu(self, *lock_); + while (true) { + if (tasks_.empty()) { + if (!is_running_) { + return nullptr; + } + cond_->Wait(self); // Empty queue, wait until we are signalled. + } else { + // Non empty queue, look at the top element and see if we are ready to run it. + const uint64_t current_time = NanoTime(); + HeapTask* task = *tasks_.begin(); + // If we are shutting down, return the task right away without waiting. Otherwise return the + // task if it is late enough. + uint64_t target_time = task->GetTargetRunTime(); + if (!is_running_ || target_time <= current_time) { + tasks_.erase(tasks_.begin()); + return task; + } + DCHECK_GT(target_time, current_time); + // Wait untl we hit the target run time. 
+ const uint64_t delta_time = target_time - current_time; + const uint64_t ms_delta = NsToMs(delta_time); + const uint64_t ns_delta = delta_time - MsToNs(ms_delta); + cond_->TimedWait(self, static_cast<int64_t>(ms_delta), static_cast<int32_t>(ns_delta)); + } + } + UNREACHABLE(); + return nullptr; +} + +void TaskProcessor::UpdateTargetRunTime(Thread* self, HeapTask* task, uint64_t new_target_time) { + MutexLock mu(self, *lock_); + // Find the task. + auto range = tasks_.equal_range(task); + for (auto it = range.first; it != range.second; ++it) { + if (*it == task) { + // Check if the target time was updated, if so re-insert then wait. + if (new_target_time != task->GetTargetRunTime()) { + tasks_.erase(it); + task->SetTargetRunTime(new_target_time); + tasks_.insert(task); + // If we became the first task then we may need to signal since we changed the task that we + // are sleeping on. + if (*tasks_.begin() == task) { + cond_->Signal(self); + } + return; + } + } + } +} + +bool TaskProcessor::IsRunning() const { + MutexLock mu(Thread::Current(), *lock_); + return is_running_; +} + +void TaskProcessor::Stop(Thread* self) { + MutexLock mu(self, *lock_); + is_running_ = false; + cond_->Broadcast(self); +} + +void TaskProcessor::Start(Thread* self) { + MutexLock mu(self, *lock_); + is_running_ = true; +} + +void TaskProcessor::RunAllTasks(Thread* self) { + while (true) { + // Wait and get a task, may be interrupted. + HeapTask* task = GetTask(self); + if (task != nullptr) { + task->Run(self); + task->Finalize(); + } else if (!IsRunning()) { + break; + } + } +} + +} // namespace gc +} // namespace art diff --git a/runtime/gc/task_processor.h b/runtime/gc/task_processor.h new file mode 100644 index 0000000000..765f03557e --- /dev/null +++ b/runtime/gc/task_processor.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_GC_TASK_PROCESSOR_H_ +#define ART_RUNTIME_GC_TASK_PROCESSOR_H_ + +#include <memory> +#include <set> + +#include "base/mutex.h" +#include "globals.h" +#include "thread_pool.h" + +namespace art { +namespace gc { + +class HeapTask : public SelfDeletingTask { + public: + explicit HeapTask(uint64_t target_run_time) : target_run_time_(target_run_time) { + } + uint64_t GetTargetRunTime() const { + return target_run_time_; + } + + private: + // Update the updated_target_run_time_, the task processor will re-insert the task when it is + // popped and update the target_run_time_. + void SetTargetRunTime(uint64_t new_target_run_time) { + target_run_time_ = new_target_run_time; + } + + // Time in ns at which we want the task to run. + uint64_t target_run_time_; + + friend class TaskProcessor; +}; + +// Used to process GC tasks (heap trim, heap transitions, concurrent GC). 
+class TaskProcessor { + public: + TaskProcessor(); + virtual ~TaskProcessor(); + void AddTask(Thread* self, HeapTask* task) LOCKS_EXCLUDED(lock_); + HeapTask* GetTask(Thread* self) LOCKS_EXCLUDED(lock_); + void Start(Thread* self) LOCKS_EXCLUDED(lock_); + // Stop tells the RunAllTasks to finish up the remaining tasks as soon as + // possible then return. + void Stop(Thread* self) LOCKS_EXCLUDED(lock_); + void RunAllTasks(Thread* self) LOCKS_EXCLUDED(lock_); + bool IsRunning() const LOCKS_EXCLUDED(lock_); + void UpdateTargetRunTime(Thread* self, HeapTask* target_time, uint64_t new_target_time) + LOCKS_EXCLUDED(lock_); + + private: + class CompareByTargetRunTime { + public: + bool operator()(const HeapTask* a, const HeapTask* b) const { + return a->GetTargetRunTime() < b->GetTargetRunTime(); + } + }; + + mutable Mutex* lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + bool is_running_ GUARDED_BY(lock_); + std::unique_ptr<ConditionVariable> cond_ GUARDED_BY(lock_); + std::multiset<HeapTask*, CompareByTargetRunTime> tasks_ GUARDED_BY(lock_); +}; + +} // namespace gc +} // namespace art + +#endif // ART_RUNTIME_GC_TASK_PROCESSOR_H_ diff --git a/runtime/gc/task_processor_test.cc b/runtime/gc/task_processor_test.cc new file mode 100644 index 0000000000..5dd6d8fb7b --- /dev/null +++ b/runtime/gc/task_processor_test.cc @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "common_runtime_test.h"
+#include "task_processor.h"
+#include "thread_pool.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+
+class TaskProcessorTest : public CommonRuntimeTest {
+ public:
+};
+
+class RecursiveTask : public HeapTask {
+ public:
+  RecursiveTask(TaskProcessor* task_processor, Atomic<size_t>* counter, size_t max_recursion)
+      : HeapTask(NanoTime() + MsToNs(10)), task_processor_(task_processor), counter_(counter),
+        max_recursion_(max_recursion) {
+  }
+  virtual void Run(Thread* self) OVERRIDE {
+    if (max_recursion_ > 0) {
+      task_processor_->AddTask(self,
+                               new RecursiveTask(task_processor_, counter_, max_recursion_ - 1));
+      counter_->FetchAndAddSequentiallyConsistent(1U);
+    }
+  }
+
+ private:
+  TaskProcessor* const task_processor_;
+  Atomic<size_t>* const counter_;
+  const size_t max_recursion_;
+};
+
+class WorkUntilDoneTask : public SelfDeletingTask {
+ public:
+  WorkUntilDoneTask(TaskProcessor* task_processor, Atomic<bool>* done_running)
+      : task_processor_(task_processor), done_running_(done_running) {
+  }
+  virtual void Run(Thread* self) OVERRIDE {
+    task_processor_->RunAllTasks(self);
+    done_running_->StoreSequentiallyConsistent(true);
+  }
+
+ private:
+  TaskProcessor* const task_processor_;
+  Atomic<bool>* done_running_;
+};
+
+TEST_F(TaskProcessorTest, Interrupt) {
+  ThreadPool thread_pool("task processor test", 1U);
+  Thread* const self = Thread::Current();
+  TaskProcessor task_processor;
+  static constexpr size_t kRecursion = 10;
+  Atomic<bool> done_running(false);
+  Atomic<size_t> counter(0);
+  task_processor.AddTask(self, new RecursiveTask(&task_processor, &counter, kRecursion));
+  task_processor.Start(self);
+  // Add a task which will wait until interrupted to the thread pool.
+  thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running));
+  thread_pool.StartWorkers(self);
+  ASSERT_FALSE(done_running);
+  // Wait until all the tasks are done, but since we didn't interrupt, done_running should be false.
+  while (counter.LoadSequentiallyConsistent() != kRecursion) {
+    usleep(10);
+  }
+  ASSERT_FALSE(done_running);
+  task_processor.Stop(self);
+  thread_pool.Wait(self, true, false);
+  // After the interrupt and wait, the WorkUntilDoneTask should have terminated and
+  // set done_running_ to true.
+  ASSERT_TRUE(done_running.LoadSequentiallyConsistent());
+
+  // Test that we finish remaining tasks before returning from RunAllTasks.
+  counter.StoreSequentiallyConsistent(0);
+  done_running.StoreSequentiallyConsistent(false);
+  // Self interrupt before any of the other tasks run, but since we added them we should keep on
+  // working until all the tasks are completed.
+  task_processor.Stop(self);
+  task_processor.AddTask(self, new RecursiveTask(&task_processor, &counter, kRecursion));
+  thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running));
+  thread_pool.StartWorkers(self);
+  thread_pool.Wait(self, true, false);
+  ASSERT_TRUE(done_running.LoadSequentiallyConsistent());
+  ASSERT_EQ(counter.LoadSequentiallyConsistent(), kRecursion);
+}
+
+class TestOrderTask : public HeapTask {
+ public:
+  explicit TestOrderTask(uint64_t expected_time, size_t expected_counter, size_t* counter)
+      : HeapTask(expected_time), expected_counter_(expected_counter), counter_(counter) {
+  }
+  virtual void Run(Thread* thread) OVERRIDE {
+    UNUSED(thread);  // Fix cpplint bug.
+ ASSERT_EQ(*counter_, expected_counter_); + ++*counter_; + } + + private: + const size_t expected_counter_; + size_t* const counter_; +}; + +TEST_F(TaskProcessorTest, Ordering) { + static const size_t kNumTasks = 25; + const uint64_t current_time = NanoTime(); + Thread* const self = Thread::Current(); + TaskProcessor task_processor; + task_processor.Stop(self); + size_t counter = 0; + std::vector<std::pair<uint64_t, size_t>> orderings; + for (size_t i = 0; i < kNumTasks; ++i) { + orderings.push_back(std::make_pair(current_time + MsToNs(10U * i), i)); + } + for (size_t i = 0; i < kNumTasks; ++i) { + std::swap(orderings[i], orderings[(i * 87654231 + 12345) % orderings.size()]); + } + for (const auto& pair : orderings) { + auto* task = new TestOrderTask(pair.first, pair.second, &counter); + task_processor.AddTask(self, task); + } + ThreadPool thread_pool("task processor test", 1U); + Atomic<bool> done_running(false); + // Add a task which will wait until interrupted to the thread pool. + thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running)); + ASSERT_FALSE(done_running.LoadSequentiallyConsistent()); + thread_pool.StartWorkers(self); + thread_pool.Wait(self, true, false); + ASSERT_TRUE(done_running.LoadSequentiallyConsistent()); + ASSERT_EQ(counter, kNumTasks); +} + +} // namespace gc +} // namespace art diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index a348432340..f503b354f7 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -34,6 +34,7 @@ #include "gc/heap.h" #include "gc/space/dlmalloc_space.h" #include "gc/space/image_space.h" +#include "gc/task_processor.h" #include "intern_table.h" #include "jni_internal.h" #include "mirror/art_method-inl.h" @@ -213,19 +214,32 @@ static void VMRuntime_updateProcessState(JNIEnv*, jobject, jint process_state) { runtime->UpdateProfilerState(process_state); } -static void VMRuntime_trimHeap(JNIEnv*, jobject) { - Runtime::Current()->GetHeap()->DoPendingTransitionOrTrim(); +static void VMRuntime_trimHeap(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->Trim(ThreadForEnv(env)); } static void VMRuntime_concurrentGC(JNIEnv* env, jobject) { Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env)); } +static void VMRuntime_requestHeapTrim(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->RequestTrim(ThreadForEnv(env)); +} + static void VMRuntime_requestConcurrentGC(JNIEnv* env, jobject) { - Runtime::Current()->GetHeap()->NotifyConcurrentGCRequest(ThreadForEnv(env)); + Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env)); } -static void VMRuntime_waitForConcurrentGCRequest(JNIEnv* env, jobject) { - Runtime::Current()->GetHeap()->WaitForConcurrentGCRequest(ThreadForEnv(env)); + +static void VMRuntime_startHeapTaskProcessor(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->Start(ThreadForEnv(env)); +} + +static void VMRuntime_stopHeapTaskProcessor(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->Stop(ThreadForEnv(env)); +} + +static void VMRuntime_runHeapTasks(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->RunAllTasks(ThreadForEnv(env)); } typedef std::map<std::string, mirror::String*> StringTable; @@ -566,8 +580,6 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(VMRuntime, classPath, "()Ljava/lang/String;"), NATIVE_METHOD(VMRuntime, clearGrowthLimit, "()V"), NATIVE_METHOD(VMRuntime, concurrentGC, "()V"), 
- NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"), - NATIVE_METHOD(VMRuntime, waitForConcurrentGCRequest, "()V"), NATIVE_METHOD(VMRuntime, disableJitCompilation, "()V"), NATIVE_METHOD(VMRuntime, getTargetHeapUtilization, "()F"), NATIVE_METHOD(VMRuntime, isDebuggerActive, "!()Z"), @@ -578,8 +590,13 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(VMRuntime, setTargetSdkVersionNative, "(I)V"), NATIVE_METHOD(VMRuntime, registerNativeAllocation, "(I)V"), NATIVE_METHOD(VMRuntime, registerNativeFree, "(I)V"), + NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"), + NATIVE_METHOD(VMRuntime, requestHeapTrim, "()V"), + NATIVE_METHOD(VMRuntime, runHeapTasks, "()V"), NATIVE_METHOD(VMRuntime, updateProcessState, "(I)V"), + NATIVE_METHOD(VMRuntime, startHeapTaskProcessor, "()V"), NATIVE_METHOD(VMRuntime, startJitCompilation, "()V"), + NATIVE_METHOD(VMRuntime, stopHeapTaskProcessor, "()V"), NATIVE_METHOD(VMRuntime, trimHeap, "()V"), NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"), NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"), diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h index 8c080673f9..79b57afedd 100644 --- a/runtime/thread_pool.h +++ b/runtime/thread_pool.h @@ -36,10 +36,18 @@ class Closure { class Task : public Closure { public: - // Called when references reaches 0. + // Called after Closure::Run has been called. virtual void Finalize() { } }; +class SelfDeletingTask : public Task { + public: + virtual ~SelfDeletingTask() { } + virtual void Finalize() { + delete this; + } +}; + class ThreadPoolWorker { public: static const size_t kDefaultStackSize = 1 * MB; |
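
The consumer side, for completeness: the Java-side heap task daemon (not part of this diff) is expected to drive the three new VMRuntime natives (startHeapTaskProcessor, runHeapTasks, stopHeapTaskProcessor), which map directly onto the calls below. The sketch is modelled on WorkUntilDoneTask in the new test rather than on the Java daemon code; RunHeapTaskDaemonLoop and StopHeapTaskDaemon are illustrative names.

#include "gc/heap.h"
#include "gc/task_processor.h"
#include "runtime.h"
#include "thread-inl.h"

namespace art {
namespace gc {

// Hypothetical daemon body. RunAllTasks() pops tasks in deadline order, sleeping on a
// condition variable until the earliest target run time; once Stop() has been called it
// drains whatever is still queued without waiting and then returns.
static void RunHeapTaskDaemonLoop() {
  Thread* self = Thread::Current();
  TaskProcessor* processor = Runtime::Current()->GetHeap()->GetTaskProcessor();
  processor->Start(self);        // VMRuntime.startHeapTaskProcessor()
  processor->RunAllTasks(self);  // VMRuntime.runHeapTasks(); returns after Stop().
}

// Hypothetical shutdown hook, mirroring VMRuntime.stopHeapTaskProcessor().
static void StopHeapTaskDaemon() {
  Runtime::Current()->GetHeap()->GetTaskProcessor()->Stop(Thread::Current());
}

}  // namespace gc
}  // namespace art

One design note visible in task_processor.cc: tasks_ is a std::multiset keyed by GetTargetRunTime(), so UpdateTargetRunTime() can move an already-queued task (this is how Heap::RequestCollectorTransition() coalesces repeated transition requests into a single pending task) and only needs to signal the sleeping daemon when the moved task becomes the new front of the queue.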