author     Serban Constantinescu <serban.constantinescu@arm.com>    2015-02-22 20:51:33 +0000
committer  Serban Constantinescu <serban.constantinescu@arm.com>    2015-03-02 14:16:56 +0000
commit     579885a26d761f5ba9550f2a1cd7f0f598c2e1e3
tree       58d144157b7a24bbdf7f8892631a15abeefa2c9f
parent     2eb5168bd9e43b80452eaee5be32c063e124886e
Opt Compiler: ARM64: Enable explicit memory barriers over acquire/release
Implement remaining explicit memory barrier code paths and temporarily
enable the use of explicit memory barriers for testing.
This CL also enables the use of instruction set features in the ARM64
backend. kUseAcquireRelease has been replaced with PreferAcquireRelease(),
which for now is statically set to false (prefer explicit memory barriers).
Please note that acquire-release remains the preferred mapping for the ARM64
Optimizing Compiler; explicit memory barriers are enabled here only temporarily
so that this code path is exercised as well.
Change-Id: I84e047ecd43b6fbefc5b82cf532e3f5c59076458
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
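For context, the sketch below contrasts the two lowerings this CL selects between for a volatile field load. It is purely illustrative and not part of the CL: the function, its parameter, and the printed mnemonics are stand-ins for what the ARM64 backend emits (LoadAcquire versus a plain Load followed by GenerateMemoryBarrier), and the barrier is shown conservatively as a full DMB ISH.

#include <cstdio>

// Illustrative sketch (not ART code): the two ways a volatile load can be
// lowered on ARM64, selected by the new PreferAcquireRelease() query.
void EmitVolatileLoadSketch(bool prefer_acquire_release) {
  if (prefer_acquire_release) {
    // Acquire semantics are carried by the load itself.
    std::puts("ldar w0, [x1]");
  } else {
    // Plain load followed by an explicit barrier (conservatively shown as a
    // full DMB ISH; the backend picks the barrier kind per access).
    std::puts("ldr  w0, [x1]");
    std::puts("dmb  ish");
  }
}

int main() {
  // This CL makes the explicit-barrier path the temporary default.
  EmitVolatileLoadSketch(/*prefer_acquire_release=*/false);
  return 0;
}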
Diffstat (limited to 'compiler')
 compiler/optimizing/code_generator.cc       |  4
 compiler/optimizing/code_generator_arm64.cc | 23
 compiler/optimizing/code_generator_arm64.h  | 13
 compiler/optimizing/codegen_test.cc         | 13
 compiler/optimizing/intrinsics_arm64.cc     | 38
 5 files changed, 59 insertions(+), 32 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2a57fdc929..ba5f7d8fab 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -386,7 +386,9 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
                                       compiler_options);
     }
     case kArm64: {
-      return new arm64::CodeGeneratorARM64(graph, compiler_options);
+      return new arm64::CodeGeneratorARM64(graph,
+                                           *isa_features.AsArm64InstructionSetFeatures(),
+                                           compiler_options);
     }
     case kMips:
       return nullptr;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 729bab78a6..c21084a6fe 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_arm64.h"
 
+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "common_arm64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -397,7 +398,9 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
   return next_location;
 }
 
-CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
+                                       const Arm64InstructionSetFeatures& isa_features,
+                                       const CompilerOptions& compiler_options)
     : CodeGenerator(graph,
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
@@ -408,7 +411,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com
       block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
-      move_resolver_(graph->GetArena(), this) {
+      move_resolver_(graph->GetArena(), this),
+      isa_features_(isa_features) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -998,9 +1002,10 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  if (kUseAcquireRelease) {
+  if (use_acquire_release) {
     // TODO(vixl): Let the MacroAssembler handle MemOperand.
     __ Add(temp, class_reg, status_offset);
     __ Ldar(temp, HeapOperand(temp));
@@ -1689,9 +1694,10 @@ void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       // NB: LoadAcquire will record the pc info if needed.
       codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
@@ -1718,9 +1724,10 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
       codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
@@ -2437,9 +2444,10 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
 
 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       // NB: LoadAcquire will record the pc info if needed.
       codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
@@ -2464,9 +2472,10 @@ void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruc
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
     } else {
       GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index afb7fc3718..48961d68e9 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -32,10 +32,6 @@ namespace arm64 {
 
 class CodeGeneratorARM64;
 
-// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers.
-// For now we prefer the use of load-acquire, store-release over explicit memory barriers.
-static constexpr bool kUseAcquireRelease = true;
-
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kArm64WordSize = kArm64PointerSize;
 
@@ -195,7 +191,9 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver {
 
 class CodeGeneratorARM64 : public CodeGenerator {
  public:
-  CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options);
+  CodeGeneratorARM64(HGraph* graph,
+                     const Arm64InstructionSetFeatures& isa_features,
+                     const CompilerOptions& compiler_options);
   virtual ~CodeGeneratorARM64() {}
 
   void GenerateFrameEntry() OVERRIDE;
@@ -273,6 +271,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
     return InstructionSet::kArm64;
   }
 
+  const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const {
+    return isa_features_;
+  }
+
   void Initialize() OVERRIDE {
     HGraph* graph = GetGraph();
     int length = graph->GetBlocks().Size();
@@ -317,6 +319,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
   InstructionCodeGeneratorARM64 instruction_visitor_;
   ParallelMoveResolverARM64 move_resolver_;
   Arm64Assembler assembler_;
+  const Arm64InstructionSetFeatures& isa_features_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index e0e0b4c3e8..868fc5b867 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -18,6 +18,7 @@
 
 #include "arch/instruction_set.h"
 #include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "base/macros.h"
 #include "builder.h"
 #include "code_generator_arm.h"
@@ -115,9 +116,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
     Run(allocator, codegenX86, has_result, expected);
   }
 
-  std::unique_ptr<const ArmInstructionSetFeatures> features(
+  std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
       ArmInstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorARM codegenARM(graph, *features.get(), compiler_options);
+  TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
   codegenARM.CompileBaseline(&allocator, true);
   if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
     Run(allocator, codegenARM, has_result, expected);
   }
@@ -129,7 +130,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
     Run(allocator, codegenX86_64, has_result, expected);
   }
 
-  arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
+  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+      Arm64InstructionSetFeatures::FromCppDefines());
+  arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
   codegenARM64.CompileBaseline(&allocator, true);
   if (kRuntimeISA == kArm64) {
     Run(allocator, codegenARM64, has_result, expected);
   }
@@ -166,7 +169,9 @@ static void RunCodeOptimized(HGraph* graph,
                                      compiler_options);
     RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kArm64) {
-    arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
+    arm64::CodeGeneratorARM64 codegenARM64(graph,
+                                           *Arm64InstructionSetFeatures::FromCppDefines(),
+                                           compiler_options);
     RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kX86) {
     x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 8874edc341..1ddff8a125 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "intrinsics_arm64.h"
 
+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "code_generator_arm64.h"
 #include "common_arm64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -682,10 +683,11 @@ static void GenUnsafeGet(HInvoke* invoke,
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register trg = RegisterFrom(locations->Out(), type);
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   MemOperand mem_op(base.X(), offset);
   if (is_volatile) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen->LoadAcquire(invoke, trg, mem_op);
     } else {
       codegen->Load(type, trg, mem_op);
@@ -792,11 +794,12 @@ static void GenUnsafePut(LocationSummary* locations,
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   MemOperand mem_op(base.X(), offset);
 
   if (is_volatile || is_ordered) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen->StoreRelease(type, value, mem_op);
     } else {
       __ Dmb(InnerShareable, BarrierAll);
@@ -856,10 +859,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  // TODO: Currently we use acquire-release load-stores in the CAS loop. One could reasonably write
-  // a version relying on simple exclusive load-stores and barriers instead.
-  static_assert(kUseAcquireRelease, "Non-acquire-release inlined CAS not implemented, yet.");
-
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
 
   Register out = WRegisterFrom(locations->Out());  // Boolean result.
@@ -889,15 +889,23 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
   // result = tmp_value != 0;
 
   vixl::Label loop_head, exit_loop;
-  __ Bind(&loop_head);
-
-  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-  __ Cmp(tmp_value, expected);
-  __ B(&exit_loop, ne);
-
-  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
-  __ Cbnz(tmp_32, &loop_head);
-
+  if (use_acquire_release) {
+    __ Bind(&loop_head);
+    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+    __ Cmp(tmp_value, expected);
+    __ B(&exit_loop, ne);
+    __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+    __ Cbnz(tmp_32, &loop_head);
+  } else {
+    __ Dmb(InnerShareable, BarrierWrites);
+    __ Bind(&loop_head);
+    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+    __ Cmp(tmp_value, expected);
+    __ B(&exit_loop, ne);
+    __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
+    __ Cbnz(tmp_32, &loop_head);
+    __ Dmb(InnerShareable, BarrierAll);
+  }
   __ Bind(&exit_loop);
   __ Cset(out, eq);
 }