author     Serban Constantinescu <serban.constantinescu@arm.com>    2015-02-22 20:51:33 +0000
committer  Serban Constantinescu <serban.constantinescu@arm.com>    2015-03-02 14:16:56 +0000
commit     579885a26d761f5ba9550f2a1cd7f0f598c2e1e3
tree       58d144157b7a24bbdf7f8892631a15abeefa2c9f
parent     2eb5168bd9e43b80452eaee5be32c063e124886e
Opt Compiler: ARM64: Enable explicit memory barriers over acquire/release
Implement remaining explicit memory barrier code paths and temporarily
enable the use of explicit memory barriers for testing.
This CL also enables the use of instruction set features in the ARM64
backend. kUseAcquireRelease has been replaced with PreferAcquireRelease(),
which for now is statically set to false (prefer explicit memory barriers).
Please note that acquire-release remains the preferred mapping for the ARM64
Optimizing Compiler; explicit memory barriers are enabled here only temporarily
so that this code path is exercised as well.
Change-Id: I84e047ecd43b6fbefc5b82cf532e3f5c59076458
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
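For context, the sketch below contrasts the two lowerings this CL selects between for a volatile field load. It is purely illustrative and not part of the CL: the function, its parameter, and the printed mnemonics are stand-ins for what the ARM64 backend emits (LoadAcquire versus a plain Load followed by GenerateMemoryBarrier), and the barrier is shown conservatively as a full DMB ISH.

#include <cstdio>

// Illustrative sketch (not ART code): the two ways a volatile load can be
// lowered on ARM64, selected by the new PreferAcquireRelease() query.
void EmitVolatileLoadSketch(bool prefer_acquire_release) {
  if (prefer_acquire_release) {
    // Acquire semantics are carried by the load itself.
    std::puts("ldar w0, [x1]");
  } else {
    // Plain load followed by an explicit barrier (conservatively shown as a
    // full DMB ISH; the backend picks the barrier kind per access).
    std::puts("ldr  w0, [x1]");
    std::puts("dmb  ish");
  }
}

int main() {
  // This CL makes the explicit-barrier path the temporary default.
  EmitVolatileLoadSketch(/*prefer_acquire_release=*/false);
  return 0;
}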
Diffstat (limited to 'compiler')
 compiler/optimizing/code_generator.cc       |  4
 compiler/optimizing/code_generator_arm64.cc | 23
 compiler/optimizing/code_generator_arm64.h  | 13
 compiler/optimizing/codegen_test.cc         | 13
 compiler/optimizing/intrinsics_arm64.cc     | 38
 5 files changed, 59 insertions(+), 32 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2a57fdc929..ba5f7d8fab 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -386,7 +386,9 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
                                       compiler_options);
     }
     case kArm64: {
-      return new arm64::CodeGeneratorARM64(graph, compiler_options);
+      return new arm64::CodeGeneratorARM64(graph,
+                                           *isa_features.AsArm64InstructionSetFeatures(),
+                                           compiler_options);
     }
     case kMips:
       return nullptr;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 729bab78a6..c21084a6fe 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_arm64.h"
 
+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "common_arm64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -397,7 +398,9 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
   return next_location;
 }
 
-CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
+                                       const Arm64InstructionSetFeatures& isa_features,
+                                       const CompilerOptions& compiler_options)
     : CodeGenerator(graph,
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
@@ -408,7 +411,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com
       block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
-      move_resolver_(graph->GetArena(), this) {
+      move_resolver_(graph->GetArena(), this),
+      isa_features_(isa_features) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -998,9 +1002,10 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  if (kUseAcquireRelease) {
+  if (use_acquire_release) {
     // TODO(vixl): Let the MacroAssembler handle MemOperand.
     __ Add(temp, class_reg, status_offset);
     __ Ldar(temp, HeapOperand(temp));
@@ -1689,9 +1694,10 @@ void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       // NB: LoadAcquire will record the pc info if needed.
       codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
@@ -1718,9 +1724,10 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
       codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
@@ -2437,9 +2444,10 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
 
 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       // NB: LoadAcquire will record the pc info if needed.
       codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
@@ -2464,9 +2472,10 @@ void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruc
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
     } else {
       GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index afb7fc3718..48961d68e9 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -32,10 +32,6 @@ namespace arm64 {
 
 class CodeGeneratorARM64;
 
-// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers.
-// For now we prefer the use of load-acquire, store-release over explicit memory barriers.
-static constexpr bool kUseAcquireRelease = true;
-
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kArm64WordSize = kArm64PointerSize;
 
@@ -195,7 +191,9 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver {
 
 class CodeGeneratorARM64 : public CodeGenerator {
  public:
-  CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options);
+  CodeGeneratorARM64(HGraph* graph,
+                     const Arm64InstructionSetFeatures& isa_features,
+                     const CompilerOptions& compiler_options);
   virtual ~CodeGeneratorARM64() {}
 
   void GenerateFrameEntry() OVERRIDE;
@@ -273,6 +271,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
     return InstructionSet::kArm64;
   }
 
+  const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const {
+    return isa_features_;
+  }
+
   void Initialize() OVERRIDE {
     HGraph* graph = GetGraph();
     int length = graph->GetBlocks().Size();
@@ -317,6 +319,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
   InstructionCodeGeneratorARM64 instruction_visitor_;
   ParallelMoveResolverARM64 move_resolver_;
   Arm64Assembler assembler_;
+  const Arm64InstructionSetFeatures& isa_features_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index e0e0b4c3e8..868fc5b867 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -18,6 +18,7 @@
 
 #include "arch/instruction_set.h"
 #include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "base/macros.h"
 #include "builder.h"
 #include "code_generator_arm.h"
@@ -115,9 +116,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
     Run(allocator, codegenX86, has_result, expected);
   }
 
-  std::unique_ptr<const ArmInstructionSetFeatures> features(
+  std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
       ArmInstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorARM codegenARM(graph, *features.get(), compiler_options);
+  TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
   codegenARM.CompileBaseline(&allocator, true);
   if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
     Run(allocator, codegenARM, has_result, expected);
   }
@@ -129,7 +130,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
     Run(allocator, codegenX86_64, has_result, expected);
   }
 
-  arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
+  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+      Arm64InstructionSetFeatures::FromCppDefines());
+  arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
   codegenARM64.CompileBaseline(&allocator, true);
   if (kRuntimeISA == kArm64) {
     Run(allocator, codegenARM64, has_result, expected);
   }
@@ -166,7 +169,9 @@ static void RunCodeOptimized(HGraph* graph,
                                      compiler_options);
     RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kArm64) {
-    arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
+    arm64::CodeGeneratorARM64 codegenARM64(graph,
+                                           *Arm64InstructionSetFeatures::FromCppDefines(),
+                                           compiler_options);
     RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kX86) {
     x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 8874edc341..1ddff8a125 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "intrinsics_arm64.h"
 
+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "code_generator_arm64.h"
 #include "common_arm64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -682,10 +683,11 @@ static void GenUnsafeGet(HInvoke* invoke,
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register trg = RegisterFrom(locations->Out(), type);
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   MemOperand mem_op(base.X(), offset);
   if (is_volatile) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen->LoadAcquire(invoke, trg, mem_op);
     } else {
       codegen->Load(type, trg, mem_op);
@@ -792,11 +794,12 @@ static void GenUnsafePut(LocationSummary* locations,
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   MemOperand mem_op(base.X(), offset);
 
   if (is_volatile || is_ordered) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen->StoreRelease(type, value, mem_op);
     } else {
       __ Dmb(InnerShareable, BarrierAll);
@@ -856,10 +859,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  // TODO: Currently we use acquire-release load-stores in the CAS loop. One could reasonably write
-  // a version relying on simple exclusive load-stores and barriers instead.
-  static_assert(kUseAcquireRelease, "Non-acquire-release inlined CAS not implemented, yet.");
-
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
 
   Register out = WRegisterFrom(locations->Out());  // Boolean result.
@@ -889,15 +889,23 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
   // result = tmp_value != 0;
 
   vixl::Label loop_head, exit_loop;
-  __ Bind(&loop_head);
-
-  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-  __ Cmp(tmp_value, expected);
-  __ B(&exit_loop, ne);
-
-  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
-  __ Cbnz(tmp_32, &loop_head);
-
+  if (use_acquire_release) {
+    __ Bind(&loop_head);
+    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+    __ Cmp(tmp_value, expected);
+    __ B(&exit_loop, ne);
+    __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+    __ Cbnz(tmp_32, &loop_head);
+  } else {
+    __ Dmb(InnerShareable, BarrierWrites);
+    __ Bind(&loop_head);
+    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+    __ Cmp(tmp_value, expected);
+    __ B(&exit_loop, ne);
+    __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
+    __ Cbnz(tmp_32, &loop_head);
+    __ Dmb(InnerShareable, BarrierAll);
+  }
   __ Bind(&exit_loop);
   __ Cset(out, eq);
 }