Diffstat (limited to 'compiler')
-rw-r--r--   compiler/common_compiler_test.cc             |   1
-rw-r--r--   compiler/elf_builder.h                       | 109
-rw-r--r--   compiler/elf_writer_quick.cc                 |   3
-rw-r--r--   compiler/image_writer.cc                     |   4
-rw-r--r--   compiler/jit/jit_compiler.cc                 |   2
-rw-r--r--   compiler/oat_writer.cc                       |   1
-rw-r--r--   compiler/oat_writer.h                        |   7
-rw-r--r--   compiler/optimizing/code_generator.cc        |   4
-rw-r--r--   compiler/optimizing/code_generator_arm64.cc  |  23
-rw-r--r--   compiler/optimizing/code_generator_arm64.h   |  13
-rw-r--r--   compiler/optimizing/codegen_test.cc          |  13
-rw-r--r--   compiler/optimizing/intrinsics_arm64.cc      |  38
12 files changed, 160 insertions, 58 deletions
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index e8354b2a1b..09be4372a8 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -253,6 +253,7 @@ void CommonCompilerTest::ReserveImageSpace() {
                                               (size_t)100 * 1024 * 1024,  // 100MB
                                               PROT_NONE,
                                               false /* no need for 4gb flag with fixed mmap*/,
+                                              false /* not reusing existing reservation */,
                                               &error_msg));
     CHECK(image_reservation_.get() != nullptr) << error_msg;
   }
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 94268de077..9ab3602606 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -538,6 +538,8 @@ class ElfBuilder FINAL {
              Elf_Word rodata_size,
              Elf_Word text_relative_offset,
              Elf_Word text_size,
+             Elf_Word bss_relative_offset,
+             Elf_Word bss_size,
              const bool add_symbols,
              bool debug = false)
       : oat_writer_(oat_writer),
@@ -547,6 +549,7 @@
         text_builder_(".text", text_size, text_relative_offset, SHT_PROGBITS,
                       SHF_ALLOC | SHF_EXECINSTR),
         rodata_builder_(".rodata", rodata_size, rodata_relative_offset, SHT_PROGBITS, SHF_ALLOC),
+        bss_builder_(".bss", bss_size, bss_relative_offset, SHT_NOBITS, SHF_ALLOC),
         dynsym_builder_(".dynsym", SHT_DYNSYM, ".dynstr", SHT_STRTAB, true),
         symtab_builder_(".symtab", SHT_SYMTAB, ".strtab", SHT_STRTAB, false),
         hash_builder_(".hash", SHT_HASH, SHF_ALLOC, &dynsym_builder_, 0, sizeof(Elf_Word),
@@ -569,6 +572,11 @@
   }

   bool Init() {
+    // Since the .text section of an oat file contains relative references to .rodata
+    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
+    // a non-traditional layout where the .bss section is mapped independently of the
+    // .dynamic section and needs its own program header with LOAD RW.
+    //
     // The basic layout of the elf file. Order may be different in final output.
     // +-------------------------+
     // | Elf_Ehdr                |
@@ -576,6 +584,7 @@
     // | Elf_Phdr PHDR           |
     // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
     // | Elf_Phdr LOAD R X       | .text
+    // | Elf_Phdr LOAD RW        | .bss (Optional)
     // | Elf_Phdr LOAD RW        | .dynamic
     // | Elf_Phdr DYNAMIC        | .dynamic
     // +-------------------------+
@@ -584,6 +593,8 @@
     // | Elf_Sym  oatdata        |
     // | Elf_Sym  oatexec        |
     // | Elf_Sym  oatlastword    |
+    // | Elf_Sym  oatbss         | (Optional)
+    // | Elf_Sym  oatbsslastword | (Optional)
     // +-------------------------+
     // | .dynstr                 |
     // | \0                      |
@@ -631,6 +642,7 @@
     // | .hash\0                 |
     // | .rodata\0               |
     // | .text\0                 |
+    // | .bss\0                  | (Optional)
     // | .shstrtab\0             |
     // | .symtab\0               | (Optional)
     // | .strtab\0               | (Optional)
@@ -654,8 +666,9 @@
     // | Elf_Shdr .dynsym        |
     // | Elf_Shdr .dynstr        |
     // | Elf_Shdr .hash          |
-    // | Elf_Shdr .text          |
     // | Elf_Shdr .rodata        |
+    // | Elf_Shdr .text          |
+    // | Elf_Shdr .bss           | (Optional)
     // | Elf_Shdr .dynamic       |
     // | Elf_Shdr .shstrtab      |
     // | Elf_Shdr .debug_info    | (Optional)
@@ -694,8 +707,11 @@
     program_headers_[PH_LOAD_R_X].p_type  = PT_LOAD;
     program_headers_[PH_LOAD_R_X].p_flags = PF_R | PF_X;

-    program_headers_[PH_LOAD_RW_].p_type  = PT_LOAD;
-    program_headers_[PH_LOAD_RW_].p_flags = PF_R | PF_W;
+    program_headers_[PH_LOAD_RW_BSS].p_type  = PT_LOAD;
+    program_headers_[PH_LOAD_RW_BSS].p_flags = PF_R | PF_W;
+
+    program_headers_[PH_LOAD_RW_DYNAMIC].p_type  = PT_LOAD;
+    program_headers_[PH_LOAD_RW_DYNAMIC].p_flags = PF_R | PF_W;

     program_headers_[PH_DYNAMIC].p_type  = PT_DYNAMIC;
     program_headers_[PH_DYNAMIC].p_flags = PF_R | PF_W;
@@ -760,6 +776,14 @@
     text_builder_.SetSectionIndex(section_index_);
     section_index_++;

+    // Setup .bss
+    if (bss_builder_.GetSize() != 0u) {
+      section_ptrs_.push_back(bss_builder_.GetSection());
+      AssignSectionStr(&bss_builder_, &shstrtab_);
+      bss_builder_.SetSectionIndex(section_index_);
+      section_index_++;
+    }
+
     // Setup .dynamic
     section_ptrs_.push_back(dynamic_builder_.GetSection());
     AssignSectionStr(&dynamic_builder_, &shstrtab_);
@@ -820,10 +844,20 @@
     CHECK_ALIGNED(rodata_builder_.GetSection()->sh_offset +
                   rodata_builder_.GetSection()->sh_size, kPageSize);

+    // Get the layout of the .bss section.
+    bss_builder_.GetSection()->sh_offset =
+        NextOffset<Elf_Word, Elf_Shdr>(*bss_builder_.GetSection(),
+                                       *text_builder_.GetSection());
+    bss_builder_.GetSection()->sh_addr = bss_builder_.GetSection()->sh_offset;
+    bss_builder_.GetSection()->sh_size = bss_builder_.GetSize();
+    bss_builder_.GetSection()->sh_link = bss_builder_.GetLink();
+
     // Get the layout of the dynamic section.
-    dynamic_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*dynamic_builder_.GetSection(), *text_builder_.GetSection());
-    dynamic_builder_.GetSection()->sh_addr = dynamic_builder_.GetSection()->sh_offset;
+    CHECK(IsAlignedParam(bss_builder_.GetSection()->sh_offset,
+                         dynamic_builder_.GetSection()->sh_addralign));
+    dynamic_builder_.GetSection()->sh_offset = bss_builder_.GetSection()->sh_offset;
+    dynamic_builder_.GetSection()->sh_addr =
+        NextOffset<Elf_Word, Elf_Shdr>(*dynamic_builder_.GetSection(), *bss_builder_.GetSection());
     dynamic_builder_.GetSection()->sh_size = dynamic_builder_.GetSize() * sizeof(Elf_Dyn);
     dynamic_builder_.GetSection()->sh_link = dynamic_builder_.GetLink();
@@ -987,16 +1021,23 @@
     program_headers_[PH_LOAD_R_X].p_memsz  = load_rx_size;
     program_headers_[PH_LOAD_R_X].p_align  = text_builder_.GetSection()->sh_addralign;

-    program_headers_[PH_LOAD_RW_].p_offset = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_].p_vaddr  = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_].p_paddr  = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_].p_filesz = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_RW_].p_memsz  = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_RW_].p_align  = dynamic_builder_.GetSection()->sh_addralign;
+    program_headers_[PH_LOAD_RW_BSS].p_offset = bss_builder_.GetSection()->sh_offset;
+    program_headers_[PH_LOAD_RW_BSS].p_vaddr  = bss_builder_.GetSection()->sh_offset;
+    program_headers_[PH_LOAD_RW_BSS].p_paddr  = bss_builder_.GetSection()->sh_offset;
+    program_headers_[PH_LOAD_RW_BSS].p_filesz = 0;
+    program_headers_[PH_LOAD_RW_BSS].p_memsz  = bss_builder_.GetSection()->sh_size;
+    program_headers_[PH_LOAD_RW_BSS].p_align  = bss_builder_.GetSection()->sh_addralign;
+
+    program_headers_[PH_LOAD_RW_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset;
+    program_headers_[PH_LOAD_RW_DYNAMIC].p_vaddr  = dynamic_builder_.GetSection()->sh_addr;
+    program_headers_[PH_LOAD_RW_DYNAMIC].p_paddr  = dynamic_builder_.GetSection()->sh_addr;
+    program_headers_[PH_LOAD_RW_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size;
+    program_headers_[PH_LOAD_RW_DYNAMIC].p_memsz  = dynamic_builder_.GetSection()->sh_size;
+    program_headers_[PH_LOAD_RW_DYNAMIC].p_align  = dynamic_builder_.GetSection()->sh_addralign;

     program_headers_[PH_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_DYNAMIC].p_vaddr  = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_DYNAMIC].p_paddr  = dynamic_builder_.GetSection()->sh_offset;
+    program_headers_[PH_DYNAMIC].p_vaddr  = dynamic_builder_.GetSection()->sh_addr;
+    program_headers_[PH_DYNAMIC].p_paddr  = dynamic_builder_.GetSection()->sh_addr;
     program_headers_[PH_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size;
     program_headers_[PH_DYNAMIC].p_memsz  = dynamic_builder_.GetSection()->sh_size;
     program_headers_[PH_DYNAMIC].p_align  = dynamic_builder_.GetSection()->sh_addralign;
@@ -1004,15 +1045,29 @@
     // Finish setup of the Ehdr values.
     elf_header_.e_phoff = PHDR_OFFSET;
     elf_header_.e_shoff = sections_offset;
-    elf_header_.e_phnum = PH_NUM;
+    elf_header_.e_phnum = (bss_builder_.GetSection()->sh_size != 0u) ? PH_NUM : PH_NUM - 1;
     elf_header_.e_shnum = section_ptrs_.size();
     elf_header_.e_shstrndx = shstrtab_builder_.GetSectionIndex();

     // Add the rest of the pieces to the list.
     pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Elf Header", 0, &elf_header_,
                                                       sizeof(elf_header_)));
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
-                                                      &program_headers_, sizeof(program_headers_)));
+    if (bss_builder_.GetSection()->sh_size != 0u) {
+      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
+                                                        &program_headers_[0],
+                                                        elf_header_.e_phnum * sizeof(Elf_Phdr)));
+    } else {
+      // Skip PH_LOAD_RW_BSS.
+      Elf_Word part1_size = PH_LOAD_RW_BSS * sizeof(Elf_Phdr);
+      Elf_Word part2_size = (PH_NUM - PH_LOAD_RW_BSS - 1) * sizeof(Elf_Phdr);
+      CHECK_EQ(part1_size + part2_size, elf_header_.e_phnum * sizeof(Elf_Phdr));
+      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
+                                                        &program_headers_[0], part1_size));
+      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers part 2",
+                                                        PHDR_OFFSET + part1_size,
+                                                        &program_headers_[PH_LOAD_RW_BSS + 1],
+                                                        part2_size));
+    }
     pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynamic",
                                                       dynamic_builder_.GetSection()->sh_offset,
                                                       dynamic.data(),
@@ -1175,6 +1230,12 @@
                              text_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
     dynsym_builder_.AddSymbol("oatlastword", &text_builder_, text_builder_.GetSize() - 4,
                               true, 4, STB_GLOBAL, STT_OBJECT);
+    if (bss_builder_.GetSize() != 0u) {
+      dynsym_builder_.AddSymbol("oatbss", &bss_builder_, 0, true,
+                                bss_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_builder_.AddSymbol("oatbsslastword", &bss_builder_, bss_builder_.GetSize() - 4,
+                                true, 4, STB_GLOBAL, STT_OBJECT);
+    }
   }

   void AssignSectionStr(ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr>* builder,
@@ -1213,12 +1274,13 @@
   // What phdr is.
   static const uint32_t PHDR_OFFSET = sizeof(Elf_Ehdr);
   enum : uint8_t {
-    PH_PHDR     = 0,
-    PH_LOAD_R__ = 1,
-    PH_LOAD_R_X = 2,
-    PH_LOAD_RW_ = 3,
-    PH_DYNAMIC  = 4,
-    PH_NUM      = 5,
+    PH_PHDR             = 0,
+    PH_LOAD_R__         = 1,
+    PH_LOAD_R_X         = 2,
+    PH_LOAD_RW_BSS      = 3,
+    PH_LOAD_RW_DYNAMIC  = 4,
+    PH_DYNAMIC          = 5,
+    PH_NUM              = 6,
   };
   static const uint32_t PHDR_SIZE = sizeof(Elf_Phdr) * PH_NUM;
   Elf_Phdr program_headers_[PH_NUM];
@@ -1236,6 +1298,7 @@
   ElfOatSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> text_builder_;
   ElfOatSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> rodata_builder_;
+  ElfOatSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> bss_builder_;
   ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr> dynsym_builder_;
   ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr> symtab_builder_;
   ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> hash_builder_;
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 401d5a951d..a822b24cde 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -229,6 +229,7 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
   const OatHeader& oat_header = oat_writer->GetOatHeader();
   Elf_Word oat_data_size = oat_header.GetExecutableOffset();
   uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
+  uint32_t oat_bss_size = oat_writer->GetBssSize();

   OatWriterWrapper wrapper(oat_writer);

@@ -243,6 +244,8 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
                       oat_data_size,
                       oat_data_size,
                       oat_exec_size,
+                      RoundUp(oat_data_size + oat_exec_size, kPageSize),
+                      oat_bss_size,
                       compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols(),
                       debug));
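The PH_LOAD_RW_BSS header added above is a PT_LOAD segment with p_filesz == 0 but p_memsz > 0, so a loader maps it as zero-filled pages instead of copying bytes from the file. A minimal standalone sketch of how a consumer could recognize that segment, assuming a Linux <elf.h> and a 32-bit ELF; FindBssSegment and the fabricated header table are illustrative, not part of the commit:

#include <elf.h>
#include <cstddef>
#include <cstdio>

// Illustrative helper (not from this commit): find the PT_LOAD entry that
// behaves like the oat .bss segment, i.e. RW with no file bytes but nonzero
// memory size, exactly the shape ElfBuilder writes for PH_LOAD_RW_BSS.
static const Elf32_Phdr* FindBssSegment(const Elf32_Phdr* phdrs, size_t phnum) {
  for (size_t i = 0; i < phnum; ++i) {
    const Elf32_Phdr& ph = phdrs[i];
    if (ph.p_type == PT_LOAD &&
        (ph.p_flags & (PF_R | PF_W)) == (PF_R | PF_W) &&
        ph.p_filesz == 0 && ph.p_memsz != 0) {
      return &ph;
    }
  }
  return nullptr;  // e_phnum drops to PH_NUM - 1 when there is no .bss.
}

int main() {
  // Fabricated table mirroring the layout comment above:
  // PHDR, LOAD R, LOAD R X, LOAD RW (.bss), LOAD RW (.dynamic), DYNAMIC.
  Elf32_Phdr phdrs[6] = {};
  phdrs[3].p_type = PT_LOAD;
  phdrs[3].p_flags = PF_R | PF_W;
  phdrs[3].p_filesz = 0;      // SHT_NOBITS: nothing stored in the file...
  phdrs[3].p_memsz = 0x2000;  // ...but mapped and zero-filled at load time.
  const Elf32_Phdr* bss = FindBssSegment(phdrs, 6);
  std::printf("bss segment memsz: %#x\n", bss != nullptr ? (unsigned)bss->p_memsz : 0u);
  return 0;
}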
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index c588e1a53d..f5f9320532 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -408,8 +408,8 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const
 bool ImageWriter::AllocMemory() {
   size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
   std::string error_msg;
-  image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, PROT_READ | PROT_WRITE,
-                                    false, &error_msg));
+  image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE,
+                                    false, false, &error_msg));
   if (UNLIKELY(image_.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
     return false;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 257739134b..0283791e28 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -76,7 +76,7 @@ JitCompiler::JitCompiler() : total_time_(0) {
       false,
       false,
       false,
-      true,  // pic
+      false,  // pic
      nullptr,
       pass_manager_options,
       nullptr));
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 841109105d..c32a99226b 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -403,6 +403,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
     image_writer_(image_writer),
     dex_files_(&dex_files),
     size_(0u),
+    bss_size_(0u),
     oat_data_offset_(0u),
     image_file_location_oat_checksum_(image_file_location_oat_checksum),
     image_file_location_oat_begin_(image_file_location_oat_begin),
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 980611fe96..fd2ccae4a5 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -96,6 +96,10 @@ class OatWriter {
     return size_;
   }

+  size_t GetBssSize() const {
+    return bss_size_;
+  }
+
   const std::vector<uintptr_t>& GetAbsolutePatchLocations() const {
     return absolute_patch_locations_;
   }
@@ -266,6 +270,9 @@ class OatWriter {
   // Size required for Oat data structures.
   size_t size_;

+  // The size of the required .bss section holding the DexCache data.
+  size_t bss_size_;
+
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
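Above, ElfWriterQuick places the .bss at RoundUp(oat_data_size + oat_exec_size, kPageSize), the first page boundary past the loaded .rodata and .text, while OatWriter reports the space needed for the DexCache data through GetBssSize(). The same arithmetic in isolation, as a hedged sketch; the sizes and the local RoundUp/kPageSize stand-ins are examples, not values from the commit:

#include <cstdint>
#include <cstdio>

// Local stand-ins for the runtime's helpers (4 KiB pages assumed).
constexpr uint32_t kPageSize = 4096;
constexpr uint32_t RoundUp(uint32_t x, uint32_t n) {
  return (x + n - 1) & ~(n - 1);  // n must be a power of two.
}

int main() {
  uint32_t oat_data_size = 0x1234;  // Example .rodata (oat data) size.
  uint32_t oat_exec_size = 0xabcd;  // Example .text (oat exec) size.
  // The .bss relative offset handed to ElfBuilder above.
  uint32_t bss_offset = RoundUp(oat_data_size + oat_exec_size, kPageSize);
  std::printf("bss_relative_offset = %#x\n", bss_offset);  // 0xc000 for these sizes.
  return 0;
}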
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2a57fdc929..ba5f7d8fab 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -386,7 +386,9 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
                                            compiler_options);
     }
     case kArm64: {
-      return new arm64::CodeGeneratorARM64(graph, compiler_options);
+      return new arm64::CodeGeneratorARM64(graph,
+                                           *isa_features.AsArm64InstructionSetFeatures(),
+                                           compiler_options);
     }
     case kMips:
       return nullptr;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 729bab78a6..c21084a6fe 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -16,6 +16,7 @@

 #include "code_generator_arm64.h"

+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "common_arm64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -397,7 +398,9 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
   return next_location;
 }

-CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
+                                       const Arm64InstructionSetFeatures& isa_features,
+                                       const CompilerOptions& compiler_options)
     : CodeGenerator(graph,
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
@@ -408,7 +411,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com
       block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
-      move_resolver_(graph->GetArena(), this) {
+      move_resolver_(graph->GetArena(), this),
+      isa_features_(isa_features) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -998,9 +1002,10 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();

   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  if (kUseAcquireRelease) {
+  if (use_acquire_release) {
     // TODO(vixl): Let the MacroAssembler handle MemOperand.
     __ Add(temp, class_reg, status_offset);
     __ Ldar(temp, HeapOperand(temp));
@@ -1689,9 +1694,10 @@ void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction

 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();

   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       // NB: LoadAcquire will record the pc info if needed.
       codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
@@ -1718,9 +1724,10 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();

   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
       codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
@@ -2437,9 +2444,10 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {

 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();

   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       // NB: LoadAcquire will record the pc info if needed.
       codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
@@ -2464,9 +2472,10 @@ void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruc
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
+  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();

   if (instruction->IsVolatile()) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
     } else {
       GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index afb7fc3718..48961d68e9 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -32,10 +32,6 @@ namespace arm64 {

 class CodeGeneratorARM64;

-// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers.
-// For now we prefer the use of load-acquire, store-release over explicit memory barriers.
-static constexpr bool kUseAcquireRelease = true;
-
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kArm64WordSize = kArm64PointerSize;

@@ -195,7 +191,9 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver {

 class CodeGeneratorARM64 : public CodeGenerator {
  public:
-  CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options);
+  CodeGeneratorARM64(HGraph* graph,
+                     const Arm64InstructionSetFeatures& isa_features,
+                     const CompilerOptions& compiler_options);
   virtual ~CodeGeneratorARM64() {}

   void GenerateFrameEntry() OVERRIDE;
@@ -273,6 +271,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
     return InstructionSet::kArm64;
   }

+  const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const {
+    return isa_features_;
+  }
+
   void Initialize() OVERRIDE {
     HGraph* graph = GetGraph();
     int length = graph->GetBlocks().Size();
@@ -317,6 +319,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
   InstructionCodeGeneratorARM64 instruction_visitor_;
   ParallelMoveResolverARM64 move_resolver_;
   Arm64Assembler assembler_;
+  const Arm64InstructionSetFeatures& isa_features_;

   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
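With kUseAcquireRelease gone, the choice between load-acquire/store-release instructions and plain accesses plus dmb barriers is made per CPU through PreferAcquireRelease(). As a rough portable analogy (std::atomic stand-ins, not ART code), the two lowerings of a volatile read correspond to:

#include <atomic>
#include <cstdio>

std::atomic<int> field{0};

// Ordering attached to the access itself: on ARMv8 this lowers to ldar,
// matching the LoadAcquire path taken when PreferAcquireRelease() is true.
int VolatileGetAcquire() {
  return field.load(std::memory_order_acquire);
}

// Plain access ordered by an explicit fence, matching the Load-plus-barrier
// fallback used when the CPU prefers discrete dmb instructions.
int VolatileGetBarrier() {
  int value = field.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);  // Orders later accesses after the load.
  return value;
}

int main() {
  field.store(42, std::memory_order_release);
  std::printf("%d %d\n", VolatileGetAcquire(), VolatileGetBarrier());
  return 0;
}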
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index e0e0b4c3e8..868fc5b867 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -18,6 +18,7 @@

 #include "arch/instruction_set.h"
 #include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "base/macros.h"
 #include "builder.h"
 #include "code_generator_arm.h"
@@ -115,9 +116,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
     Run(allocator, codegenX86, has_result, expected);
   }

-  std::unique_ptr<const ArmInstructionSetFeatures> features(
+  std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
       ArmInstructionSetFeatures::FromCppDefines());
-  TestCodeGeneratorARM codegenARM(graph, *features.get(), compiler_options);
+  TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
   codegenARM.CompileBaseline(&allocator, true);
   if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
     Run(allocator, codegenARM, has_result, expected);
@@ -129,7 +130,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
     Run(allocator, codegenX86_64, has_result, expected);
   }

-  arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
+  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+      Arm64InstructionSetFeatures::FromCppDefines());
+  arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
   codegenARM64.CompileBaseline(&allocator, true);
   if (kRuntimeISA == kArm64) {
     Run(allocator, codegenARM64, has_result, expected);
@@ -166,7 +169,9 @@ static void RunCodeOptimized(HGraph* graph,
                                   compiler_options);
     RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kArm64) {
-    arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
+    arm64::CodeGeneratorARM64 codegenARM64(graph,
+                                           *Arm64InstructionSetFeatures::FromCppDefines(),
+                                           compiler_options);
     RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kX86) {
     x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 8874edc341..1ddff8a125 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -16,6 +16,7 @@

 #include "intrinsics_arm64.h"

+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "code_generator_arm64.h"
 #include "common_arm64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -682,10 +683,11 @@ static void GenUnsafeGet(HInvoke* invoke,
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register trg = RegisterFrom(locations->Out(), type);
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();

   MemOperand mem_op(base.X(), offset);
   if (is_volatile) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen->LoadAcquire(invoke, trg, mem_op);
     } else {
       codegen->Load(type, trg, mem_op);
@@ -792,11 +794,12 @@ static void GenUnsafePut(LocationSummary* locations,
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();

   MemOperand mem_op(base.X(), offset);

   if (is_volatile || is_ordered) {
-    if (kUseAcquireRelease) {
+    if (use_acquire_release) {
       codegen->StoreRelease(type, value, mem_op);
     } else {
       __ Dmb(InnerShareable, BarrierAll);
@@ -856,10 +859,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
 }

 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  // TODO: Currently we use acquire-release load-stores in the CAS loop. One could reasonably write
-  // a version relying on simple exclusive load-stores and barriers instead.
-  static_assert(kUseAcquireRelease, "Non-acquire-release inlined CAS not implemented, yet.");
-
+  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;

   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
@@ -889,15 +889,23 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
   // result = tmp_value != 0;

   vixl::Label loop_head, exit_loop;
-  __ Bind(&loop_head);
-
-  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-  __ Cmp(tmp_value, expected);
-  __ B(&exit_loop, ne);
-
-  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
-  __ Cbnz(tmp_32, &loop_head);
-
+  if (use_acquire_release) {
+    __ Bind(&loop_head);
+    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+    __ Cmp(tmp_value, expected);
+    __ B(&exit_loop, ne);
+    __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+    __ Cbnz(tmp_32, &loop_head);
+  } else {
+    __ Dmb(InnerShareable, BarrierWrites);
+    __ Bind(&loop_head);
+    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+    __ Cmp(tmp_value, expected);
+    __ B(&exit_loop, ne);
+    __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
+    __ Cbnz(tmp_32, &loop_head);
+    __ Dmb(InnerShareable, BarrierAll);
+  }
   __ Bind(&exit_loop);
   __ Cset(out, eq);
 }