summary | refs | log | tree | commit | diff | stats
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/common_compiler_test.cc | 1
-rw-r--r-- compiler/elf_builder.h | 109
-rw-r--r-- compiler/elf_writer_quick.cc | 3
-rw-r--r-- compiler/image_writer.cc | 4
-rw-r--r-- compiler/jit/jit_compiler.cc | 2
-rw-r--r-- compiler/oat_writer.cc | 1
-rw-r--r-- compiler/oat_writer.h | 7
-rw-r--r-- compiler/optimizing/code_generator.cc | 4
-rw-r--r-- compiler/optimizing/code_generator_arm64.cc | 23
-rw-r--r-- compiler/optimizing/code_generator_arm64.h | 13
-rw-r--r-- compiler/optimizing/codegen_test.cc | 13
-rw-r--r-- compiler/optimizing/intrinsics_arm64.cc | 38
12 files changed, 160 insertions, 58 deletions
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index e8354b2a1b..09be4372a8 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -253,6 +253,7 @@ void CommonCompilerTest::ReserveImageSpace() {
(size_t)100 * 1024 * 1024, // 100MB
PROT_NONE,
false /* no need for 4gb flag with fixed mmap*/,
+ false /* not reusing existing reservation */,
&error_msg));
CHECK(image_reservation_.get() != nullptr) << error_msg;
}
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 94268de077..9ab3602606 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -538,6 +538,8 @@ class ElfBuilder FINAL {
Elf_Word rodata_size,
Elf_Word text_relative_offset,
Elf_Word text_size,
+ Elf_Word bss_relative_offset,
+ Elf_Word bss_size,
const bool add_symbols,
bool debug = false)
: oat_writer_(oat_writer),
@@ -547,6 +549,7 @@ class ElfBuilder FINAL {
text_builder_(".text", text_size, text_relative_offset, SHT_PROGBITS,
SHF_ALLOC | SHF_EXECINSTR),
rodata_builder_(".rodata", rodata_size, rodata_relative_offset, SHT_PROGBITS, SHF_ALLOC),
+ bss_builder_(".bss", bss_size, bss_relative_offset, SHT_NOBITS, SHF_ALLOC),
dynsym_builder_(".dynsym", SHT_DYNSYM, ".dynstr", SHT_STRTAB, true),
symtab_builder_(".symtab", SHT_SYMTAB, ".strtab", SHT_STRTAB, false),
hash_builder_(".hash", SHT_HASH, SHF_ALLOC, &dynsym_builder_, 0, sizeof(Elf_Word),
@@ -569,6 +572,11 @@ class ElfBuilder FINAL {
}
bool Init() {
+ // Since the .text section of an oat file contains relative references to .rodata
+ // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
+ // a non-traditional layout where the .bss section is mapped independently of the
+ // .dynamic section and needs its own program header with LOAD RW.
+ //
// The basic layout of the elf file. Order may be different in final output.
// +-------------------------+
// | Elf_Ehdr |
@@ -576,6 +584,7 @@ class ElfBuilder FINAL {
// | Elf_Phdr PHDR |
// | Elf_Phdr LOAD R | .dynsym .dynstr .hash .rodata
// | Elf_Phdr LOAD R X | .text
+ // | Elf_Phdr LOAD RW | .bss (Optional)
// | Elf_Phdr LOAD RW | .dynamic
// | Elf_Phdr DYNAMIC | .dynamic
// +-------------------------+
@@ -584,6 +593,8 @@ class ElfBuilder FINAL {
// | Elf_Sym oatdata |
// | Elf_Sym oatexec |
// | Elf_Sym oatlastword |
+ // | Elf_Sym oatbss | (Optional)
+ // | Elf_Sym oatbsslastword | (Optional)
// +-------------------------+
// | .dynstr |
// | \0 |
@@ -631,6 +642,7 @@ class ElfBuilder FINAL {
// | .hash\0 |
// | .rodata\0 |
// | .text\0 |
+ // | .bss\0 | (Optional)
// | .shstrtab\0 |
// | .symtab\0 | (Optional)
// | .strtab\0 | (Optional)
@@ -654,8 +666,9 @@ class ElfBuilder FINAL {
// | Elf_Shdr .dynsym |
// | Elf_Shdr .dynstr |
// | Elf_Shdr .hash |
- // | Elf_Shdr .text |
// | Elf_Shdr .rodata |
+ // | Elf_Shdr .text |
+ // | Elf_Shdr .bss | (Optional)
// | Elf_Shdr .dynamic |
// | Elf_Shdr .shstrtab |
// | Elf_Shdr .debug_info | (Optional)
@@ -694,8 +707,11 @@ class ElfBuilder FINAL {
program_headers_[PH_LOAD_R_X].p_type = PT_LOAD;
program_headers_[PH_LOAD_R_X].p_flags = PF_R | PF_X;
- program_headers_[PH_LOAD_RW_].p_type = PT_LOAD;
- program_headers_[PH_LOAD_RW_].p_flags = PF_R | PF_W;
+ program_headers_[PH_LOAD_RW_BSS].p_type = PT_LOAD;
+ program_headers_[PH_LOAD_RW_BSS].p_flags = PF_R | PF_W;
+
+ program_headers_[PH_LOAD_RW_DYNAMIC].p_type = PT_LOAD;
+ program_headers_[PH_LOAD_RW_DYNAMIC].p_flags = PF_R | PF_W;
program_headers_[PH_DYNAMIC].p_type = PT_DYNAMIC;
program_headers_[PH_DYNAMIC].p_flags = PF_R | PF_W;
@@ -760,6 +776,14 @@ class ElfBuilder FINAL {
text_builder_.SetSectionIndex(section_index_);
section_index_++;
+ // Setup .bss
+ if (bss_builder_.GetSize() != 0u) {
+ section_ptrs_.push_back(bss_builder_.GetSection());
+ AssignSectionStr(&bss_builder_, &shstrtab_);
+ bss_builder_.SetSectionIndex(section_index_);
+ section_index_++;
+ }
+
// Setup .dynamic
section_ptrs_.push_back(dynamic_builder_.GetSection());
AssignSectionStr(&dynamic_builder_, &shstrtab_);
@@ -820,10 +844,20 @@ class ElfBuilder FINAL {
CHECK_ALIGNED(rodata_builder_.GetSection()->sh_offset +
rodata_builder_.GetSection()->sh_size, kPageSize);
+ // Get the layout of the .bss section.
+ bss_builder_.GetSection()->sh_offset =
+ NextOffset<Elf_Word, Elf_Shdr>(*bss_builder_.GetSection(),
+ *text_builder_.GetSection());
+ bss_builder_.GetSection()->sh_addr = bss_builder_.GetSection()->sh_offset;
+ bss_builder_.GetSection()->sh_size = bss_builder_.GetSize();
+ bss_builder_.GetSection()->sh_link = bss_builder_.GetLink();
+
// Get the layout of the dynamic section.
- dynamic_builder_.GetSection()->sh_offset =
- NextOffset<Elf_Word, Elf_Shdr>(*dynamic_builder_.GetSection(), *text_builder_.GetSection());
- dynamic_builder_.GetSection()->sh_addr = dynamic_builder_.GetSection()->sh_offset;
+ CHECK(IsAlignedParam(bss_builder_.GetSection()->sh_offset,
+ dynamic_builder_.GetSection()->sh_addralign));
+ dynamic_builder_.GetSection()->sh_offset = bss_builder_.GetSection()->sh_offset;
+ dynamic_builder_.GetSection()->sh_addr =
+ NextOffset<Elf_Word, Elf_Shdr>(*dynamic_builder_.GetSection(), *bss_builder_.GetSection());
dynamic_builder_.GetSection()->sh_size = dynamic_builder_.GetSize() * sizeof(Elf_Dyn);
dynamic_builder_.GetSection()->sh_link = dynamic_builder_.GetLink();
@@ -987,16 +1021,23 @@ class ElfBuilder FINAL {
program_headers_[PH_LOAD_R_X].p_memsz = load_rx_size;
program_headers_[PH_LOAD_R_X].p_align = text_builder_.GetSection()->sh_addralign;
- program_headers_[PH_LOAD_RW_].p_offset = dynamic_builder_.GetSection()->sh_offset;
- program_headers_[PH_LOAD_RW_].p_vaddr = dynamic_builder_.GetSection()->sh_offset;
- program_headers_[PH_LOAD_RW_].p_paddr = dynamic_builder_.GetSection()->sh_offset;
- program_headers_[PH_LOAD_RW_].p_filesz = dynamic_builder_.GetSection()->sh_size;
- program_headers_[PH_LOAD_RW_].p_memsz = dynamic_builder_.GetSection()->sh_size;
- program_headers_[PH_LOAD_RW_].p_align = dynamic_builder_.GetSection()->sh_addralign;
+ program_headers_[PH_LOAD_RW_BSS].p_offset = bss_builder_.GetSection()->sh_offset;
+ program_headers_[PH_LOAD_RW_BSS].p_vaddr = bss_builder_.GetSection()->sh_offset;
+ program_headers_[PH_LOAD_RW_BSS].p_paddr = bss_builder_.GetSection()->sh_offset;
+ program_headers_[PH_LOAD_RW_BSS].p_filesz = 0;
+ program_headers_[PH_LOAD_RW_BSS].p_memsz = bss_builder_.GetSection()->sh_size;
+ program_headers_[PH_LOAD_RW_BSS].p_align = bss_builder_.GetSection()->sh_addralign;
+
+ program_headers_[PH_LOAD_RW_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset;
+ program_headers_[PH_LOAD_RW_DYNAMIC].p_vaddr = dynamic_builder_.GetSection()->sh_addr;
+ program_headers_[PH_LOAD_RW_DYNAMIC].p_paddr = dynamic_builder_.GetSection()->sh_addr;
+ program_headers_[PH_LOAD_RW_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size;
+ program_headers_[PH_LOAD_RW_DYNAMIC].p_memsz = dynamic_builder_.GetSection()->sh_size;
+ program_headers_[PH_LOAD_RW_DYNAMIC].p_align = dynamic_builder_.GetSection()->sh_addralign;
program_headers_[PH_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset;
- program_headers_[PH_DYNAMIC].p_vaddr = dynamic_builder_.GetSection()->sh_offset;
- program_headers_[PH_DYNAMIC].p_paddr = dynamic_builder_.GetSection()->sh_offset;
+ program_headers_[PH_DYNAMIC].p_vaddr = dynamic_builder_.GetSection()->sh_addr;
+ program_headers_[PH_DYNAMIC].p_paddr = dynamic_builder_.GetSection()->sh_addr;
program_headers_[PH_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size;
program_headers_[PH_DYNAMIC].p_memsz = dynamic_builder_.GetSection()->sh_size;
program_headers_[PH_DYNAMIC].p_align = dynamic_builder_.GetSection()->sh_addralign;
@@ -1004,15 +1045,29 @@ class ElfBuilder FINAL {
// Finish setup of the Ehdr values.
elf_header_.e_phoff = PHDR_OFFSET;
elf_header_.e_shoff = sections_offset;
- elf_header_.e_phnum = PH_NUM;
+ elf_header_.e_phnum = (bss_builder_.GetSection()->sh_size != 0u) ? PH_NUM : PH_NUM - 1;
elf_header_.e_shnum = section_ptrs_.size();
elf_header_.e_shstrndx = shstrtab_builder_.GetSectionIndex();
// Add the rest of the pieces to the list.
pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Elf Header", 0, &elf_header_,
sizeof(elf_header_)));
- pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
- &program_headers_, sizeof(program_headers_)));
+ if (bss_builder_.GetSection()->sh_size != 0u) {
+ pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
+ &program_headers_[0],
+ elf_header_.e_phnum * sizeof(Elf_Phdr)));
+ } else {
+ // Skip PH_LOAD_RW_BSS.
+ Elf_Word part1_size = PH_LOAD_RW_BSS * sizeof(Elf_Phdr);
+ Elf_Word part2_size = (PH_NUM - PH_LOAD_RW_BSS - 1) * sizeof(Elf_Phdr);
+ CHECK_EQ(part1_size + part2_size, elf_header_.e_phnum * sizeof(Elf_Phdr));
+ pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
+ &program_headers_[0], part1_size));
+ pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers part 2",
+ PHDR_OFFSET + part1_size,
+ &program_headers_[PH_LOAD_RW_BSS + 1],
+ part2_size));
+ }
pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynamic",
dynamic_builder_.GetSection()->sh_offset,
dynamic.data(),
@@ -1175,6 +1230,12 @@ class ElfBuilder FINAL {
text_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
dynsym_builder_.AddSymbol("oatlastword", &text_builder_, text_builder_.GetSize() - 4,
true, 4, STB_GLOBAL, STT_OBJECT);
+ if (bss_builder_.GetSize() != 0u) {
+ dynsym_builder_.AddSymbol("oatbss", &bss_builder_, 0, true,
+ bss_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
+ dynsym_builder_.AddSymbol("oatbsslastword", &bss_builder_, bss_builder_.GetSize() - 4,
+ true, 4, STB_GLOBAL, STT_OBJECT);
+ }
}
void AssignSectionStr(ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr>* builder,
@@ -1213,12 +1274,13 @@ class ElfBuilder FINAL {
// What phdr is.
static const uint32_t PHDR_OFFSET = sizeof(Elf_Ehdr);
enum : uint8_t {
- PH_PHDR = 0,
- PH_LOAD_R__ = 1,
- PH_LOAD_R_X = 2,
- PH_LOAD_RW_ = 3,
- PH_DYNAMIC = 4,
- PH_NUM = 5,
+ PH_PHDR = 0,
+ PH_LOAD_R__ = 1,
+ PH_LOAD_R_X = 2,
+ PH_LOAD_RW_BSS = 3,
+ PH_LOAD_RW_DYNAMIC = 4,
+ PH_DYNAMIC = 5,
+ PH_NUM = 6,
};
static const uint32_t PHDR_SIZE = sizeof(Elf_Phdr) * PH_NUM;
Elf_Phdr program_headers_[PH_NUM];
@@ -1236,6 +1298,7 @@ class ElfBuilder FINAL {
ElfOatSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> text_builder_;
ElfOatSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> rodata_builder_;
+ ElfOatSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> bss_builder_;
ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr> dynsym_builder_;
ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr> symtab_builder_;
ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> hash_builder_;
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 401d5a951d..a822b24cde 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -229,6 +229,7 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
const OatHeader& oat_header = oat_writer->GetOatHeader();
Elf_Word oat_data_size = oat_header.GetExecutableOffset();
uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
+ uint32_t oat_bss_size = oat_writer->GetBssSize();
OatWriterWrapper wrapper(oat_writer);
@@ -243,6 +244,8 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
oat_data_size,
oat_data_size,
oat_exec_size,
+ RoundUp(oat_data_size + oat_exec_size, kPageSize),
+ oat_bss_size,
compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols(),
debug));
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index c588e1a53d..f5f9320532 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -408,8 +408,8 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const
bool ImageWriter::AllocMemory() {
size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
std::string error_msg;
- image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, PROT_READ | PROT_WRITE,
- false, &error_msg));
+ image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE,
+ false, false, &error_msg));
if (UNLIKELY(image_.get() == nullptr)) {
LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
return false;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 257739134b..0283791e28 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -76,7 +76,7 @@ JitCompiler::JitCompiler() : total_time_(0) {
false,
false,
false,
- true, // pic
+ false, // pic
nullptr,
pass_manager_options,
nullptr));
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 841109105d..c32a99226b 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -403,6 +403,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
image_writer_(image_writer),
dex_files_(&dex_files),
size_(0u),
+ bss_size_(0u),
oat_data_offset_(0u),
image_file_location_oat_checksum_(image_file_location_oat_checksum),
image_file_location_oat_begin_(image_file_location_oat_begin),
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 980611fe96..fd2ccae4a5 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -96,6 +96,10 @@ class OatWriter {
return size_;
}
+ size_t GetBssSize() const {
+ return bss_size_;
+ }
+
const std::vector<uintptr_t>& GetAbsolutePatchLocations() const {
return absolute_patch_locations_;
}
@@ -266,6 +270,9 @@ class OatWriter {
// Size required for Oat data structures.
size_t size_;
+ // The size of the required .bss section holding the DexCache data.
+ size_t bss_size_;
+
// Offset of the oat data from the start of the mmapped region of the elf file.
size_t oat_data_offset_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2a57fdc929..ba5f7d8fab 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -386,7 +386,9 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
compiler_options);
}
case kArm64: {
- return new arm64::CodeGeneratorARM64(graph, compiler_options);
+ return new arm64::CodeGeneratorARM64(graph,
+ *isa_features.AsArm64InstructionSetFeatures(),
+ compiler_options);
}
case kMips:
return nullptr;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 729bab78a6..c21084a6fe 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm64.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -397,7 +398,9 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
return next_location;
}
-CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
+ const Arm64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfAllocatableRegisters,
kNumberOfAllocatableFPRegisters,
@@ -408,7 +411,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {
+ move_resolver_(graph->GetArena(), this),
+ isa_features_(isa_features) {
// Save the link register (containing the return address) to mimic Quick.
AddAllocatedRegister(LocationFrom(lr));
}
@@ -998,9 +1002,10 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+ bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
- if (kUseAcquireRelease) {
+ if (use_acquire_release) {
// TODO(vixl): Let the MacroAssembler handle MemOperand.
__ Add(temp, class_reg, status_offset);
__ Ldar(temp, HeapOperand(temp));
@@ -1689,9 +1694,10 @@ void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction
void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+ bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
if (instruction->IsVolatile()) {
- if (kUseAcquireRelease) {
+ if (use_acquire_release) {
// NB: LoadAcquire will record the pc info if needed.
codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
} else {
@@ -1718,9 +1724,10 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins
CPURegister value = InputCPURegisterAt(instruction, 1);
Offset offset = instruction->GetFieldOffset();
Primitive::Type field_type = instruction->GetFieldType();
+ bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
if (instruction->IsVolatile()) {
- if (kUseAcquireRelease) {
+ if (use_acquire_release) {
codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
@@ -2437,9 +2444,10 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+ bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
if (instruction->IsVolatile()) {
- if (kUseAcquireRelease) {
+ if (use_acquire_release) {
// NB: LoadAcquire will record the pc info if needed.
codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
} else {
@@ -2464,9 +2472,10 @@ void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruc
CPURegister value = InputCPURegisterAt(instruction, 1);
Offset offset = instruction->GetFieldOffset();
Primitive::Type field_type = instruction->GetFieldType();
+ bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
if (instruction->IsVolatile()) {
- if (kUseAcquireRelease) {
+ if (use_acquire_release) {
codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
} else {
GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index afb7fc3718..48961d68e9 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -32,10 +32,6 @@ namespace arm64 {
class CodeGeneratorARM64;
-// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers.
-// For now we prefer the use of load-acquire, store-release over explicit memory barriers.
-static constexpr bool kUseAcquireRelease = true;
-
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArm64WordSize = kArm64PointerSize;
@@ -195,7 +191,9 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver {
class CodeGeneratorARM64 : public CodeGenerator {
public:
- CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options);
+ CodeGeneratorARM64(HGraph* graph,
+ const Arm64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
virtual ~CodeGeneratorARM64() {}
void GenerateFrameEntry() OVERRIDE;
@@ -273,6 +271,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
return InstructionSet::kArm64;
}
+ const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
void Initialize() OVERRIDE {
HGraph* graph = GetGraph();
int length = graph->GetBlocks().Size();
@@ -317,6 +319,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
InstructionCodeGeneratorARM64 instruction_visitor_;
ParallelMoveResolverARM64 move_resolver_;
Arm64Assembler assembler_;
+ const Arm64InstructionSetFeatures& isa_features_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
};
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index e0e0b4c3e8..868fc5b867 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -18,6 +18,7 @@
#include "arch/instruction_set.h"
#include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/macros.h"
#include "builder.h"
#include "code_generator_arm.h"
@@ -115,9 +116,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
Run(allocator, codegenX86, has_result, expected);
}
- std::unique_ptr<const ArmInstructionSetFeatures> features(
+ std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
ArmInstructionSetFeatures::FromCppDefines());
- TestCodeGeneratorARM codegenARM(graph, *features.get(), compiler_options);
+ TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
codegenARM.CompileBaseline(&allocator, true);
if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
Run(allocator, codegenARM, has_result, expected);
@@ -129,7 +130,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
Run(allocator, codegenX86_64, has_result, expected);
}
- arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
+ std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+ Arm64InstructionSetFeatures::FromCppDefines());
+ arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
codegenARM64.CompileBaseline(&allocator, true);
if (kRuntimeISA == kArm64) {
Run(allocator, codegenARM64, has_result, expected);
@@ -166,7 +169,9 @@ static void RunCodeOptimized(HGraph* graph,
compiler_options);
RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kArm64) {
- arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
+ arm64::CodeGeneratorARM64 codegenARM64(graph,
+ *Arm64InstructionSetFeatures::FromCppDefines(),
+ compiler_options);
RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86) {
x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 8874edc341..1ddff8a125 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -16,6 +16,7 @@
#include "intrinsics_arm64.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
@@ -682,10 +683,11 @@ static void GenUnsafeGet(HInvoke* invoke,
Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
Register trg = RegisterFrom(locations->Out(), type);
+ bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
MemOperand mem_op(base.X(), offset);
if (is_volatile) {
- if (kUseAcquireRelease) {
+ if (use_acquire_release) {
codegen->LoadAcquire(invoke, trg, mem_op);
} else {
codegen->Load(type, trg, mem_op);
@@ -792,11 +794,12 @@ static void GenUnsafePut(LocationSummary* locations,
Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
Register value = RegisterFrom(locations->InAt(3), type);
+ bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
MemOperand mem_op(base.X(), offset);
if (is_volatile || is_ordered) {
- if (kUseAcquireRelease) {
+ if (use_acquire_release) {
codegen->StoreRelease(type, value, mem_op);
} else {
__ Dmb(InnerShareable, BarrierAll);
@@ -856,10 +859,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
}
static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
- // TODO: Currently we use acquire-release load-stores in the CAS loop. One could reasonably write
- // a version relying on simple exclusive load-stores and barriers instead.
- static_assert(kUseAcquireRelease, "Non-acquire-release inlined CAS not implemented, yet.");
-
+ bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
Register out = WRegisterFrom(locations->Out()); // Boolean result.
@@ -889,15 +889,23 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// result = tmp_value != 0;
vixl::Label loop_head, exit_loop;
- __ Bind(&loop_head);
-
- __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
- __ Cmp(tmp_value, expected);
- __ B(&exit_loop, ne);
-
- __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
- __ Cbnz(tmp_32, &loop_head);
-
+ if (use_acquire_release) {
+ __ Bind(&loop_head);
+ __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+ __ Cmp(tmp_value, expected);
+ __ B(&exit_loop, ne);
+ __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+ __ Cbnz(tmp_32, &loop_head);
+ } else {
+ __ Dmb(InnerShareable, BarrierWrites);
+ __ Bind(&loop_head);
+ __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+ __ Cmp(tmp_value, expected);
+ __ B(&exit_loop, ne);
+ __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
+ __ Cbnz(tmp_32, &loop_head);
+ __ Dmb(InnerShareable, BarrierAll);
+ }
__ Bind(&exit_loop);
__ Cset(out, eq);
}