summaryrefslogtreecommitdiffstats
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/Android.mk3
-rw-r--r--compiler/arch/arm/entrypoint_trampoline.cc33
-rw-r--r--compiler/arch/arm/final_relocations_arm.cc88
-rw-r--r--compiler/arch/arm/final_relocations_arm.h39
-rw-r--r--compiler/compiled_method.cc17
-rw-r--r--compiler/compiled_method.h19
-rw-r--r--compiler/dex/compiler_enums.h2
-rw-r--r--compiler/dex/quick/arm/arm_lir.h1
-rw-r--r--compiler/dex/quick/arm/assemble_arm.cc19
-rw-r--r--compiler/dex/quick/arm/call_arm.cc31
-rw-r--r--compiler/dex/quick/arm/codegen_arm.h5
-rw-r--r--compiler/dex/quick/arm/utility_arm.cc10
-rw-r--r--compiler/dex/quick/codegen_util.cc17
-rw-r--r--compiler/dex/quick/gen_common.cc19
-rw-r--r--compiler/dex/quick/gen_invoke.cc17
-rw-r--r--compiler/dex/quick/mir_to_lir.h19
-rw-r--r--compiler/dex/quick/x86/call_x86.cc14
-rw-r--r--compiler/dex/quick/x86/codegen_x86.h5
-rw-r--r--compiler/driver/compiler_driver.cc39
-rw-r--r--compiler/driver/compiler_driver.h111
-rw-r--r--compiler/driver/compiler_options.h14
-rw-r--r--compiler/final_relocations.cc32
-rw-r--r--compiler/final_relocations.h89
-rw-r--r--compiler/oat_writer.cc88
-rw-r--r--compiler/oat_writer.h22
-rw-r--r--compiler/optimizing/optimizing_compiler.cc1
26 files changed, 715 insertions, 39 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b17cd52fad..29d9cc6c1d 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -19,6 +19,8 @@ LOCAL_PATH := $(call my-dir)
include art/build/Android.common.mk
LIBART_COMPILER_SRC_FILES := \
+ arch/arm/final_relocations_arm.cc \
+ arch/arm/entrypoint_trampoline.cc \
compiled_method.cc \
dex/local_value_numbering.cc \
dex/quick/arm/assemble_arm.cc \
@@ -65,6 +67,7 @@ LIBART_COMPILER_SRC_FILES := \
dex/ssa_transformation.cc \
driver/compiler_driver.cc \
driver/dex_compilation_unit.cc \
+ final_relocations.cc \
jni/quick/arm/calling_convention_arm.cc \
jni/quick/arm64/calling_convention_arm64.cc \
jni/quick/mips/calling_convention_mips.cc \
diff --git a/compiler/arch/arm/entrypoint_trampoline.cc b/compiler/arch/arm/entrypoint_trampoline.cc
new file mode 100644
index 0000000000..9ff9961f8d
--- /dev/null
+++ b/compiler/arch/arm/entrypoint_trampoline.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "driver/compiler_driver.h"
+
+namespace art {
+ // Appends one 4-byte trampoline entry to entrypoint_trampoline_code_. The entry is
+ // the Thumb2 instruction
+ //   ldr pc,[r9,#offset]
+ // where offset is the value of thread_offset.
+ void CompilerDriver::BuildArmEntrypointTrampolineCall(ThreadOffset<4> thread_offset) {
+   // TODO: we don't currently have a Thumb2 assembler; when we do, use that
+   // in preference to the hand generated code below.
+   const uint32_t offset = thread_offset.Uint32Value();
+
+   // Only 12 bits are available for the offset in this encoding. Fail loudly instead
+   // of silently masking a larger offset and branching through the wrong slot.
+   CHECK_LE(offset, 0xfffu) << "Entrypoint offset does not fit in a 12 bit immediate";
+
+   // Base opcode, with r9 placed in the base register field (bits 16-19).
+   const uint32_t instruction = 0xf8d0f000 | (9 << 16) | offset;
+
+   // Emit the high halfword first, each halfword in little endian byte order.
+   entrypoint_trampoline_code_.push_back((instruction >> 16) & 0xff);
+   entrypoint_trampoline_code_.push_back((instruction >> 24) & 0xff);
+   entrypoint_trampoline_code_.push_back((instruction >> 0) & 0xff);
+   entrypoint_trampoline_code_.push_back((instruction >> 8) & 0xff);
+ }
+} // namespace art
diff --git a/compiler/arch/arm/final_relocations_arm.cc b/compiler/arch/arm/final_relocations_arm.cc
new file mode 100644
index 0000000000..e95f3e2e43
--- /dev/null
+++ b/compiler/arch/arm/final_relocations_arm.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arch/arm/final_relocations_arm.h"
+#include "compiled_method.h"
+#include "driver/compiler_driver.h"
+#include "oat_writer.h"
+#include "dex/compiler_ir.h"
+
+namespace art {
+
+// Rewrites every recorded Thumb2 BL placeholder in |code| so that it branches to its
+// entry in the trampoline island. |address| is combined with each relocation's code
+// offset to form the PC of the call, so it must locate the start of |code| in the same
+// space as writer->GetCurrentTrampolineIslandOffset().
+void FinalEntrypointRelocationSetArm::Apply(uint8_t* code, const OatWriter* writer,
+                                            uint32_t address) const {
+  const uint32_t island_offset = writer->GetCurrentTrampolineIslandOffset();
+  const bool kDebugPrint = false;
+
+  for (const auto& reloc : relocations_) {
+    switch (reloc.type_) {
+      case kRelocationCall: {
+        uint8_t* const insn = code + reloc.code_offset_;
+
+        // Fetch the instruction. This is two 16 bit halfwords, high halfword first, each
+        // stored little endian. Read it a byte at a time so the fetch mirrors the
+        // write-back below and depends on neither host byte order nor alignment.
+        uint32_t inst = (static_cast<uint32_t>(insn[1]) << 24) |
+                        (static_cast<uint32_t>(insn[0]) << 16) |
+                        (static_cast<uint32_t>(insn[3]) << 8) |
+                        static_cast<uint32_t>(insn[2]);
+
+        // Check that we are trying to relocate a Thumb2 BL instruction.
+        CHECK_EQ(inst, 0xf000d000);
+
+        uint32_t pc = address + reloc.code_offset_ + 4;  // Thumb PC is instruction + 4
+
+        // The trampoline target is to a table starting at the island. Each trampoline
+        // entry is 4 bytes long.
+        uint32_t target = island_offset + static_cast<uint32_t>(reloc.value_) * 4;
+        int32_t delta = target - pc;
+
+        if (kDebugPrint) {
+          LOG(INFO) << "applying final relocation for island " << island_offset;
+          LOG(INFO) << "pc: " << std::hex << pc << ", target: " << target <<
+              ", reloc.value: " << reloc.value_ << ", delta: " << delta;
+        }
+
+        // Max range for a Thumb2 BL is 16MB. All calls will be to a lower address.
+        const int32_t kMaxRange = -16 * static_cast<int32_t>(MB);
+        CHECK_LT(delta, 0);
+        CHECK_GT(delta, kMaxRange);
+
+        // Modify the instruction using the T1 BL instruction format.
+        // This is the equivalent of an R_ARM_THM_CALL ELF relocation.
+        delta >>= 1;  // Low bit is implicit.
+        uint32_t signbit = (delta >> 31) & 0x1;
+        uint32_t i1 = (delta >> 22) & 0x1;
+        uint32_t i2 = (delta >> 21) & 0x1;
+        uint32_t imm10 = (delta >> 11) & 0x03ff;
+        uint32_t imm11 = delta & 0x07ff;
+        // J1 and J2 hold the inverse of (I1 xor S) and (I2 xor S) respectively.
+        uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
+        uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
+        uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
+            imm11;
+        inst |= value;
+
+        // Write the instruction back. High 16 bits first, little endian format.
+        insn[0] = (inst >> 16) & 0xff;
+        insn[1] = (inst >> 24) & 0xff;
+        insn[2] = (inst >> 0) & 0xff;
+        insn[3] = (inst >> 8) & 0xff;
+        break;
+      }
+
+      default:
+        LOG(FATAL) << "Unknown entrypoint relocation type " << reloc.type_;
+    }
+  }
+}
+} // namespace art
diff --git a/compiler/arch/arm/final_relocations_arm.h b/compiler/arch/arm/final_relocations_arm.h
new file mode 100644
index 0000000000..656c181084
--- /dev/null
+++ b/compiler/arch/arm/final_relocations_arm.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_ARCH_ARM_FINAL_RELOCATIONS_ARM_H_
+#define ART_COMPILER_ARCH_ARM_FINAL_RELOCATIONS_ARM_H_
+
+// ARM final relocations
+
+#include "final_relocations.h"
+
+namespace art {
+
+class CompilerDriver;
+struct CompilationUnit;
+
+// ARM flavour of FinalEntrypointRelocationSet. Apply() is implemented in
+// final_relocations_arm.cc and patches Thumb2 BL instructions.
+class FinalEntrypointRelocationSetArm : public FinalEntrypointRelocationSet {
+ public:
+ explicit FinalEntrypointRelocationSetArm(const CompilerDriver* driver) : FinalEntrypointRelocationSet(driver) {}
+ ~FinalEntrypointRelocationSetArm() {}
+
+ // Applies every relocation in this set to |code|. |address| is added to each
+ // relocation's code offset to obtain the PC of the instruction being patched.
+ void Apply(uint8_t* code, const OatWriter* writer, uint32_t address) const;
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_ARCH_ARM_FINAL_RELOCATIONS_ARM_H_
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 8e013c1ece..4eb1f7a7ce 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -16,20 +16,23 @@
#include "compiled_method.h"
#include "driver/compiler_driver.h"
+#include "oat_writer.h"
+#include "dex/compiler_ir.h"
namespace art {
CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
- const std::vector<uint8_t>& quick_code)
+ const std::vector<uint8_t>& quick_code,
+ const FinalRelocations* relocs)
: compiler_driver_(compiler_driver), instruction_set_(instruction_set),
- portable_code_(nullptr), quick_code_(nullptr) {
+ portable_code_(nullptr), quick_code_(nullptr), final_relocations_(relocs) {
SetCode(&quick_code, nullptr);
}
CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
const std::string& elf_object, const std::string& symbol)
: compiler_driver_(compiler_driver), instruction_set_(instruction_set),
- portable_code_(nullptr), quick_code_(nullptr), symbol_(symbol) {
+ portable_code_(nullptr), quick_code_(nullptr), symbol_(symbol), final_relocations_(nullptr) {
CHECK_NE(elf_object.size(), 0U);
CHECK_NE(symbol.size(), 0U);
std::vector<uint8_t> temp_code(elf_object.size());
@@ -161,8 +164,9 @@ CompiledMethod::CompiledMethod(CompilerDriver& driver,
const std::vector<uint8_t>& mapping_table,
const std::vector<uint8_t>& vmap_table,
const std::vector<uint8_t>& native_gc_map,
- const std::vector<uint8_t>* cfi_info)
- : CompiledCode(&driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
+ const std::vector<uint8_t>* cfi_info,
+ const FinalRelocations* relocs)
+ : CompiledCode(&driver, instruction_set, quick_code, relocs), frame_size_in_bytes_(frame_size_in_bytes),
core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
mapping_table_(driver.DeduplicateMappingTable(mapping_table)),
vmap_table_(driver.DeduplicateVMapTable(vmap_table)),
@@ -176,7 +180,7 @@ CompiledMethod::CompiledMethod(CompilerDriver& driver,
const size_t frame_size_in_bytes,
const uint32_t core_spill_mask,
const uint32_t fp_spill_mask)
- : CompiledCode(&driver, instruction_set, code),
+ : CompiledCode(&driver, instruction_set, code, nullptr),
frame_size_in_bytes_(frame_size_in_bytes),
core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
mapping_table_(driver.DeduplicateMappingTable(std::vector<uint8_t>())),
@@ -205,5 +209,4 @@ CompiledMethod::CompiledMethod(CompilerDriver& driver, InstructionSet instructio
vmap_table_ = driver.DeduplicateVMapTable(std::vector<uint8_t>());
gc_map_ = driver.DeduplicateGCMap(std::vector<uint8_t>());
}
-
} // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 90ae6eeae8..d9edc6bd55 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -23,6 +23,7 @@
#include "instruction_set.h"
#include "utils.h"
#include "UniquePtr.h"
+#include "final_relocations.h"
namespace llvm {
class Function;
@@ -31,12 +32,14 @@ namespace llvm {
namespace art {
class CompilerDriver;
+class OatWriter;
class CompiledCode {
public:
// For Quick to supply an code blob
CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
- const std::vector<uint8_t>& quick_code);
+ const std::vector<uint8_t>& quick_code,
+ const FinalRelocations* relocations);
// For Portable to supply an ELF object
CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
@@ -78,6 +81,13 @@ class CompiledCode {
const std::vector<uint32_t>& GetOatdataOffsetsToCompliledCodeOffset() const;
void AddOatdataOffsetToCompliledCodeOffset(uint32_t offset);
+ // Runs the final relocation pass over the quick code bytes. Does nothing when this
+ // code carries no final relocations. |writer| and |address| are forwarded unchanged.
+ void ApplyFinalRelocations(const OatWriter *writer, uint32_t address) {
+   if (final_relocations_.get() == nullptr) {
+     return;
+   }
+   auto code = &(*quick_code_)[0];
+   final_relocations_->Apply(code, writer, address);
+ }
+
private:
CompilerDriver* const compiler_driver_;
@@ -97,6 +107,10 @@ class CompiledCode {
// OatWriter and then used by the ElfWriter to add relocations so
// that MCLinker can update the values to the location in the linked .so.
std::vector<uint32_t> oatdata_offsets_to_compiled_code_offset_;
+
+ // Set of relocations to apply as the final pass. This happens
+ // only when the final oat file addresses are known.
+ UniquePtr<const FinalRelocations> final_relocations_;
};
class CompiledMethod : public CompiledCode {
@@ -111,7 +125,8 @@ class CompiledMethod : public CompiledCode {
const std::vector<uint8_t>& mapping_table,
const std::vector<uint8_t>& vmap_table,
const std::vector<uint8_t>& native_gc_map,
- const std::vector<uint8_t>* cfi_info);
+ const std::vector<uint8_t>* cfi_info,
+ const FinalRelocations* relocations);
// Constructs a CompiledMethod for the QuickJniCompiler.
CompiledMethod(CompilerDriver& driver,
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 6c8c85d16d..a3ce4f9c54 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -430,10 +430,12 @@ enum FixupKind {
kFixupT2Branch, // Thumb2 Unconditional branch
kFixupBlx1, // Blx1 (start of Blx1/Blx2 pair).
kFixupBl1, // Bl1 (start of Bl1/Bl2 pair).
+ kFixup2Bl1, // Thumb2 Bl1 (start of Bl1/Bl2 pair).
kFixupAdr, // Adr.
kFixupMovImmLST, // kThumb2MovImm16LST.
kFixupMovImmHST, // kThumb2MovImm16HST.
kFixupAlign4, // Align to 4-byte boundary.
+ kFixupTrampCall, // Call into trampoline for runtime helper.
};
std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index c9acd66bba..8c5c6c5fca 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -496,6 +496,7 @@ enum ArmOpcode {
kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024.
kThumb2LdrdI8, // ldrd rt, rt2, [rn +-/1024].
kThumb2StrdI8, // strd rt, rt2, [rn +-/1024].
+ kThumb2BlTramp, // Thumb2 BL to trampoline
kArmLast,
};
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index f77b0a6302..151f3c7b33 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1035,6 +1035,11 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = {
kFmtBitBlt, 7, 0,
IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
"strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
+ ENCODING_MAP(kThumb2BlTramp, 0xf000d000,
+ kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1,
+ IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+ "bl", "!0t", 4, kFixupTrampCall),
};
// new_lir replaces orig_lir in the pcrel_fixup list.
@@ -1224,6 +1229,7 @@ void ArmMir2Lir::AssembleLIR() {
while (true) {
offset_adjustment = 0;
AssemblerStatus res = kSuccess; // Assume success
+
generation ^= 1;
// Note: nodes requring possible fixup linked in ascending order.
lir = first_fixup_;
@@ -1577,6 +1583,17 @@ void ArmMir2Lir::AssembleLIR() {
}
break;
}
+ case kFixupTrampCall: {
+ // This is a call to a trampoline. The value for the trampoline call needs
+ // both the offset into the code and the trampoline to call. It will be
+ // added to the list of calls when we actually insert this instruction into
+ // the code_buffer (when we have a stable instruction stream).
+ uint32_t instoffset = lir->offset;
+ // LOG(INFO) << "adding trampoline call: offset: " << instoffset <<
+ // " entrypoint: " << lir->operands[0];
+ trampoline_calls_.push_back(TrampolineCall(instoffset, lir->operands[0]));
+ break;
+ }
default:
LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
}
@@ -1595,6 +1612,7 @@ void ArmMir2Lir::AssembleLIR() {
starting_offset += offset_adjustment;
data_offset_ = (starting_offset + 0x3) & ~0x3;
AssignDataOffsets();
+ trampoline_calls_.clear(); // These are invalid now.
}
}
@@ -1675,5 +1693,4 @@ void ArmMir2Lir::AssignDataOffsets() {
total_size_ = AssignFillArrayDataOffset(offset);
}
-
} // namespace art
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index d0d0e6b3a7..1b485a3d34 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -19,6 +19,7 @@
#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
namespace art {
@@ -468,4 +469,34 @@ void ArmMir2Lir::GenSpecialExitSequence() {
NewLIR1(kThumbBx, rARM_LR);
}
+// Entrypoint calls.
+// First half of a helper call. When helper trampolines are enabled no target
+// register is set up and the invalid register is returned; otherwise the result of
+// LoadHelper() is returned.
+RegStorage ArmMir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
+  const bool use_trampolines =
+      cu_->compiler_driver->GetCompilerOptions().GenerateHelperTrampolines();
+  return use_trampolines ? RegStorage::InvalidReg() : LoadHelper(helper_offset);
+}
+
+// Second half of a helper call: emits the branch to the helper and returns it.
+//   r_tgt         register obtained from CallHelperSetup(); freed here when it is used.
+//   helper_offset thread offset of the helper entrypoint.
+//   safepoint_pc  when true, the emitted instruction is marked as a safepoint PC.
+//   use_link      true for a linking call (kOpBlx), false for a plain branch (kOpBx).
+LIR* ArmMir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+                            bool use_link) {
+  const CompilerOptions& compiler_options = cu_->compiler_driver->GetCompilerOptions();
+  const bool use_trampolines = compiler_options.GenerateHelperTrampolines();
+
+  LIR* call_inst = nullptr;
+  if (use_link && use_trampolines) {
+    // Linking calls go through the trampoline table; no target register is involved.
+    call_inst = OpThreadMem(kOpBlx, helper_offset);
+  } else {
+    if (use_trampolines) {
+      // Only linking calls can use a trampoline, but in trampoline mode
+      // CallHelperSetup() hands back the invalid register. Load the helper here so
+      // that the branch below has a real target.
+      r_tgt = LoadHelper(helper_offset);
+    }
+    call_inst = OpReg(use_link ? kOpBlx : kOpBx, r_tgt);
+    FreeTemp(r_tgt);
+  }
+  if (safepoint_pc) {
+    MarkSafepointPC(call_inst);
+  }
+  return call_inst;
+}
} // namespace art
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 13fa6353b0..3c0aa03d5b 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -195,6 +195,11 @@ class ArmMir2Lir FINAL : public Mir2Lir {
bool InexpensiveConstantLong(int64_t value);
bool InexpensiveConstantDouble(int64_t value);
+ // Entrypoint calls.
+ RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+ LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+ bool safepoint_pc, bool use_link);
+
private:
void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
ConditionCode ccode);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 70cbdd2e31..8e6d9a8ef8 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -1111,8 +1111,14 @@ LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
}
LIR* ArmMir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
- LOG(FATAL) << "Unexpected use of OpThreadMem for Arm";
- return NULL;
+ if (op == kOpBlx) {
+ const uint32_t trampoline = cu_->compiler_driver->AddEntrypointTrampoline(
+ thread_offset.Int32Value());
+ return NewLIR1(kThumb2BlTramp, trampoline);
+ } else {
+ LOG(FATAL) << "Invalid opcode for ARM OpThreadMem on Arm";
+ return NULL;
+ }
}
LIR* ArmMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6e6b8f0a30..b163ef7e06 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1012,11 +1012,26 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() {
vmap_encoder.PushBackUnsigned(0u); // Size is 0.
}
+ // All relocations
+ UniquePtr<FinalRelocations> all_relocs(new FinalRelocations());
+
+ // Build the final relocations for this method.
+ if (trampoline_calls_.size() != 0) {
+ FinalEntrypointRelocationSet* ep_relocs =
+ cu_->compiler_driver->AllocateFinalEntrypointRelocationSet(cu_);
+ for (size_t i = 0 ; i < trampoline_calls_.size(); ++i) {
+ const TrampolineCall& call = trampoline_calls_[i];
+ ep_relocs->Add(call.code_offset_, call.trampoline_offset_);
+ }
+ all_relocs->push_back(ep_relocs);
+ }
+
UniquePtr<std::vector<uint8_t> > cfi_info(ReturnCallFrameInformation());
CompiledMethod* result =
new CompiledMethod(*cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
- vmap_encoder.GetData(), native_gc_map_, cfi_info.get());
+ vmap_encoder.GetData(), native_gc_map_, cfi_info.get(),
+ all_relocs.release());
return result;
}
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index a3fb4201db..4e32931670 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -17,6 +17,7 @@
#include "dex/compiler_internals.h"
#include "dex/quick/arm/arm_lir.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"
#include "mirror/object-inl.h"
@@ -950,12 +951,20 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) {
void Compile() {
GenerateTargetLabel();
- RegStorage r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pResolveString));
+ const CompilerOptions& compiler_options =
+ m2l_->cu_->compiler_driver->GetCompilerOptions();
+ if (compiler_options.GenerateHelperTrampolines()) {
+ m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+ m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(4, pResolveString),
+ true);
+ } else {
+ RegStorage r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pResolveString));
- m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_); // .eq
- LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
- m2l_->MarkSafepointPC(call_inst);
- m2l_->FreeTemp(r_tgt);
+ m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+ LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
+ m2l_->MarkSafepointPC(call_inst);
+ m2l_->FreeTemp(r_tgt);
+ }
m2l_->OpUnconditionalBranch(cont_);
}
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 396a709994..fee15d7b3a 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -19,6 +19,7 @@
#include "dex/quick/dex_file_method_inliner.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex_file-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "invoke_type.h"
#include "mirror/array.h"
@@ -62,25 +63,19 @@ void Mir2Lir::AddIntrinsicLaunchpad(CallInfo* info, LIR* branch, LIR* resume) {
/*
* To save scheduling time, helper calls are broken into two parts: generation of
- * the helper target address, and the actual call to the helper. Because x86
- * has a memory call operation, part 1 is a NOP for x86. For other targets,
- * load arguments between the two parts.
+ * the helper target address, and the actual call to the helper.
+ * These functions can be overridden by architecture specific codegen.
*/
RegStorage Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
- return (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) ? RegStorage::InvalidReg() : LoadHelper(helper_offset);
+ return LoadHelper(helper_offset);
}
/* NOTE: if r_tgt is a temp, it will be freed following use */
LIR* Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
bool use_link) {
- LIR* call_inst;
OpKind op = use_link ? kOpBlx : kOpBx;
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
- call_inst = OpThreadMem(op, helper_offset);
- } else {
- call_inst = OpReg(op, r_tgt);
- FreeTemp(r_tgt);
- }
+ LIR* call_inst = OpReg(op, r_tgt);
+ FreeTemp(r_tgt);
if (safepoint_pc) {
MarkSafepointPC(call_inst);
}
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 35f948e083..cecb01b768 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -328,6 +328,18 @@ class Mir2Lir : public Backend {
LIR* const cont_;
};
+ // Records one call made through a trampoline: where the calling instruction sits in
+ // the method's code, and which trampoline offset it calls.
+ struct TrampolineCall {
+   TrampolineCall(uint32_t code_offset, uint32_t trampoline_offset)
+       : code_offset_(code_offset), trampoline_offset_(trampoline_offset) {}
+
+   uint32_t code_offset_;        // Offset of instruction in method code stream (bytes).
+   uint32_t trampoline_offset_;  // Which trampoline to call.
+ };
+
virtual ~Mir2Lir() {}
int32_t s4FromSwitchData(const void* switch_data) {
@@ -614,11 +626,11 @@ class Mir2Lir : public Backend {
virtual void GenConstWide(RegLocation rl_dest, int64_t value);
virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_src2);
+ virtual LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+ bool use_link = true);
+ virtual RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
// Shared by all targets - implemented in gen_invoke.cc.
- LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
- bool use_link = true);
- RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
void CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc);
void CallRuntimeHelperReg(ThreadOffset<4> helper_offset, RegStorage arg0, bool safepoint_pc);
void CallRuntimeHelperRegLocation(ThreadOffset<4> helper_offset, RegLocation arg0,
@@ -1277,6 +1289,7 @@ class Mir2Lir : public Backend {
LIR* last_lir_insn_;
GrowableArray<LIRSlowPath*> slow_paths_;
+ std::vector<TrampolineCall> trampoline_calls_;
}; // Class Mir2Lir
} // namespace art
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 729b30d621..2bd2caaff5 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -278,4 +278,18 @@ void X86Mir2Lir::GenSpecialExitSequence() {
NewLIR0(kX86Ret);
}
+// x86 sets up no target register for a helper call: X86Mir2Lir::CallHelper() emits the
+// call through OpThreadMem() instead. helper_offset is unused here.
+RegStorage X86Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
+ return RegStorage::InvalidReg();
+}
+
+// Emits the helper call (use_link) or jump (!use_link) through OpThreadMem() and
+// optionally marks it as a safepoint PC. r_tgt is not used on x86.
+LIR* X86Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+                            bool use_link) {
+  const OpKind op = use_link ? kOpBlx : kOpBx;
+  LIR* const call_inst = OpThreadMem(op, helper_offset);
+  if (safepoint_pc) {
+    MarkSafepointPC(call_inst);
+  }
+  return call_inst;
+}
+
+
} // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index af2a140296..e913d1d5a4 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -328,6 +328,11 @@ class X86Mir2Lir FINAL : public Mir2Lir {
*/
std::vector<uint8_t>* ReturnCallFrameInformation();
+ // Entrypoint calls.
+ RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+ LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+ bool safepoint_pc, bool use_link);
+
private:
void EmitPrefix(const X86EncodingMap* entry);
void EmitOpcode(const X86EncodingMap* entry);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a241d51468..8bf3b0486a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -24,6 +24,8 @@
#include <unistd.h>
#include <utility>
+#include "arch/arm/final_relocations_arm.h"
+#include "base/hex_dump.h"
#include "base/stl_util.h"
#include "base/timing_logger.h"
#include "class_linker.h"
@@ -506,6 +508,7 @@ void CompilerDriver::CompileAll(jobject class_loader,
UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
PreCompile(class_loader, dex_files, thread_pool.get(), timings);
Compile(class_loader, dex_files, thread_pool.get(), timings);
+ PostCompile();
if (dump_stats_) {
stats_->Dump();
}
@@ -617,6 +620,10 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De
UpdateImageClasses(timings);
}
+// Called from CompileAll() after Compile(). Currently its only job is to generate
+// the entrypoint trampoline code.
+void CompilerDriver::PostCompile() {
+ BuildEntrypointTrampolineCode();
+}
+
bool CompilerDriver::IsImageClass(const char* descriptor) const {
if (!IsImage()) {
return true;
@@ -1240,6 +1247,25 @@ bool CompilerDriver::IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc
return result;
}
+// Registers |entrypoint| with the trampoline table, using the current thread for
+// locking, and returns whatever EntrypointTrampolines::AddEntrypoint() returns.
+uint32_t CompilerDriver::AddEntrypointTrampoline(uint32_t entrypoint) {
+ return entrypoint_trampolines_.AddEntrypoint(Thread::Current(), entrypoint);
+}
+
+
+// Generates one trampoline instruction per entry in the trampoline table.
+// The architecture check runs per entry, so an empty table never reaches the
+// UNIMPLEMENTED path, whatever the instruction set.
+void CompilerDriver::BuildEntrypointTrampolineCode() {
+  for (uint32_t entrypoint_offset : entrypoint_trampolines_.GetTrampolineTable()) {
+    if (instruction_set_ == kThumb2) {
+      BuildArmEntrypointTrampolineCall(ThreadOffset<4>(entrypoint_offset));
+    } else {
+      UNIMPLEMENTED(FATAL) << "No entrypoint trampolines for this architecture";
+    }
+  }
+}
+
+
void CompilerDriver::AddCodePatch(const DexFile* dex_file,
uint16_t referrer_class_def_idx,
uint32_t referrer_method_idx,
@@ -2150,4 +2176,17 @@ bool CompilerDriver::SkipCompilation(const std::string& method_name) {
}
return !compile;
}
+
+// Creates a new, heap-allocated entrypoint relocation set for the driver's instruction
+// set. Only kArm and kThumb2 are supported; anything else hits UNIMPLEMENTED(FATAL).
+// |cu| is currently unused.
+FinalEntrypointRelocationSet* CompilerDriver::AllocateFinalEntrypointRelocationSet(
+    CompilationUnit* cu) const {
+  if (instruction_set_ == kArm || instruction_set_ == kThumb2) {
+    return new FinalEntrypointRelocationSetArm(this);
+  }
+  UNIMPLEMENTED(FATAL) << "Cannot allocate FinalEntrypointRelocationSet for non-ARM";
+  return nullptr;
+}
+
} // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 802f859da4..6df5d0c09f 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -634,6 +634,112 @@ class CompilerDriver {
// Should the compiler run on this method given profile information?
bool SkipCompilation(const std::string& method_name);
+ // Entrypoint trampolines.
+ //
+ // The idea here is that we can save code size by collecting the branches
+ // to the entrypoints (helper functions called by the generated code) into a
+ // table and then branching relative to that table from the code. On ARM 32 this
+ // will save 2 bytes per call. Only the entrypoints used by the program (the whole
+ // program - these are global) are in this table and are in no particular order.
+ //
+ // The trampolines will be placed right at the start of the .text section in the file
+ // and will consist of a table of instructions, each of which will branch relative to
+ // the thread register (r9 on ARM) to an entrypoint. On ARM this would look like:
+ //
+ // trampolines:
+ // 1: ldr pc, [r9, #40]
+ // 2: ldr pc, [r9, #8]
+ // ...
+ //
+ // Then a call to an entrypoint would be an immediate BL instruction to the appropriate
+ // label (1 or 2 in the above example). Because the entrypoint table has the lower bit
+ // of the address already set, the ldr pc will switch from ARM to Thumb for the entrypoint as
+ // necessary.
+ //
+ // On ARM, the range of a BL instruction is +-32M so this is more than enough for an
+ // immediate BL instruction in the generated code.
+ //
+ // The actual address of the trampoline for a particular entrypoint is not known until
+ // the OAT file is written and we know the addresses of all the branch instructions in
+ // the program. At this point we can rewrite the BL instruction to have the correct relative
+ // offset.
+  class EntrypointTrampolines {
+   public:
+    EntrypointTrampolines() : current_offset_(0), lock_("Entrypoint Trampolines") {}
+    ~EntrypointTrampolines() {}
+
+    // Registers entrypoint 'ep' and returns its slot in the trampoline table.
+    // Registering the same entrypoint again returns the slot handed out the first time.
+    // The lookup and the update both happen while holding lock_.
+    uint32_t AddEntrypoint(Thread* self, uint32_t ep) LOCKS_EXCLUDED(lock_) {
+      MutexLock mu(self, lock_);
+      Trampolines::iterator existing = trampolines_.find(ep);
+      if (existing != trampolines_.end()) {
+        return existing->second;
+      }
+      // First registration: the next free slot is the current count.
+      const uint32_t assigned_offset = current_offset_;
+      trampolines_[ep] = assigned_offset;
+      trampoline_table_.push_back(ep);
+      LOG(DEBUG) << "adding new trampoline for " << ep << " at offset " << assigned_offset;
+      ++current_offset_;
+      return assigned_offset;
+    }
+
+    // Entrypoints in the order in which they were first registered.
+    const std::vector<uint32_t>& GetTrampolineTable() const {
+      return trampoline_table_;
+    }
+
+    // Number of slots handed out so far.
+    uint32_t GetTrampolineTableSize() const {
+      return current_offset_;
+    }
+
+   private:
+    // Next slot to hand out; incremented once per newly registered entrypoint.
+    uint32_t current_offset_;
+    // Maps an entrypoint offset to its slot in the trampoline table.
+    typedef std::map<uint32_t, uint32_t> Trampolines;
+    Trampolines trampolines_ GUARDED_BY(lock_);
+
+    // Every registered entrypoint offset, in registration order.
+    std::vector<uint32_t> trampoline_table_;
+    Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  };
+
+ uint32_t AddEntrypointTrampoline(uint32_t entrypoint);
+
+ // Returns the trampoline table held by entrypoint_trampolines_ (a reference, not a copy).
+ const std::vector<uint32_t>& GetEntrypointTrampolineTable() const {
+ return entrypoint_trampolines_.GetTrampolineTable();
+ }
+
+  // Size of the entrypoint trampoline table. On Thumb2 the number of registered
+  // trampolines is multiplied by 4 (presumably the byte size of one trampoline
+  // instruction - confirm); for every other instruction set the raw count is returned.
+  uint32_t GetEntrypointTrampolineTableSize() const {
+    const uint32_t count = entrypoint_trampolines_.GetTrampolineTableSize();
+    return (instruction_set_ == kThumb2) ? count * 4 : count;
+  }
+
+  // Maximum distance allowed between two entrypoint trampoline islands. Call
+  // instructions have a limited reach on some architectures, so this value tells the
+  // writer when a fresh island has to be emitted to keep later calls in range.
+  // A result of 0 means no trampoline islands are generated at all.
+  size_t GetMaxEntrypointTrampolineOffset() const {
+    // A Thumb2 BL reaches 16MB; use 15MB to leave a little wiggle room.
+    return (instruction_set_ == kThumb2) ? 15 * MB : 0;
+  }
+
+ void BuildEntrypointTrampolineCode();
+
+ // Architecture specific Entrypoint trampoline builder.
+ void BuildArmEntrypointTrampolineCall(ThreadOffset<4> offset);
+
+ // Returns the byte buffer entrypoint_trampoline_code_ by reference.
+ // NOTE(review): presumably filled by BuildEntrypointTrampolineCode() - confirm.
+ const std::vector<uint8_t>& GetEntrypointTrampolineTableCode() const {
+ return entrypoint_trampoline_code_;
+ }
+
+ FinalEntrypointRelocationSet* AllocateFinalEntrypointRelocationSet(CompilationUnit* cu) const;
+
private:
// These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
// The only external contract is that unresolved method has flags 0 and resolved non-0.
@@ -671,6 +777,7 @@ class CompilerDriver {
LOCKS_EXCLUDED(Locks::mutator_lock_);
void LoadImageClasses(TimingLogger* timings);
+ void PostCompile() LOCKS_EXCLUDED(Locks::mutator_lock_);
// Attempt to resolve all type, methods, fields, and strings
// referenced from code in the dex file following PathClassLoader
@@ -831,6 +938,10 @@ class CompilerDriver {
DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_gc_map_;
DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_cfi_info_;
+ EntrypointTrampolines entrypoint_trampolines_;
+
+ std::vector<uint8_t> entrypoint_trampoline_code_;
+
DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
};
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 20c6bc8e4e..52248a6c5a 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -50,7 +50,8 @@ class CompilerOptions {
small_method_threshold_(kDefaultSmallMethodThreshold),
tiny_method_threshold_(kDefaultTinyMethodThreshold),
num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
- generate_gdb_information_(false)
+ generate_gdb_information_(false),
+ generate_helper_trampolines_(false)
#ifdef ART_SEA_IR_MODE
, sea_ir_mode_(false)
#endif
@@ -62,7 +63,8 @@ class CompilerOptions {
size_t small_method_threshold,
size_t tiny_method_threshold,
size_t num_dex_methods_threshold,
- bool generate_gdb_information
+ bool generate_gdb_information,
+ bool generate_helper_trampolines
#ifdef ART_SEA_IR_MODE
, bool sea_ir_mode
#endif
@@ -73,7 +75,8 @@ class CompilerOptions {
small_method_threshold_(small_method_threshold),
tiny_method_threshold_(tiny_method_threshold),
num_dex_methods_threshold_(num_dex_methods_threshold),
- generate_gdb_information_(generate_gdb_information)
+ generate_gdb_information_(generate_gdb_information),
+ generate_helper_trampolines_(generate_helper_trampolines)
#ifdef ART_SEA_IR_MODE
, sea_ir_mode_(sea_ir_mode)
#endif
@@ -140,6 +143,10 @@ class CompilerOptions {
return generate_gdb_information_;
}
+ bool GenerateHelperTrampolines() const {
+ return generate_helper_trampolines_;
+ }
+
private:
CompilerFilter compiler_filter_;
size_t huge_method_threshold_;
@@ -148,6 +155,7 @@ class CompilerOptions {
size_t tiny_method_threshold_;
size_t num_dex_methods_threshold_;
bool generate_gdb_information_;
+ bool generate_helper_trampolines_;
#ifdef ART_SEA_IR_MODE
bool sea_ir_mode_;
diff --git a/compiler/final_relocations.cc b/compiler/final_relocations.cc
new file mode 100644
index 0000000000..089214c6b6
--- /dev/null
+++ b/compiler/final_relocations.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "final_relocations.h"
+
+namespace art {
+
+class OatWriter;
+
+// Applies every relocation set held in this container, in container order, forwarding
+// the same 'code', 'writer' and 'address' arguments to each set's Apply().
+void FinalRelocations::Apply(uint8_t* code, const OatWriter* writer, uint32_t address) const {
+  for (const_iterator it = begin(); it != end(); ++it) {
+    (*it)->Apply(code, writer, address);
+  }
+}
+
+// Records a call relocation at code offset 'offset' whose value is 'entrypoint_offset'.
+void FinalEntrypointRelocationSet::Add(uint32_t offset, uint32_t entrypoint_offset) {
+  AddRelocation(kRelocationCall, offset, entrypoint_offset);
+}
+} // namespace art
diff --git a/compiler/final_relocations.h b/compiler/final_relocations.h
new file mode 100644
index 0000000000..6478663c40
--- /dev/null
+++ b/compiler/final_relocations.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_FINAL_RELOCATIONS_H_
+#define ART_COMPILER_FINAL_RELOCATIONS_H_
+
+#include <string>
+#include <vector>
+#include "base/macros.h"
+
+namespace art {
+
+class CompilerDriver;
+class OatWriter;
+
+// Kinds of relocation that can be recorded in a FinalRelocationSet.
+enum FinalRelocationType {
+ kRelocationCall // Relocation of a call instruction.
+};
+
+// This is a set of relocations that is performed when the code is finally
+// written to the output file. This is when we know all the offsets and
+// can patch the binary instructions with known PC relative addresses.
+//
+// This is an abstract class that can be used for sets of relocations of different
+// types. For example, one type of relocation set is the relocation of calls
+// to entrypoint trampoline islands. Another type could be intra-app direct
+// method calls. The 'Apply' function is virtual and is implemented by
+// each concrete subclass.
+class FinalRelocationSet {
+ public:
+ // Stores 'driver' without copying it; the set never modifies the driver.
+ explicit FinalRelocationSet(const CompilerDriver* driver) : driver_(driver) {}
+ virtual ~FinalRelocationSet() {}
+
+ // Appends one relocation record. 'offset' and 'value' are stored verbatim; how they
+ // are interpreted is up to the subclass's Apply().
+ void AddRelocation(FinalRelocationType type, uint32_t offset, uintptr_t value) {
+ relocations_.push_back(Relocation(type, offset, value));
+ }
+
+ // Apply this relocation set to the given code.
+ virtual void Apply(uint8_t* code, const OatWriter* writer, uint32_t address) const = 0;
+
+ protected:
+ // One recorded relocation. There is no default constructor, so all three fields
+ // must be supplied when a record is created.
+ struct Relocation {
+ Relocation()=delete;
+ Relocation(FinalRelocationType type, uint32_t offset, uintptr_t value) :
+ type_(type), code_offset_(offset), value_(value) {}
+ FinalRelocationType type_;
+ uint32_t code_offset_;
+ uintptr_t value_;
+ };
+
+ const CompilerDriver* const driver_;
+ typedef std::vector<Relocation> Relocations;
+ // Relocation records in the order they were added.
+ Relocations relocations_;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FinalRelocationSet);
+};
+
+/* abstract. Implemented by architecture-specific subclasses */
+class FinalEntrypointRelocationSet : public FinalRelocationSet {
+ public:
+ explicit FinalEntrypointRelocationSet(const CompilerDriver* driver) : FinalRelocationSet(driver) {
+ }
+ ~FinalEntrypointRelocationSet() {}
+
+ // Records a call relocation (kRelocationCall) at code offset 'offset' whose value is
+ // 'entrypoint_offset'. Apply() is not defined here, so this class stays abstract.
+ void Add(uint32_t offset, uint32_t entrypoint_offset);
+};
+
+// Holder class for a set of final relocations.
+class FinalRelocations : public std::vector<const FinalRelocationSet*> {
+ public:
+ // Calls Apply() on every contained set, passing the same arguments to each.
+ // NOTE(review): elements are raw pointers and nothing in this class deletes them -
+ // confirm who owns the relocation sets.
+ void Apply(uint8_t* code, const OatWriter* writer, uint32_t address) const;
+};
+} // namespace art
+
+#endif // ART_COMPILER_FINAL_RELOCATIONS_H_
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 2d45a2f65f..4d843a5a37 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -80,7 +80,8 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
size_oat_class_type_(0),
size_oat_class_status_(0),
size_oat_class_method_bitmaps_(0),
- size_oat_class_method_offsets_(0) {
+ size_oat_class_method_offsets_(0),
+ current_trampoline_island_offset_(0xffffffff) {
size_t offset;
{
TimingLogger::ScopedSplit split("InitOatHeader", timings);
@@ -377,15 +378,28 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index,
CHECK(quick_code != nullptr);
offset = compiled_method->AlignCode(offset);
DCHECK_ALIGNED(offset, kArmAlignment);
+
+ uint32_t thumb_offset = compiled_method->CodeDelta();
+ uint32_t tramp_offset = offset + thumb_offset;
+
+ // Allocate a trampoline island if we need to.
+ uint32_t trampsize = AllocateTrampolineIslandIfNecessary(tramp_offset);
+ if (trampsize > 0) {
+ offset += trampsize; // Account for island in current offset.
+
+ // Realign code after trampoline island.
+ offset = compiled_method->AlignCode(offset);
+ DCHECK_ALIGNED(offset, kArmAlignment);
+ }
+
uint32_t code_size = quick_code->size() * sizeof(uint8_t);
CHECK_NE(code_size, 0U);
- uint32_t thumb_offset = compiled_method->CodeDelta();
quick_code_offset = offset + sizeof(code_size) + thumb_offset;
std::vector<uint8_t>* cfi_info = compiler_driver_->GetCallFrameInformation();
if (cfi_info != nullptr) {
- // Copy in the FDE, if present
- const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
+ // Copy in the FDE, if present
+ const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
if (fde != nullptr) {
// Copy the information into cfi_info and then fix the address in the new copy.
int cur_offset = cfi_info->size();
@@ -403,6 +417,7 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index,
}
}
+
// Deduplicate code arrays
SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter =
code_offsets_.find(quick_code);
@@ -413,8 +428,13 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index,
offset += sizeof(code_size); // code size is prepended before code
offset += code_size;
oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
+
+ // Apply the final relocations to the code now that we
+ // know the offset.
+ compiled_method->ApplyFinalRelocations(this, quick_code_offset);
}
}
+
frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
core_spill_mask = compiled_method->GetCoreSpillMask();
fp_spill_mask = compiled_method->GetFpSpillMask();
@@ -827,11 +847,39 @@ size_t OatWriter::WriteCodeMethod(OutputStream* out, const size_t file_offset,
DCHECK_OFFSET();
}
DCHECK_ALIGNED(relative_offset, kArmAlignment);
+
+ // Write out a trampoline island if there is one at this point.
+ uint32_t trampsize = WriteTrampolineIslandIfNecessary(out, relative_offset +
+ compiled_method->CodeDelta());
+ if (trampsize > 0) {
+ relative_offset += trampsize;
+ size_code_ += trampsize;
+ DCHECK_OFFSET();
+
+ // Need to realign the code again after the island.
+ uint32_t aligned_offset = compiled_method->AlignCode(relative_offset);
+ uint32_t aligned_code_delta = aligned_offset - relative_offset;
+ if (aligned_code_delta != 0) {
+ off_t new_offset = out->Seek(aligned_code_delta, kSeekCurrent);
+ size_code_alignment_ += aligned_code_delta;
+ uint32_t expected_offset = file_offset + aligned_offset;
+ if (static_cast<uint32_t>(new_offset) != expected_offset) {
+ PLOG(ERROR) << "Failed to seek to align oat code. Actual: " << new_offset
+ << " Expected: " << expected_offset << " File: " << out->GetLocation();
+ return 0;
+ }
+ relative_offset += aligned_code_delta;
+ DCHECK_OFFSET();
+ }
+ DCHECK_ALIGNED(relative_offset, kArmAlignment);
+ }
+
uint32_t code_size = quick_code->size() * sizeof(uint8_t);
CHECK_NE(code_size, 0U);
// Deduplicate code arrays
size_t code_offset = relative_offset + sizeof(code_size) + compiled_method->CodeDelta();
+
SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter =
code_offsets_.find(quick_code);
if (code_iter != code_offsets_.end() && code_offset != method_offsets.code_offset_) {
@@ -1121,4 +1169,36 @@ bool OatWriter::OatClass::Write(OatWriter* oat_writer,
return true;
}
+// Decides whether a trampoline island has to be placed at 'offset' and, if so, records it.
+// Returns the size of the island to reserve, or 0 when no island is placed here.
+// An island is placed when none exists yet, or when 'offset' has reached the current
+// island's offset plus the driver's maximum trampoline distance.
+uint32_t OatWriter::AllocateTrampolineIslandIfNecessary(uint32_t offset) {
+  const size_t island_spacing = compiler_driver_->GetMaxEntrypointTrampolineOffset();
+  if (island_spacing == 0) {
+    // Compiler driver says we don't need trampoline islands.
+    return 0;
+  }
+  // 0xffffffff is the constructor's initial value, i.e. no island has been placed yet.
+  const bool no_island_yet = (current_trampoline_island_offset_ == 0xffffffff);
+  const uint32_t island_limit = current_trampoline_island_offset_ + island_spacing;
+  if (!no_island_yet && offset < island_limit) {
+    return 0;
+  }
+  LOG(DEBUG) << "Need trampoline island at offset " << std::hex << offset;
+  trampoline_island_offsets_.push_back(offset);
+  current_trampoline_island_offset_ = offset;
+  return compiler_driver_->GetEntrypointTrampolineTableSize();
+}
+
+// Writes the trampoline island recorded for 'offset', if there is one.
+//
+// Returns the number of bytes written, or 0 when no island was recorded at 'offset',
+// the island is empty, or the write failed. A failed write is logged; the return type
+// gives callers no way to tell it apart from "no island here".
+uint32_t OatWriter::WriteTrampolineIslandIfNecessary(OutputStream* out, uint32_t offset) {
+  bool island_here = false;
+  for (uint32_t island_offset : trampoline_island_offsets_) {
+    if (island_offset == offset) {
+      island_here = true;
+      break;
+    }
+  }
+  if (!island_here) {
+    return 0;
+  }
+
+  const uint32_t size = compiler_driver_->GetEntrypointTrampolineTableSize();
+  LOG(DEBUG) << "Writing trampoline island at offset " << std::hex << offset << " size: "
+             << std::dec << size;
+  if (size == 0) {
+    // Nothing to emit. Also avoids taking &code[0] of a possibly empty vector.
+    return 0;
+  }
+
+  const std::vector<uint8_t>& code = compiler_driver_->GetEntrypointTrampolineTableCode();
+  // The write below reads 'size' bytes from the buffer, so it must be at least that big.
+  DCHECK_LE(size, code.size());
+  if (!out->WriteFully(&code[0], size)) {
+    PLOG(ERROR) << "Failed to write trampoline island at offset " << offset
+                << " File: " << out->GetLocation();
+    return 0;
+  }
+  return size;
+}
+
} // namespace art
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index bab1a26d44..2840cbfe66 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -95,6 +95,10 @@ class OatWriter {
return method_info_;
}
+ // Returns the value of current_trampoline_island_offset_, which the constructor
+ // initializes to 0xffffffff.
+ uint32_t GetCurrentTrampolineIslandOffset() const {
+ return current_trampoline_island_offset_;
+ }
+
private:
size_t InitOatHeader();
size_t InitOatDexFiles(size_t offset);
@@ -134,6 +138,9 @@ class OatWriter {
void ReportWriteFailure(const char* what, uint32_t method_idx, const DexFile& dex_file,
const OutputStream& out) const;
+ uint32_t AllocateTrampolineIslandIfNecessary(uint32_t offset);
+ uint32_t WriteTrampolineIslandIfNecessary(OutputStream* out, uint32_t offset);
+
class OatDexFile {
public:
explicit OatDexFile(size_t offset, const DexFile& dex_file);
@@ -288,6 +295,21 @@ class OatWriter {
SafeMap<const std::vector<uint8_t>*, uint32_t> mapping_table_offsets_;
SafeMap<const std::vector<uint8_t>*, uint32_t> gc_map_offsets_;
+ // The trampoline islands. These are sequences of code inserted between methods
+ // in the output. They contain jumps to other addresses and are accessed
+ // by direct calls in the method code. Due to the range of call instructions
+ // on certain architectures, we need to be able to put down multiple islands that
+ // are in range of the call instructions. On ARM this is done every 15MB (the call range
+ // on Thumb2 is 16MB). At any point in the output we have a current island that is
+ // guaranteed to be in range. This is held in the 'current_trampoline_island_offset'
+ // variable (an offset into the instruction stream).
+ //
+ // The vector 'trampoline_island_offsets' contains the offsets of all the
+ // islands we have generated. This is used when performing the write of the file.
+
+ uint32_t current_trampoline_island_offset_; // Current island offset.
+ std::vector<uint32_t> trampoline_island_offsets_; // All the island offsets.
+
DISALLOW_COPY_AND_ASSIGN(OatWriter);
};
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d19c40c291..f48e9d04cd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -109,6 +109,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(CompilerDriver& driver,
mapping_table,
vmap_table,
gc_map,
+ nullptr,
nullptr);
}