summaryrefslogtreecommitdiffstats
path: root/compiler
diff options
context:
space:
mode:
authorDave Allison <dallison@google.com>2014-02-19 14:05:39 -0800
committerDave Allison <dallison@google.com>2014-04-04 16:07:46 -0700
commit754ddad084ccb610d0cf486f6131bdc69bae5bc6 (patch)
tree18d8314f3f6760b035c2bcda7760782ad4f0e0bf /compiler
parent97a332b4476d5a2b4ad0650dacc6bfcff882fc57 (diff)
downloadart-754ddad084ccb610d0cf486f6131bdc69bae5bc6.tar.gz
art-754ddad084ccb610d0cf486f6131bdc69bae5bc6.tar.bz2
art-754ddad084ccb610d0cf486f6131bdc69bae5bc6.zip
Use trampolines for calls to helpers
This is an ARM specific optimization to the compiler that uses trampoline islands to make calls to runtime helper functions. The intention is to reduce the size of the generated code (by 2 bytes per call) without affecting performance. By default this is on when generating an OAT file. It is off when compiling to memory. To switch this off in dex2oat, use the command line option: --no-helper-trampolines Enhances disassembler to print the trampoline entry on the BL instruction like this: 0xb6a850c0: f7ffff9e bl -196 (0xb6a85000) ; pTestSuspend Bug: 12607709 Change-Id: I9202bdb7cf21252ad807bd48701f1f6ce8e3d0fe
Diffstat (limited to 'compiler')
-rw-r--r--compiler/Android.mk3
-rw-r--r--compiler/arch/arm/entrypoint_trampoline.cc33
-rw-r--r--compiler/arch/arm/final_relocations_arm.cc88
-rw-r--r--compiler/arch/arm/final_relocations_arm.h39
-rw-r--r--compiler/compiled_method.cc17
-rw-r--r--compiler/compiled_method.h19
-rw-r--r--compiler/dex/compiler_enums.h2
-rw-r--r--compiler/dex/quick/arm/arm_lir.h1
-rw-r--r--compiler/dex/quick/arm/assemble_arm.cc19
-rw-r--r--compiler/dex/quick/arm/call_arm.cc31
-rw-r--r--compiler/dex/quick/arm/codegen_arm.h5
-rw-r--r--compiler/dex/quick/arm/utility_arm.cc10
-rw-r--r--compiler/dex/quick/codegen_util.cc17
-rw-r--r--compiler/dex/quick/gen_common.cc19
-rw-r--r--compiler/dex/quick/gen_invoke.cc17
-rw-r--r--compiler/dex/quick/mir_to_lir.h19
-rw-r--r--compiler/dex/quick/x86/call_x86.cc14
-rw-r--r--compiler/dex/quick/x86/codegen_x86.h5
-rw-r--r--compiler/driver/compiler_driver.cc39
-rw-r--r--compiler/driver/compiler_driver.h111
-rw-r--r--compiler/driver/compiler_options.h14
-rw-r--r--compiler/final_relocations.cc32
-rw-r--r--compiler/final_relocations.h89
-rw-r--r--compiler/oat_writer.cc88
-rw-r--r--compiler/oat_writer.h22
-rw-r--r--compiler/optimizing/optimizing_compiler.cc1
26 files changed, 715 insertions, 39 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b17cd52fad..29d9cc6c1d 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -19,6 +19,8 @@ LOCAL_PATH := $(call my-dir)
include art/build/Android.common.mk
LIBART_COMPILER_SRC_FILES := \
+ arch/arm/final_relocations_arm.cc \
+ arch/arm/entrypoint_trampoline.cc \
compiled_method.cc \
dex/local_value_numbering.cc \
dex/quick/arm/assemble_arm.cc \
@@ -65,6 +67,7 @@ LIBART_COMPILER_SRC_FILES := \
dex/ssa_transformation.cc \
driver/compiler_driver.cc \
driver/dex_compilation_unit.cc \
+ final_relocations.cc \
jni/quick/arm/calling_convention_arm.cc \
jni/quick/arm64/calling_convention_arm64.cc \
jni/quick/mips/calling_convention_mips.cc \
diff --git a/compiler/arch/arm/entrypoint_trampoline.cc b/compiler/arch/arm/entrypoint_trampoline.cc
new file mode 100644
index 0000000000..9ff9961f8d
--- /dev/null
+++ b/compiler/arch/arm/entrypoint_trampoline.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "driver/compiler_driver.h"
+
+namespace art {
+ void CompilerDriver::BuildArmEntrypointTrampolineCall(ThreadOffset<4> thread_offset) {
+   // Appends one 4-byte trampoline to entrypoint_trampoline_code_. The trampoline is the
+   // Thumb2 instruction encoding of:
+   //     ldr pc,[r9,#offset]
+   // where offset is the value of thread_offset.
+
+   // TODO: we don't currently have a Thumb2 assembler, when we do use that
+   // in preference to the hand generated code below.
+   const uint32_t kMaxImm12 = 0xfff;  // Largest offset the 12 bit immediate field can hold.
+   const uint32_t offset = thread_offset.Uint32Value();
+
+   // The offset used to be masked silently; an out of range value would have produced a
+   // trampoline that jumps through the wrong slot. Fail loudly instead.
+   CHECK_LE(offset, kMaxImm12) << "Thread offset does not fit in ldr imm12 field";
+
+   // Base opcode with Rt = pc (0xf in bits 15:12), Rn = r9 in bits 19:16, imm12 in bits 11:0.
+   const uint32_t instruction = 0xf8d0f000 | (9 << 16) | (offset & kMaxImm12);
+
+   // Emit high halfword first, each halfword in little endian byte order.
+   entrypoint_trampoline_code_.push_back((instruction >> 16) & 0xff);
+   entrypoint_trampoline_code_.push_back((instruction >> 24) & 0xff);
+   entrypoint_trampoline_code_.push_back((instruction >> 0) & 0xff);
+   entrypoint_trampoline_code_.push_back((instruction >> 8) & 0xff);
+ }
+} // namespace art
diff --git a/compiler/arch/arm/final_relocations_arm.cc b/compiler/arch/arm/final_relocations_arm.cc
new file mode 100644
index 0000000000..e95f3e2e43
--- /dev/null
+++ b/compiler/arch/arm/final_relocations_arm.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arch/arm/final_relocations_arm.h"
+#include "compiled_method.h"
+#include "driver/compiler_driver.h"
+#include "oat_writer.h"
+#include "dex/compiler_ir.h"
+
+namespace art {
+
+void FinalEntrypointRelocationSetArm::Apply(uint8_t* code, const OatWriter* writer,
+                                            uint32_t address) const {
+  // Rewrites every recorded Thumb2 BL placeholder in |code| so that it branches to its
+  // entry in the current trampoline island.
+  //   code:    the method's code bytes; patched in place.
+  //   writer:  provides the offset of the current trampoline island.
+  //   address: position of code[0], expressed in the same space as the island offset
+  //            (the branch displacement is computed as island target - pc).
+  uint32_t island_offset = writer->GetCurrentTrampolineIslandOffset();
+  const bool kDebugPrint = false;
+
+  for (const auto& reloc : relocations_) {
+    switch (reloc.type_) {
+      case kRelocationCall: {
+        // Fetch the instruction. This is two 16 bit words, high halfword first, each in
+        // little endian byte order. Assemble it from individual bytes (mirroring the
+        // write-back below): this needs no alignment, does not depend on the host byte
+        // order, and avoids shifting a promoted signed int into its sign bit.
+        const uint8_t* inst_bytes = code + reloc.code_offset_;
+        uint32_t inst = (static_cast<uint32_t>(inst_bytes[1]) << 24) |
+                        (static_cast<uint32_t>(inst_bytes[0]) << 16) |
+                        (static_cast<uint32_t>(inst_bytes[3]) << 8) |
+                        static_cast<uint32_t>(inst_bytes[2]);
+
+        // Check that we are trying to relocate a Thumb2 BL instruction.
+        CHECK_EQ(inst, 0xf000d000);
+
+        uint32_t pc = address + reloc.code_offset_ + 4;   // Thumb PC is instruction + 4
+
+        // The trampoline target is to a table starting at the island. Each trampoline
+        // entry is 4 bytes long.
+        uint32_t target = island_offset + static_cast<uint32_t>(reloc.value_) * 4;
+        int32_t delta = target - pc;
+
+        if (kDebugPrint) {
+          LOG(INFO) << "applying final relocation for island " << island_offset;
+          LOG(INFO) << "pc: " << std::hex << pc << ", target: " << target <<
+              ", reloc.value: " << reloc.value_ << ", delta: " << delta;
+        }
+
+        // Max range for a Thumb2 BL is 16MB. All calls will be to a lower address.
+        const int32_t kMaxRange = -16 * static_cast<int32_t>(MB);
+        CHECK_LT(delta, 0);
+        CHECK_GT(delta, kMaxRange);
+
+        // Modify the instruction using the T1 BL instruction format.
+        // This is equivalent of a R_ARM_THM_CALL ELF relocation.
+        delta >>= 1;      // Low bit is implicit.
+        uint32_t signbit = (delta >> 31) & 0x1;
+        uint32_t i1 = (delta >> 22) & 0x1;
+        uint32_t i2 = (delta >> 21) & 0x1;
+        uint32_t imm10 = (delta >> 11) & 0x03ff;
+        uint32_t imm11 = delta & 0x07ff;
+        // J1/J2 hold NOT(I1 XOR S) / NOT(I2 XOR S).
+        uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
+        uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
+        uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
+            imm11;
+        inst |= value;
+
+        // Write the instruction back. High 16 bits first, little endian format.
+        uint32_t offset = reloc.code_offset_;
+        code[offset+0] = (inst >> 16) & 0xff;
+        code[offset+1] = (inst >> 24) & 0xff;
+        code[offset+2] = (inst >> 0) & 0xff;
+        code[offset+3] = (inst >> 8) & 0xff;
+        break;
+      }
+
+      default:
+        LOG(FATAL) << "Unknown entrypoint relocation type " << reloc.type_;
+    }
+  }
+}
+} // namespace art
diff --git a/compiler/arch/arm/final_relocations_arm.h b/compiler/arch/arm/final_relocations_arm.h
new file mode 100644
index 0000000000..656c181084
--- /dev/null
+++ b/compiler/arch/arm/final_relocations_arm.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_ARCH_ARM_FINAL_RELOCATIONS_ARM_H_
+#define ART_COMPILER_ARCH_ARM_FINAL_RELOCATIONS_ARM_H_
+
+// ARM final relocations
+
+#include "final_relocations.h"
+
+namespace art {
+
+class CompilerDriver;
+struct CompilationUnit;
+
+// ARM flavour of FinalEntrypointRelocationSet. Only Apply() is declared here; its
+// definition lives in final_relocations_arm.cc.
+class FinalEntrypointRelocationSetArm : public FinalEntrypointRelocationSet {
+ public:
+  explicit FinalEntrypointRelocationSetArm(const CompilerDriver* driver)
+      : FinalEntrypointRelocationSet(driver) {
+  }
+
+  ~FinalEntrypointRelocationSetArm() {
+  }
+
+  // Patches the relocated instructions inside |code|, given the oat writer and the
+  // address at which |code| starts.
+  void Apply(uint8_t* code, const OatWriter* writer, uint32_t address) const;
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_ARCH_ARM_FINAL_RELOCATIONS_ARM_H_
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 8e013c1ece..4eb1f7a7ce 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -16,20 +16,23 @@
#include "compiled_method.h"
#include "driver/compiler_driver.h"
+#include "oat_writer.h"
+#include "dex/compiler_ir.h"
namespace art {
CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
- const std::vector<uint8_t>& quick_code)
+ const std::vector<uint8_t>& quick_code,
+ const FinalRelocations* relocs)
: compiler_driver_(compiler_driver), instruction_set_(instruction_set),
- portable_code_(nullptr), quick_code_(nullptr) {
+ portable_code_(nullptr), quick_code_(nullptr), final_relocations_(relocs) {
SetCode(&quick_code, nullptr);
}
CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
const std::string& elf_object, const std::string& symbol)
: compiler_driver_(compiler_driver), instruction_set_(instruction_set),
- portable_code_(nullptr), quick_code_(nullptr), symbol_(symbol) {
+ portable_code_(nullptr), quick_code_(nullptr), symbol_(symbol), final_relocations_(nullptr) {
CHECK_NE(elf_object.size(), 0U);
CHECK_NE(symbol.size(), 0U);
std::vector<uint8_t> temp_code(elf_object.size());
@@ -161,8 +164,9 @@ CompiledMethod::CompiledMethod(CompilerDriver& driver,
const std::vector<uint8_t>& mapping_table,
const std::vector<uint8_t>& vmap_table,
const std::vector<uint8_t>& native_gc_map,
- const std::vector<uint8_t>* cfi_info)
- : CompiledCode(&driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
+ const std::vector<uint8_t>* cfi_info,
+ const FinalRelocations* relocs)
+ : CompiledCode(&driver, instruction_set, quick_code, relocs), frame_size_in_bytes_(frame_size_in_bytes),
core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
mapping_table_(driver.DeduplicateMappingTable(mapping_table)),
vmap_table_(driver.DeduplicateVMapTable(vmap_table)),
@@ -176,7 +180,7 @@ CompiledMethod::CompiledMethod(CompilerDriver& driver,
const size_t frame_size_in_bytes,
const uint32_t core_spill_mask,
const uint32_t fp_spill_mask)
- : CompiledCode(&driver, instruction_set, code),
+ : CompiledCode(&driver, instruction_set, code, nullptr),
frame_size_in_bytes_(frame_size_in_bytes),
core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
mapping_table_(driver.DeduplicateMappingTable(std::vector<uint8_t>())),
@@ -205,5 +209,4 @@ CompiledMethod::CompiledMethod(CompilerDriver& driver, InstructionSet instructio
vmap_table_ = driver.DeduplicateVMapTable(std::vector<uint8_t>());
gc_map_ = driver.DeduplicateGCMap(std::vector<uint8_t>());
}
-
} // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 90ae6eeae8..d9edc6bd55 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -23,6 +23,7 @@
#include "instruction_set.h"
#include "utils.h"
#include "UniquePtr.h"
+#include "final_relocations.h"
namespace llvm {
class Function;
@@ -31,12 +32,14 @@ namespace llvm {
namespace art {
class CompilerDriver;
+class OatWriter;
class CompiledCode {
public:
// For Quick to supply an code blob
CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
- const std::vector<uint8_t>& quick_code);
+ const std::vector<uint8_t>& quick_code,
+ const FinalRelocations* relocations);
// For Portable to supply an ELF object
CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
@@ -78,6 +81,13 @@ class CompiledCode {
const std::vector<uint32_t>& GetOatdataOffsetsToCompliledCodeOffset() const;
void AddOatdataOffsetToCompliledCodeOffset(uint32_t offset);
+ // Apply all the final relocations to the quick code sequence.
+ void ApplyFinalRelocations(const OatWriter *writer, uint32_t address) {
+   // Nothing to do when this code carries no final relocations.
+   if (final_relocations_.get() == nullptr) {
+     return;
+   }
+   // Patch the quick code in place, starting at its first byte.
+   uint8_t* const code_start = &(*quick_code_)[0];
+   final_relocations_->Apply(code_start, writer, address);
+ }
+
private:
CompilerDriver* const compiler_driver_;
@@ -97,6 +107,10 @@ class CompiledCode {
// OatWriter and then used by the ElfWriter to add relocations so
// that MCLinker can update the values to the location in the linked .so.
std::vector<uint32_t> oatdata_offsets_to_compiled_code_offset_;
+
+ // Set of relocations to apply as the final pass. This happens
+ // only when the final oat file addresses are known.
+ UniquePtr<const FinalRelocations> final_relocations_;
};
class CompiledMethod : public CompiledCode {
@@ -111,7 +125,8 @@ class CompiledMethod : public CompiledCode {
const std::vector<uint8_t>& mapping_table,
const std::vector<uint8_t>& vmap_table,
const std::vector<uint8_t>& native_gc_map,
- const std::vector<uint8_t>* cfi_info);
+ const std::vector<uint8_t>* cfi_info,
+ const FinalRelocations* relocations);
// Constructs a CompiledMethod for the QuickJniCompiler.
CompiledMethod(CompilerDriver& driver,
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 6c8c85d16d..a3ce4f9c54 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -430,10 +430,12 @@ enum FixupKind {
kFixupT2Branch, // Thumb2 Unconditional branch
kFixupBlx1, // Blx1 (start of Blx1/Blx2 pair).
kFixupBl1, // Bl1 (start of Bl1/Bl2 pair).
+ kFixup2Bl1, // Thumb2 Bl1 (start of Bl1/Bl2 pair).
kFixupAdr, // Adr.
kFixupMovImmLST, // kThumb2MovImm16LST.
kFixupMovImmHST, // kThumb2MovImm16HST.
kFixupAlign4, // Align to 4-byte boundary.
+ kFixupTrampCall, // Call into trampoline for runtime helper.
};
std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index c9acd66bba..8c5c6c5fca 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -496,6 +496,7 @@ enum ArmOpcode {
kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024.
kThumb2LdrdI8, // ldrd rt, rt2, [rn +-/1024].
kThumb2StrdI8, // strd rt, rt2, [rn +-/1024].
+ kThumb2BlTramp, // Thumb2 BL to trampoline
kArmLast,
};
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index f77b0a6302..151f3c7b33 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1035,6 +1035,11 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = {
kFmtBitBlt, 7, 0,
IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
"strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
+ ENCODING_MAP(kThumb2BlTramp, 0xf000d000,
+ kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1,
+ IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+ "bl", "!0t", 4, kFixupTrampCall),
};
// new_lir replaces orig_lir in the pcrel_fixup list.
@@ -1224,6 +1229,7 @@ void ArmMir2Lir::AssembleLIR() {
while (true) {
offset_adjustment = 0;
AssemblerStatus res = kSuccess; // Assume success
+
generation ^= 1;
// Note: nodes requring possible fixup linked in ascending order.
lir = first_fixup_;
@@ -1577,6 +1583,17 @@ void ArmMir2Lir::AssembleLIR() {
}
break;
}
+ case kFixupTrampCall: {
+ // This is a call to a trampoline. The value for the trampoline call needs
+ // both the offset into the code and the trampoline to call. It will be
+ // added to the list of calls when we actually insert this instruction into
+ // the code_buffer (when we have a stable instruction stream).
+ uint32_t instoffset = lir->offset;
+ // LOG(INFO) << "adding trampoline call: offset: " << instoffset <<
+ // " entrypoint: " << lir->operands[0];
+ trampoline_calls_.push_back(TrampolineCall(instoffset, lir->operands[0]));
+ break;
+ }
default:
LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
}
@@ -1595,6 +1612,7 @@ void ArmMir2Lir::AssembleLIR() {
starting_offset += offset_adjustment;
data_offset_ = (starting_offset + 0x3) & ~0x3;
AssignDataOffsets();
+ trampoline_calls_.clear(); // These are invalid now.
}
}
@@ -1675,5 +1693,4 @@ void ArmMir2Lir::AssignDataOffsets() {
total_size_ = AssignFillArrayDataOffset(offset);
}
-
} // namespace art
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index d0d0e6b3a7..1b485a3d34 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -19,6 +19,7 @@
#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
namespace art {
@@ -468,4 +469,34 @@ void ArmMir2Lir::GenSpecialExitSequence() {
NewLIR1(kThumbBx, rARM_LR);
}
+// Entrypoint calls.
+RegStorage ArmMir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
+  // When helper trampolines are generated the call goes through OpThreadMem (see
+  // CallHelper), so no target register is loaded; otherwise load the helper address.
+  const bool use_trampolines =
+      cu_->compiler_driver->GetCompilerOptions().GenerateHelperTrampolines();
+  return use_trampolines ? RegStorage::InvalidReg() : LoadHelper(helper_offset);
+}
+
+LIR* ArmMir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+                            bool use_link) {
+  // Emits the call to a runtime helper and returns the call instruction.
+  //   - Linked call with helper trampolines enabled: call through OpThreadMem.
+  //   - Otherwise: branch (with or without link) through r_tgt, then free r_tgt.
+  // NOTE(review): with use_link == false r_tgt is always used, yet CallHelperSetup returns
+  // InvalidReg when trampolines are enabled - confirm no caller combines the two.
+  LIR* call = nullptr;
+  if (use_link && cu_->compiler_driver->GetCompilerOptions().GenerateHelperTrampolines()) {
+    call = OpThreadMem(kOpBlx, helper_offset);
+  } else {
+    call = OpReg(use_link ? kOpBlx : kOpBx, r_tgt);
+    FreeTemp(r_tgt);
+  }
+
+  if (safepoint_pc) {
+    MarkSafepointPC(call);
+  }
+  return call;
+}
} // namespace art
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 13fa6353b0..3c0aa03d5b 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -195,6 +195,11 @@ class ArmMir2Lir FINAL : public Mir2Lir {
bool InexpensiveConstantLong(int64_t value);
bool InexpensiveConstantDouble(int64_t value);
+ // Entrypoint calls.
+ RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+ LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+ bool safepoint_pc, bool use_link);
+
private:
void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
ConditionCode ccode);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 70cbdd2e31..8e6d9a8ef8 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -1111,8 +1111,14 @@ LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
}
LIR* ArmMir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
- LOG(FATAL) << "Unexpected use of OpThreadMem for Arm";
- return NULL;
+ if (op == kOpBlx) {
+ const uint32_t trampoline = cu_->compiler_driver->AddEntrypointTrampoline(
+ thread_offset.Int32Value());
+ return NewLIR1(kThumb2BlTramp, trampoline);
+ } else {
+ LOG(FATAL) << "Invalid opcode for ARM OpThreadMem on Arm";
+ return NULL;
+ }
}
LIR* ArmMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6e6b8f0a30..b163ef7e06 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1012,11 +1012,26 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() {
vmap_encoder.PushBackUnsigned(0u); // Size is 0.
}
+ // All relocations
+ UniquePtr<FinalRelocations> all_relocs(new FinalRelocations());
+
+ // Build the final relocations for this method.
+ if (trampoline_calls_.size() != 0) {
+ FinalEntrypointRelocationSet* ep_relocs =
+ cu_->compiler_driver->AllocateFinalEntrypointRelocationSet(cu_);
+ for (size_t i = 0 ; i < trampoline_calls_.size(); ++i) {
+ const TrampolineCall& call = trampoline_calls_[i];
+ ep_relocs->Add(call.code_offset_, call.trampoline_offset_);
+ }
+ all_relocs->push_back(ep_relocs);
+ }
+
UniquePtr<std::vector<uint8_t> > cfi_info(ReturnCallFrameInformation());
CompiledMethod* result =
new CompiledMethod(*cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
- vmap_encoder.GetData(), native_gc_map_, cfi_info.get());
+ vmap_encoder.GetData(), native_gc_map_, cfi_info.get(),
+ all_relocs.release());
return result;
}
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index a3fb4201db..4e32931670 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -17,6 +17,7 @@
#include "dex/compiler_internals.h"
#include "dex/quick/arm/arm_lir.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"
#include "mirror/object-inl.h"
@@ -950,12 +951,20 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) {
void Compile() {
GenerateTargetLabel();
- RegStorage r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pResolveString));
+ const CompilerOptions& compiler_options =
+ m2l_->cu_->compiler_driver->GetCompilerOptions();
+ if (compiler_options.GenerateHelperTrampolines()) {
+ m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+ m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(4, pResolveString),
+ true);
+ } else {
+ RegStorage r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pResolveString));
- m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_); // .eq
- LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
- m2l_->MarkSafepointPC(call_inst);
- m2l_->FreeTemp(r_tgt);
+ m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+ LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
+ m2l_->MarkSafepointPC(call_inst);
+ m2l_->FreeTemp(r_tgt);
+ }
m2l_->OpUnconditionalBranch(cont_);
}
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 396a709994..fee15d7b3a 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -19,6 +19,7 @@
#include "dex/quick/dex_file_method_inliner.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex_file-inl.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "invoke_type.h"
#include "mirror/array.h"
@@ -62,25 +63,19 @@ void Mir2Lir::AddIntrinsicLaunchpad(CallInfo* info, LIR* branch, LIR* resume) {
/*
* To save scheduling time, helper calls are broken into two parts: generation of
- * the helper target address, and the actual call to the helper. Because x86
- * has a memory call operation, part 1 is a NOP for x86. For other targets,
- * load arguments between the two parts.
+ * the helper target address, and the actual call to the helper.
+ * These functions can be overridden by architecture specific codegen.
*/
RegStorage Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
- return (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) ? RegStorage::InvalidReg() : LoadHelper(helper_offset);
+ return LoadHelper(helper_offset);
}
/* NOTE: if r_tgt is a temp, it will be freed following use */
LIR* Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
bool use_link) {
- LIR* call_inst;
OpKind op = use_link ? kOpBlx : kOpBx;
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
- call_inst = OpThreadMem(op, helper_offset);
- } else {
- call_inst = OpReg(op, r_tgt);
- FreeTemp(r_tgt);
- }
+ LIR* call_inst = OpReg(op, r_tgt);
+ FreeTemp(r_tgt);
if (safepoint_pc) {
MarkSafepointPC(call_inst);
}
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 35f948e083..cecb01b768 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -328,6 +328,18 @@ class Mir2Lir : public Backend {
LIR* const cont_;
};
+ // Record of a single call made through a trampoline: where the calling instruction
+ // sits in the method's code, and which trampoline it calls.
+ struct TrampolineCall {
+   TrampolineCall(uint32_t code_offset, uint32_t trampoline_offset)
+       : code_offset_(code_offset),
+         trampoline_offset_(trampoline_offset) {
+   }
+
+   // Offset of instruction in method code stream (bytes).
+   uint32_t code_offset_;
+   // Which trampoline to call.
+   uint32_t trampoline_offset_;
+ };
+
virtual ~Mir2Lir() {}
int32_t s4FromSwitchData(const void* switch_data) {
@@ -614,11 +626,11 @@ class Mir2Lir : public Backend {
virtual void GenConstWide(RegLocation rl_dest, int64_t value);
virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_src2);
+ virtual LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+ bool use_link = true);
+ virtual RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
// Shared by all targets - implemented in gen_invoke.cc.
- LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
- bool use_link = true);
- RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
void CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc);
void CallRuntimeHelperReg(ThreadOffset<4> helper_offset, RegStorage arg0, bool safepoint_pc);
void CallRuntimeHelperRegLocation(ThreadOffset<4> helper_offset, RegLocation arg0,
@@ -1277,6 +1289,7 @@ class Mir2Lir : public Backend {
LIR* last_lir_insn_;
GrowableArray<LIRSlowPath*> slow_paths_;
+ std::vector<TrampolineCall> trampoline_calls_;
}; // Class Mir2Lir
} // namespace art
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 729b30d621..2bd2caaff5 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -278,4 +278,18 @@ void X86Mir2Lir::GenSpecialExitSequence() {
NewLIR0(kX86Ret);
}
+RegStorage X86Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {  // x86 version: helper_offset is not used here.
+ return RegStorage::InvalidReg();  // No target register; X86Mir2Lir::CallHelper calls via OpThreadMem(helper_offset).
+}
+
+LIR* X86Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+                            bool use_link) {
+  // The helper is called through OpThreadMem using helper_offset; r_tgt is not used.
+  const OpKind call_op = use_link ? kOpBlx : kOpBx;
+  LIR* const call = OpThreadMem(call_op, helper_offset);
+  if (safepoint_pc) {
+    MarkSafepointPC(call);
+  }
+  return call;
+}
+
+
} // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index af2a140296..e913d1d5a4 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -328,6 +328,11 @@ class X86Mir2Lir FINAL : public Mir2Lir {
*/
std::vector<uint8_t>* ReturnCallFrameInformation();
+ // Entrypoint calls.
+ RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+ LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+ bool safepoint_pc, bool use_link);
+
private:
void EmitPrefix(const X86EncodingMap* entry);
void EmitOpcode(const X86EncodingMap* entry);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a241d51468..8bf3b0486a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -24,6 +24,8 @@
#include <unistd.h>
#include <utility>
+#include "arch/arm/final_relocations_arm.h"
+#include "base/hex_dump.h"
#include "base/stl_util.h"
#include "base/timing_logger.h"
#include "class_linker.h"
@@ -506,6 +508,7 @@ void CompilerDriver::CompileAll(jobject class_loader,
UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
PreCompile(class_loader, dex_files, thread_pool.get(), timings);
Compile(class_loader, dex_files, thread_pool.get(), timings);
+ PostCompile();
if (dump_stats_) {
stats_->Dump();
}
@@ -617,6 +620,10 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De
UpdateImageClasses(timings);
}
+void CompilerDriver::PostCompile() {  // Called from CompileAll() right after Compile().
+ BuildEntrypointTrampolineCode();  // Generate trampoline code for every entry in the entrypoint trampoline table.
+}
+
bool CompilerDriver::IsImageClass(const char* descriptor) const {
if (!IsImage()) {
return true;
@@ -1240,6 +1247,25 @@ bool CompilerDriver::IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc
return result;
}
+uint32_t CompilerDriver::AddEntrypointTrampoline(uint32_t entrypoint) {
+  // Register the entrypoint on behalf of the calling thread and hand back whatever
+  // the trampoline table returns for it.
+  Thread* const self = Thread::Current();
+  return entrypoint_trampolines_.AddEntrypoint(self, entrypoint);
+}
+
+
+void CompilerDriver::BuildEntrypointTrampolineCode() {
+  // Emit one trampoline per entry of the trampoline table. Only Thumb2 has a
+  // trampoline generator; any other instruction set is fatal as soon as there is
+  // an entry to emit.
+  const auto& trampoline_table = entrypoint_trampolines_.GetTrampolineTable();
+  for (uint32_t entrypoint_offset : trampoline_table) {
+    if (instruction_set_ == kThumb2) {
+      BuildArmEntrypointTrampolineCall(ThreadOffset<4>(entrypoint_offset));
+    } else {
+      UNIMPLEMENTED(FATAL) << "No entrypoint trampolines for this architecture";
+    }
+  }
+}
+
+
void CompilerDriver::AddCodePatch(const DexFile* dex_file,
uint16_t referrer_class_def_idx,
uint32_t referrer_method_idx,
@@ -2150,4 +2176,17 @@ bool CompilerDriver::SkipCompilation(const std::string& method_name) {
}
return !compile;
}
+
+FinalEntrypointRelocationSet* CompilerDriver::AllocateFinalEntrypointRelocationSet(
+    CompilationUnit* cu) const {
+  // Returns a newly allocated relocation set for the driver's instruction set.
+  // Only ARM and Thumb2 are supported; cu is not consulted.
+  const bool is_arm = (instruction_set_ == kArm) || (instruction_set_ == kThumb2);
+  if (is_arm) {
+    return new FinalEntrypointRelocationSetArm(this);
+  }
+  UNIMPLEMENTED(FATAL) << "Cannot allocate FinalEntrypointRelocationSet for non-ARM";
+  return nullptr;
+}
+
} // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 802f859da4..6df5d0c09f 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -634,6 +634,112 @@ class CompilerDriver {
// Should the compiler run on this method given profile information?
bool SkipCompilation(const std::string& method_name);
+ // Entrypoint trampolines.
+ //
+ // The idea here is that we can save code size by collecting the branches
+ // to the entrypoints (helper functions called by the generated code) into a
+ // table and then branching relative to that table from the code. On ARM 32 this
+ // will save 2 bytes per call. Only the entrypoints used by the program (the whole
+ // program - these are global) are in this table and are in no particular order.
+ //
+ // The trampolines will be placed right at the start of the .text section in the file
+ // and will consist of a table of instructions, each of which will branch relative to
+ // the thread register (r9 on ARM) to an entrypoint. On ARM this would look like:
+ //
+ // trampolines:
+ // 1: ldr pc, [r9, #40]
+ // 2: ldr pc, [r9, #8]
+ // ...
+ //
+ // Then a call to an entrypoint would be an immediate BL instruction to the appropriate
+ // label (1 or 2 in the above example). Because the entrypoint table has the lower bit
+ // of the address already set, the ldr pc will switch from ARM to Thumb for the entrypoint as
+ // necessary.
+ //
+ // On ARM, the range of a BL instruction is +-32MB (+-16MB on Thumb2), so this is more than
+ // enough for an immediate BL instruction in the generated code.
+ //
+ // The actual address of the trampoline for a particular entrypoint is not known until
+ // the OAT file is written and we know the addresses of all the branch instructions in
+ // the program. At this point we can rewrite the BL instruction to have the correct relative
+ // offset.
+ class EntrypointTrampolines {
+  public:
+   EntrypointTrampolines() : current_offset_(0), lock_("Entrypoint Trampolines") {}
+   ~EntrypointTrampolines() {}
+
+   // Register entrypoint 'ep' and return its index in the trampoline table.
+   // Registering an entrypoint that is already present returns the index it
+   // was given the first time. Holds lock_ for the duration of the call.
+   uint32_t AddEntrypoint(Thread* self, uint32_t ep) LOCKS_EXCLUDED(lock_) {
+     MutexLock mu(self, lock_);
+     Trampolines::iterator existing = trampolines_.find(ep);
+     if (existing != trampolines_.end()) {
+       return existing->second;
+     }
+     // First registration: the entry takes the next free index.
+     const uint32_t index = current_offset_;
+     trampolines_[ep] = index;
+     trampoline_table_.push_back(ep);
+     LOG(DEBUG) << "adding new trampoline for " << ep << " at offset " << index;
+     ++current_offset_;
+     return index;
+   }
+
+   // Registered entrypoints in order of registration. Read without taking lock_.
+   const std::vector<uint32_t>& GetTrampolineTable() const {
+     return trampoline_table_;
+   }
+
+   // Number of entrypoints registered so far. Read without taking lock_.
+   uint32_t GetTrampolineTableSize() const {
+     return current_offset_;
+   }
+
+  private:
+   // Index that the next newly registered entrypoint will receive.
+   uint32_t current_offset_;
+
+   // Maps an entrypoint offset to its index in the trampoline table.
+   typedef std::map<uint32_t, uint32_t> Trampolines;
+   Trampolines trampolines_ GUARDED_BY(lock_);
+
+   // Table of all registered offsets in order of registration.
+   std::vector<uint32_t> trampoline_table_;
+   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ };
+
+ uint32_t AddEntrypointTrampoline(uint32_t entrypoint);
+
+ // Returns the entrypoints registered so far, in order of registration.
+ const std::vector<uint32_t>& GetEntrypointTrampolineTable() const {
+ return entrypoint_trampolines_.GetTrampolineTable();
+ }
+
+ // Size of the trampoline table. On Thumb2 this is the number of registered entrypoints
+ // times 4 (presumably 4 bytes per trampoline instruction - TODO confirm). For any other
+ // instruction set the raw entrypoint count is returned unscaled.
+ uint32_t GetEntrypointTrampolineTableSize() const {
+   const uint32_t entry_count = entrypoint_trampolines_.GetTrampolineTableSize();
+   return (instruction_set_ == kThumb2) ? entry_count * 4 : entry_count;
+ }
+
+ // Get the maximum offset between entrypoint trampoline islands. Different architectures
+ // have limitations on the max offset for a call instruction. This function is used
+ // to determine when we need to generate a new trampoline island in the output to keep
+ // subsequent calls in range.
+ size_t GetMaxEntrypointTrampolineOffset() const {
+   if (instruction_set_ != kThumb2) {
+     // Returning 0 means we won't generate a trampoline island.
+     return 0;
+   }
+   // On Thumb2, the max range of a BL instruction is 16MB. Give it a little wiggle room.
+   return 15*MB;
+ }
+
+ void BuildEntrypointTrampolineCode();
+
+ // Architecture specific Entrypoint trampoline builder.
+ void BuildArmEntrypointTrampolineCall(ThreadOffset<4> offset);
+
+ // Returns the buffer holding the trampoline table code. OatWriter writes this out
+ // as the contents of each trampoline island.
+ const std::vector<uint8_t>& GetEntrypointTrampolineTableCode() const {
+ return entrypoint_trampoline_code_;
+ }
+
+ FinalEntrypointRelocationSet* AllocateFinalEntrypointRelocationSet(CompilationUnit* cu) const;
+
private:
// These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
// The only external contract is that unresolved method has flags 0 and resolved non-0.
@@ -671,6 +777,7 @@ class CompilerDriver {
LOCKS_EXCLUDED(Locks::mutator_lock_);
void LoadImageClasses(TimingLogger* timings);
+ void PostCompile() LOCKS_EXCLUDED(Locks::mutator_lock_);
// Attempt to resolve all type, methods, fields, and strings
// referenced from code in the dex file following PathClassLoader
@@ -831,6 +938,10 @@ class CompilerDriver {
DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_gc_map_;
DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_cfi_info_;
+ EntrypointTrampolines entrypoint_trampolines_;
+
+ std::vector<uint8_t> entrypoint_trampoline_code_;
+
DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
};
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 20c6bc8e4e..52248a6c5a 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -50,7 +50,8 @@ class CompilerOptions {
small_method_threshold_(kDefaultSmallMethodThreshold),
tiny_method_threshold_(kDefaultTinyMethodThreshold),
num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
- generate_gdb_information_(false)
+ generate_gdb_information_(false),
+ generate_helper_trampolines_(false)
#ifdef ART_SEA_IR_MODE
, sea_ir_mode_(false)
#endif
@@ -62,7 +63,8 @@ class CompilerOptions {
size_t small_method_threshold,
size_t tiny_method_threshold,
size_t num_dex_methods_threshold,
- bool generate_gdb_information
+ bool generate_gdb_information,
+ bool generate_helper_trampolines
#ifdef ART_SEA_IR_MODE
, bool sea_ir_mode
#endif
@@ -73,7 +75,8 @@ class CompilerOptions {
small_method_threshold_(small_method_threshold),
tiny_method_threshold_(tiny_method_threshold),
num_dex_methods_threshold_(num_dex_methods_threshold),
- generate_gdb_information_(generate_gdb_information)
+ generate_gdb_information_(generate_gdb_information),
+ generate_helper_trampolines_(generate_helper_trampolines)
#ifdef ART_SEA_IR_MODE
, sea_ir_mode_(sea_ir_mode)
#endif
@@ -140,6 +143,10 @@ class CompilerOptions {
return generate_gdb_information_;
}
+ // Whether helper trampolines were requested for this compilation. The default
+ // constructor leaves this off.
+ bool GenerateHelperTrampolines() const {
+ return generate_helper_trampolines_;
+ }
+
private:
CompilerFilter compiler_filter_;
size_t huge_method_threshold_;
@@ -148,6 +155,7 @@ class CompilerOptions {
size_t tiny_method_threshold_;
size_t num_dex_methods_threshold_;
bool generate_gdb_information_;
+ bool generate_helper_trampolines_;
#ifdef ART_SEA_IR_MODE
bool sea_ir_mode_;
diff --git a/compiler/final_relocations.cc b/compiler/final_relocations.cc
new file mode 100644
index 0000000000..089214c6b6
--- /dev/null
+++ b/compiler/final_relocations.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "final_relocations.h"
+
+namespace art {
+
+class OatWriter;
+
+// Apply every relocation set held in this container, in container order, to 'code'.
+// 'writer' and 'address' are passed through unchanged to each set's Apply().
+void FinalRelocations::Apply(uint8_t* code, const OatWriter* writer, uint32_t address) const {
+  for (const_iterator it = begin(); it != end(); ++it) {
+    (*it)->Apply(code, writer, address);
+  }
+}
+
+// Record a call relocation (kRelocationCall) at code offset 'offset' whose value is
+// the entrypoint offset 'entrypoint_offset'.
+void FinalEntrypointRelocationSet::Add(uint32_t offset, uint32_t entrypoint_offset) {
+  AddRelocation(kRelocationCall, offset, entrypoint_offset);
+}
+} // namespace art
diff --git a/compiler/final_relocations.h b/compiler/final_relocations.h
new file mode 100644
index 0000000000..6478663c40
--- /dev/null
+++ b/compiler/final_relocations.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_FINAL_RELOCATIONS_H_
+#define ART_COMPILER_FINAL_RELOCATIONS_H_
+
+#include <string>
+#include <vector>
+#include "base/macros.h"
+
+namespace art {
+
+class CompilerDriver;
+class OatWriter;
+
+enum FinalRelocationType {
+ kRelocationCall // Relocation of a call instruction.
+};
+
+// This is a set of relocations that is performed when the code is finally
+// written to the output file. This is when we know all the offsets and
+// can patch the binary instructions with known PC relative addresses.
+//
+// This is an abstract class that can be used for sets of relocations of different
+// types. For example, one type of relocation set is the relocation of calls
+// to entrypoint trampoline islands. Another type could be intra-app direct
+// method calls. The 'Apply' function is virtual and is implemented by
+// each concrete subclass.
+class FinalRelocationSet {
+ public:
+ explicit FinalRelocationSet(const CompilerDriver* driver) : driver_(driver) {}
+ virtual ~FinalRelocationSet() {}
+
+ // Append a relocation of the given type at code offset 'offset' carrying 'value'.
+ void AddRelocation(FinalRelocationType type, uint32_t offset, uintptr_t value) {
+ relocations_.push_back(Relocation(type, offset, value));
+ }
+
+ // Apply this relocation set to the given code.
+ // NOTE(review): 'address' is presumably the offset of the code in the output
+ // (OatWriter passes the method's quick code offset) - confirm against subclasses.
+ virtual void Apply(uint8_t* code, const OatWriter* writer, uint32_t address) const = 0;
+
+ protected:
+ // A single recorded relocation.
+ struct Relocation {
+ Relocation()=delete;
+ Relocation(FinalRelocationType type, uint32_t offset, uintptr_t value) :
+ type_(type), code_offset_(offset), value_(value) {}
+ // Kind of relocation.
+ FinalRelocationType type_;
+ // Offset that was passed to AddRelocation.
+ uint32_t code_offset_;
+ // Relocation-specific value; for kRelocationCall entries added through
+ // FinalEntrypointRelocationSet::Add this is the entrypoint offset.
+ uintptr_t value_;
+ };
+
+ // Compiler driver supplied at construction.
+ const CompilerDriver* const driver_;
+ typedef std::vector<Relocation> Relocations;
+ // Relocations in the order they were added.
+ Relocations relocations_;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FinalRelocationSet);
+};
+
+// Set of relocations for calls to entrypoint trampolines. Still abstract: Apply() is
+// not overridden here and is implemented by architecture-specific subclasses.
+class FinalEntrypointRelocationSet : public FinalRelocationSet {
+ public:
+ explicit FinalEntrypointRelocationSet(const CompilerDriver* driver) : FinalRelocationSet(driver) {
+ }
+ ~FinalEntrypointRelocationSet() {}
+
+ // Record a call relocation at code offset 'offset' for the entrypoint identified
+ // by 'entrypoint_offset'.
+ void Add(uint32_t offset, uint32_t entrypoint_offset);
+};
+
+// Holder class for a set of final relocations.
+// NOTE(review): stores raw pointers and nothing visible here deletes them, so the
+// pointed-to sets are presumably owned elsewhere - confirm.
+class FinalRelocations : public std::vector<const FinalRelocationSet*> {
+ public:
+ // Applies each contained relocation set, in order, to 'code'.
+ void Apply(uint8_t* code, const OatWriter* writer, uint32_t address) const;
+};
+} // namespace art
+
+#endif // ART_COMPILER_FINAL_RELOCATIONS_H_
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 2d45a2f65f..4d843a5a37 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -80,7 +80,8 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
size_oat_class_type_(0),
size_oat_class_status_(0),
size_oat_class_method_bitmaps_(0),
- size_oat_class_method_offsets_(0) {
+ size_oat_class_method_offsets_(0),
+ current_trampoline_island_offset_(0xffffffff) {
size_t offset;
{
TimingLogger::ScopedSplit split("InitOatHeader", timings);
@@ -377,15 +378,28 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index,
CHECK(quick_code != nullptr);
offset = compiled_method->AlignCode(offset);
DCHECK_ALIGNED(offset, kArmAlignment);
+
+ uint32_t thumb_offset = compiled_method->CodeDelta();
+ uint32_t tramp_offset = offset + thumb_offset;
+
+ // Allocate a trampoline island if we need to.
+ uint32_t trampsize = AllocateTrampolineIslandIfNecessary(tramp_offset);
+ if (trampsize > 0) {
+ offset += trampsize; // Account for island in current offset.
+
+ // Realign code after trampoline island.
+ offset = compiled_method->AlignCode(offset);
+ DCHECK_ALIGNED(offset, kArmAlignment);
+ }
+
uint32_t code_size = quick_code->size() * sizeof(uint8_t);
CHECK_NE(code_size, 0U);
- uint32_t thumb_offset = compiled_method->CodeDelta();
quick_code_offset = offset + sizeof(code_size) + thumb_offset;
std::vector<uint8_t>* cfi_info = compiler_driver_->GetCallFrameInformation();
if (cfi_info != nullptr) {
- // Copy in the FDE, if present
- const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
+ // Copy in the FDE, if present
+ const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
if (fde != nullptr) {
// Copy the information into cfi_info and then fix the address in the new copy.
int cur_offset = cfi_info->size();
@@ -403,6 +417,7 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index,
}
}
+
// Deduplicate code arrays
SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter =
code_offsets_.find(quick_code);
@@ -413,8 +428,13 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index,
offset += sizeof(code_size); // code size is prepended before code
offset += code_size;
oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
+
+ // Apply the final relocations to the code now that we
+ // know the offset.
+ compiled_method->ApplyFinalRelocations(this, quick_code_offset);
}
}
+
frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
core_spill_mask = compiled_method->GetCoreSpillMask();
fp_spill_mask = compiled_method->GetFpSpillMask();
@@ -827,11 +847,39 @@ size_t OatWriter::WriteCodeMethod(OutputStream* out, const size_t file_offset,
DCHECK_OFFSET();
}
DCHECK_ALIGNED(relative_offset, kArmAlignment);
+
+ // Write out a trampoline island if there is one at this point.
+ uint32_t trampsize = WriteTrampolineIslandIfNecessary(out, relative_offset +
+ compiled_method->CodeDelta());
+ if (trampsize > 0) {
+ relative_offset += trampsize;
+ size_code_ += trampsize;
+ DCHECK_OFFSET();
+
+ // Need to realign the code again after the island.
+ uint32_t aligned_offset = compiled_method->AlignCode(relative_offset);
+ uint32_t aligned_code_delta = aligned_offset - relative_offset;
+ if (aligned_code_delta != 0) {
+ off_t new_offset = out->Seek(aligned_code_delta, kSeekCurrent);
+ size_code_alignment_ += aligned_code_delta;
+ uint32_t expected_offset = file_offset + aligned_offset;
+ if (static_cast<uint32_t>(new_offset) != expected_offset) {
+ PLOG(ERROR) << "Failed to seek to align oat code. Actual: " << new_offset
+ << " Expected: " << expected_offset << " File: " << out->GetLocation();
+ return 0;
+ }
+ relative_offset += aligned_code_delta;
+ DCHECK_OFFSET();
+ }
+ DCHECK_ALIGNED(relative_offset, kArmAlignment);
+ }
+
uint32_t code_size = quick_code->size() * sizeof(uint8_t);
CHECK_NE(code_size, 0U);
// Deduplicate code arrays
size_t code_offset = relative_offset + sizeof(code_size) + compiled_method->CodeDelta();
+
SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter =
code_offsets_.find(quick_code);
if (code_iter != code_offsets_.end() && code_offset != method_offsets.code_offset_) {
@@ -1121,4 +1169,36 @@ bool OatWriter::OatClass::Write(OatWriter* oat_writer,
return true;
}
+// Decide whether a trampoline island must be placed at 'offset'. An island is needed when
+// none has been allocated yet, or when 'offset' has reached the current island's offset
+// plus the driver's maximum trampoline offset. When one is needed, 'offset' is recorded as
+// the new current island and the trampoline table size reported by the driver is returned.
+// Otherwise (or when the driver reports a maximum offset of 0) the result is 0.
+uint32_t OatWriter::AllocateTrampolineIslandIfNecessary(uint32_t offset) {
+  const size_t max_offset = compiler_driver_->GetMaxEntrypointTrampolineOffset();
+  if (max_offset == 0) {
+    // Compiler driver says we don't need trampoline islands.
+    return 0;
+  }
+  // 0xffffffff marks "no island allocated yet".
+  const bool have_island = (current_trampoline_island_offset_ != 0xffffffff);
+  // Kept as a 32-bit value, like the offsets it is compared against.
+  const uint32_t out_of_range_offset = current_trampoline_island_offset_ + max_offset;
+  if (have_island && offset < out_of_range_offset) {
+    // The current island is still in range.
+    return 0;
+  }
+  LOG(DEBUG) << "Need trampoline island at offset " << std::hex << offset;
+  const uint32_t island_size = compiler_driver_->GetEntrypointTrampolineTableSize();
+  trampoline_island_offsets_.push_back(offset);
+  current_trampoline_island_offset_ = offset;
+  return island_size;
+}
+
+// Write the trampoline island that was allocated at 'offset', if there is one.
+//
+// Returns the number of bytes the island occupies, or 0 when no island was recorded at
+// 'offset' (or the island is empty). A failed write is logged. The island size is still
+// returned in that case so the caller's offset bookkeeping matches the layout computed
+// when the island was allocated; the return value cannot signal the failure.
+uint32_t OatWriter::WriteTrampolineIslandIfNecessary(OutputStream* out, uint32_t offset) {
+  for (uint32_t island_offset : trampoline_island_offsets_) {
+    if (island_offset != offset) {
+      continue;
+    }
+    uint32_t size = compiler_driver_->GetEntrypointTrampolineTableSize();
+    LOG(DEBUG) << "Writing trampoline island at offset " << std::hex << offset << " size: "
+               << std::dec << size;
+    if (size == 0) {
+      // Nothing to emit; also avoids taking the address of element 0 of an empty vector.
+      return 0;
+    }
+    const std::vector<uint8_t>& code = compiler_driver_->GetEntrypointTrampolineTableCode();
+    // Never read past the end of the generated trampoline code.
+    CHECK_LE(static_cast<size_t>(size), code.size());
+    if (!out->WriteFully(&code[0], size)) {
+      PLOG(ERROR) << "Failed to write trampoline island at offset " << std::hex << offset
+                  << " to " << out->GetLocation();
+    }
+    return size;
+  }
+  return 0;
+}
+
} // namespace art
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index bab1a26d44..2840cbfe66 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -95,6 +95,10 @@ class OatWriter {
return method_info_;
}
+ // Offset of the most recently allocated trampoline island, or 0xffffffff if no
+ // island has been allocated yet.
+ uint32_t GetCurrentTrampolineIslandOffset() const {
+ return current_trampoline_island_offset_;
+ }
+
private:
size_t InitOatHeader();
size_t InitOatDexFiles(size_t offset);
@@ -134,6 +138,9 @@ class OatWriter {
void ReportWriteFailure(const char* what, uint32_t method_idx, const DexFile& dex_file,
const OutputStream& out) const;
+ uint32_t AllocateTrampolineIslandIfNecessary(uint32_t offset);
+ uint32_t WriteTrampolineIslandIfNecessary(OutputStream* out, uint32_t offset);
+
class OatDexFile {
public:
explicit OatDexFile(size_t offset, const DexFile& dex_file);
@@ -288,6 +295,21 @@ class OatWriter {
SafeMap<const std::vector<uint8_t>*, uint32_t> mapping_table_offsets_;
SafeMap<const std::vector<uint8_t>*, uint32_t> gc_map_offsets_;
+ // The trampoline islands. These are sequences of code inserted between methods
+ // in the output. They contain jumps to other addresses and are accessed
+ // by direct calls in the method code. Due to the range of call instructions
+ // on certain architectures, we need to be able to put down multiple islands that
+ // are in range of the call instructions. On ARM this is done every 15MB (the call range
+ // on Thumb2 is 16MB). At any point in the output we have a current island that is
+ // guaranteed to be in range. This is held in the 'current_trampoline_island_offset'
+ // variable (an offset into the instruction stream).
+ //
+ // The vector 'trampoline_island_offsets' contains the offsets of all the
+ // islands we have generated. This is used when performing the write of the file.
+
+ uint32_t current_trampoline_island_offset_; // Current island offset.
+ std::vector<uint32_t> trampoline_island_offsets_; // All the island offsets.
+
DISALLOW_COPY_AND_ASSIGN(OatWriter);
};
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d19c40c291..f48e9d04cd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -109,6 +109,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(CompilerDriver& driver,
mapping_table,
vmap_table,
gc_map,
+ nullptr,
nullptr);
}