author    Zheng Xu <zheng.xu@arm.com>        2014-07-25 11:49:42 +0800
committer Andreas Gampe <agampe@google.com>  2014-07-29 00:38:14 -0700
commit    b551fdcda9eb128c80de37c4fb978968bec6d4b3
tree      62942f412f2275e2e9188f71c370cd95ec91e17f /compiler
parent    2815f1242c6c3ea1fc2df7bb5e4bd1924f4e75f7
AArch64: Clean up CalleeSaveMethod frame and the use of temp registers.
CalleeSaveMethod frame size changes:
  SaveAll     : 368 -> 176
  RefOnly     : 176 -> 96
  RefsAndArgs : 304 -> 224
JNI register spill size changes: 160 -> 88

In the transition assembly, use registers following these rules:
1. x0-x7 as temp/argument registers.
2. IP0, IP1 as scratch registers.
3. After the correct type of callee-save frame has been set up, all registers are scratchable (probably except xSELF and xSUSPEND).
4. When restoring the callee-save frame, IP0 and IP1 should be untouched.
5. From C to managed code, we assume all AAPCS callee-save registers will be restored by managed code, except x19 (SUSPEND).

In the quick compiler:
1. Use IP0, IP1 as scratch registers.
2. Use IP1 as the hidden argument register (IP0 will be scratched by the trampoline).

Change-Id: I05ed9d418b01b9e87218a7608536f57e7a286e4c
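The new RefOnly and JNI numbers are internally consistent with the 11-register spill list this patch installs below: 11 registers at 8 bytes each give the 88-byte JNI spill size, and adding the 4-byte StackReference<Method> and rounding up to 16-byte stack alignment gives the 96-byte RefOnly frame. A minimal standalone sketch of that arithmetic (constant names are illustrative, not the ART definitions):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed values mirroring the commit: 11 spilled core regs, 8 bytes each,
  // a 4-byte StackReference<Method>, and 16-byte stack alignment.
  constexpr uint32_t kJniRefSpillRegs = 11;
  constexpr uint32_t kFramePointerSize = 8;
  constexpr uint32_t kMethodRefSize = 4;
  constexpr uint32_t kStackAlignment = 16;

  uint32_t spills = kJniRefSpillRegs * kFramePointerSize;  // 88: new JNI spill size
  uint32_t frame = (spills + kMethodRefSize + kStackAlignment - 1) &
                   ~(kStackAlignment - 1);                 // 96: new RefOnly frame
  printf("spill size = %u, RefOnly frame = %u\n", spills, frame);
  return 0;
}
```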
Diffstat (limited to 'compiler')
-rw-r--r--compiler/dex/quick/arm64/arm64_lir.h8
-rw-r--r--compiler/dex/quick/arm64/call_arm64.cc28
-rw-r--r--compiler/dex/quick/arm64/target_arm64.cc8
-rw-r--r--compiler/jni/quick/arm64/calling_convention_arm64.cc33
-rw-r--r--compiler/utils/arm64/assembler_arm64.cc99
-rw-r--r--compiler/utils/arm64/constants_arm64.h6
6 files changed, 117 insertions, 65 deletions
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index d0633afc9e..3a8ea3f96e 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -142,6 +142,8 @@ enum A64NativeRegisterPool {
rwsp = rw31,
// Aliases which are not defined in "ARM Architecture Reference, register names".
+ rxIP0 = rx16,
+ rxIP1 = rx17,
rxSUSPEND = rx19,
rxSELF = rx18,
rxLR = rx30,
@@ -150,6 +152,8 @@ enum A64NativeRegisterPool {
* the 64-bit view. However, for now we'll define a 32-bit view to keep these from being
* allocated as 32-bit temp registers.
*/
+ rwIP0 = rw16,
+ rwIP1 = rw17,
rwSUSPEND = rw19,
rwSELF = rw18,
rwLR = rw30,
@@ -165,6 +169,10 @@ A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES)
constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr);
constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
+constexpr RegStorage rs_xIP0(RegStorage::kValid | rxIP0);
+constexpr RegStorage rs_wIP0(RegStorage::kValid | rwIP0);
+constexpr RegStorage rs_xIP1(RegStorage::kValid | rxIP1);
+constexpr RegStorage rs_wIP1(RegStorage::kValid | rwIP1);
// Reserved registers.
constexpr RegStorage rs_xSUSPEND(RegStorage::kValid | rxSUSPEND);
constexpr RegStorage rs_xSELF(RegStorage::kValid | rxSELF);
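For context, AAPCS64 designates x16 and x17 as the intra-procedure-call scratch registers IP0 and IP1, which is exactly what the new rxIP0/rxIP1 aliases encode. A self-contained illustration (hypothetical names, not the ART enums):

```cpp
// AAPCS64: x16/x17 are the intra-procedure-call scratch registers IP0/IP1.
enum A64Reg : int { kX16 = 16, kX17 = 17 };
constexpr A64Reg kIP0 = kX16;
constexpr A64Reg kIP1 = kX17;
static_assert(kIP0 == 16 && kIP1 == 17, "IP0/IP1 must alias x16/x17");
```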
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 5e95500b25..e584548558 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -319,8 +319,8 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
LockTemp(rs_x5);
LockTemp(rs_x6);
LockTemp(rs_x7);
- LockTemp(rs_x8);
- LockTemp(rs_x9);
+ LockTemp(rs_xIP0);
+ LockTemp(rs_xIP1);
/*
* We can safely skip the stack overflow check if we're
@@ -341,7 +341,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
if (!large_frame) {
// Load stack limit
- LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
+ LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
}
} else {
// TODO(Arm64) Implement implicit checks.
@@ -386,10 +386,10 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_);
m2l_->ClobberCallerSave();
ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
- m2l_->LockTemp(rs_x8);
- m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_x8);
- m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg());
- m2l_->FreeTemp(rs_x8);
+ m2l_->LockTemp(rs_xIP0);
+ m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0);
+ m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg());
+ m2l_->FreeTemp(rs_xIP0);
}
private:
@@ -399,11 +399,11 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
if (large_frame) {
// Compare Expected SP against bottom of stack.
// Branch to throw target if there is not enough room.
- OpRegRegImm(kOpSub, rs_x9, rs_sp, frame_size_without_spills);
- LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
- LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
+ OpRegRegImm(kOpSub, rs_xIP1, rs_sp, frame_size_without_spills);
+ LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP0);
+ LIR* branch = OpCmpBranch(kCondUlt, rs_xIP1, rs_xIP0, nullptr);
AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
- OpRegCopy(rs_sp, rs_x9); // Establish stack after checks.
+ OpRegCopy(rs_sp, rs_xIP1); // Establish stack after checks.
} else {
/*
* If the frame is small enough we are guaranteed to have enough space that remains to
@@ -411,7 +411,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
* Establishes stack before checks.
*/
OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size_without_spills);
- LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_x9, nullptr);
+ LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_xIP1, nullptr);
AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_));
}
} else {
@@ -431,8 +431,8 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
FreeTemp(rs_x5);
FreeTemp(rs_x6);
FreeTemp(rs_x7);
- FreeTemp(rs_x8);
- FreeTemp(rs_x9);
+ FreeTemp(rs_xIP0);
+ FreeTemp(rs_xIP1);
}
void Arm64Mir2Lir::GenExitSequence() {
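For readability, the large-frame path above does the following, shown here as a behavioral C++ sketch of the generated code rather than ART source (the Thread/stack-end plumbing is simplified):

```cpp
#include <cstdint>

// Sketch of the large-frame entry check: xIP1 holds the prospective SP,
// xIP0 holds Thread::StackEndOffset<8>() loaded via xSELF.
bool EnterLargeFrame(uintptr_t& sp, uintptr_t stack_end,
                     uintptr_t frame_size_without_spills) {
  uintptr_t new_sp = sp - frame_size_without_spills;  // OpRegRegImm(kOpSub, xIP1, sp, ...)
  if (new_sp < stack_end) {                           // OpCmpBranch(kCondUlt, xIP1, xIP0, ...)
    return false;                                     // taken: StackOverflowSlowPath throws
  }
  sp = new_sp;                                        // OpRegCopy(rs_sp, rs_xIP1)
  return true;
}
```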
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index dec81cb2f1..9b4546a94b 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -48,14 +48,12 @@ static constexpr RegStorage dp_regs_arr[] =
rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
+// Note: we are not able to call C functions directly, since rs_xSELF is a special register
+// that needs to be preserved but would be scratched by native functions following aapcs64.
static constexpr RegStorage reserved_regs_arr[] =
{rs_wSUSPEND, rs_wSELF, rs_wsp, rs_wLR, rs_wzr};
static constexpr RegStorage reserved64_regs_arr[] =
{rs_xSUSPEND, rs_xSELF, rs_sp, rs_xLR, rs_xzr};
-// TUNING: Are there too many temp registers and too less promote target?
-// This definition need to be matched with runtime.cc, quick entry assembly and JNI compiler
-// Note: we are not able to call to C function directly if it un-match C ABI.
-// Currently, rs_rA64_SELF is not a callee save register which does not match C ABI.
static constexpr RegStorage core_temps_arr[] =
{rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16,
@@ -132,7 +130,7 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
case kRet0: res_reg = rs_w0; break;
case kRet1: res_reg = rs_w1; break;
case kInvokeTgt: res_reg = rs_wLR; break;
- case kHiddenArg: res_reg = rs_w12; break;
+ case kHiddenArg: res_reg = rs_wIP1; break;
case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
case kCount: res_reg = RegStorage::InvalidReg(); break;
default: res_reg = RegStorage::InvalidReg();
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 0a00d7d8ac..b95dad261e 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -152,7 +152,8 @@ const ManagedRegisterEntrySpills& Arm64ManagedRuntimeCallingConvention::EntrySpi
Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
const char* shorty)
: JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
- callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X19));
+ // TODO: Ugly hard code...
+ // Should generate these according to the spill mask automatically.
callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X20));
callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X21));
callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X22));
@@ -164,30 +165,28 @@ Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_syn
callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X28));
callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X29));
callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X30));
- callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D8));
- callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D9));
- callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D10));
- callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D11));
- callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D12));
- callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D13));
- callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D14));
- callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D15));
}
uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
// Compute spill mask to agree with callee saves initialized in the constructor
- uint32_t result = 0;
- result = 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
- 1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
- return result;
+ // Note: The native JNI function may call some VM runtime functions which may suspend
+ // or trigger GC, and the JNI method frame becomes the top quick frame in those cases.
+ // So, to satisfy the GC, we need to save LR and the callee-save registers, similar to
+ // the CalleeSaveMethod(RefOnly) frame.
+ // The JNI function is the native function that the Java code wants to call.
+ // The JNI method is the method compiled by the JNI compiler.
+ // Call chain: managed code (Java) --> JNI method --> JNI function.
+ // The thread register (X18, scratched under aapcs64) is not saved on the stack; it is
+ // saved in ETR (X21).
+ // The suspend register (X19) is preserved by aapcs64 and is not used in the JNI method.
+ return 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | 1 << X25 |
+ 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
}
uint32_t Arm64JniCallingConvention::FpSpillMask() const {
// Compute spill mask to agree with callee saves initialized in the constructor
- uint32_t result = 0;
- result = 1 << D8 | 1 << D9 | 1 << D10 | 1 << D11 | 1 << D12 | 1 << D13 |
- 1 << D14 | 1 << D15;
- return result;
+ // Note: All callee-save fp registers will be preserved by aapcs64, and they are not used
+ // in the JNI method.
+ return 0;
}
ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
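The mask above covers exactly the 11 registers spilled in BuildFrame() below, matching the 88-byte JNI spill size from the commit message; a small standalone check (register numbers per the AArch64 encoding, popcount via C++20 <bit>):

```cpp
#include <bit>      // std::popcount (C++20)
#include <cstdint>

int main() {
  // AArch64 register numbers: X20..X29 are 20..29, LR is X30.
  enum { X20 = 20, X21, X22, X23, X24, X25, X26, X27, X28, X29, LR = 30 };
  uint32_t mask = 1u << X20 | 1u << X21 | 1u << X22 | 1u << X23 | 1u << X24 |
                  1u << X25 | 1u << X26 | 1u << X27 | 1u << X28 | 1u << X29 |
                  1u << LR;
  // 11 registers * 8 bytes == 88, the new JNI register spill size.
  return (std::popcount(mask) * 8 == 88) ? 0 : 1;
}
```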
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 5b97ba0a02..3f90f21b66 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -626,7 +626,7 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
// Move ETR(Callee saved) back to TR(Caller saved) reg. We use ETR on calls
// to external functions that might trash TR. We do not need the original
- // X19 saved in BuildFrame().
+ // ETR(X21) saved in BuildFrame().
___ Mov(reg_x(TR), reg_x(ETR));
___ Blr(temp);
@@ -644,20 +644,43 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
// TODO: *create APCS FP - end of FP chain;
// *add support for saving a different set of callee regs.
- // For now we check that the size of callee regs vector is 20
- // equivalent to the APCS callee saved regs [X19, x30] [D8, D15].
- CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
- ___ PushCalleeSavedRegisters();
-
- // Move TR(Caller saved) to ETR(Callee saved). The original X19 has been
- // saved by PushCalleeSavedRegisters(). This way we make sure that TR is not
- // trashed by native code.
- ___ Mov(reg_x(ETR), reg_x(TR));
-
+ // For now we check that the size of callee regs vector is 11.
+ CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize);
// Increase frame to required size - must be at least space to push StackReference<Method>.
- CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
- size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
- IncreaseFrameSize(adjust);
+ CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize);
+ IncreaseFrameSize(frame_size);
+
+ // TODO: Ugly hard code...
+ // Should generate these according to the spill mask automatically.
+ // TUNING: Use stp.
+ // Note: Must match Arm64JniCallingConvention::CoreSpillMask().
+ size_t reg_offset = frame_size;
+ reg_offset -= 8;
+ StoreToOffset(LR, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X29, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X28, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X27, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X26, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X25, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X24, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X23, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X22, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X21, SP, reg_offset);
+ reg_offset -= 8;
+ StoreToOffset(X20, SP, reg_offset);
+
+ // Move TR (caller-saved) to ETR (callee-saved). The original ETR (X21) has been saved on stack.
+ // This way we make sure that TR is not trashed by native code.
+ ___ Mov(reg_x(ETR), reg_x(TR));
// Write StackReference<Method>.
DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
@@ -690,22 +713,46 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
- // For now we only check that the size of the frame is greater than the
- // no of APCS callee saved regs [X19, X30] [D8, D15].
- CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
- CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
-
- // Decrease frame size to start of callee saved regs.
- size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
- DecreaseFrameSize(adjust);
+ // For now we only check that the size of the frame is greater than the spill size.
+ CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize);
+ CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize);
- // We move ETR (Callee Saved) back to TR (Caller Saved) which might have
- // been trashed in the native call. The original X19 (ETR) is restored as
- // part of PopCalleeSavedRegisters().
+ // We move ETR (aapcs64 callee-saved) back to TR (aapcs64 caller-saved), which might have
+ // been trashed in the native call. The original ETR (X21) is restored from the stack.
___ Mov(reg_x(TR), reg_x(ETR));
+ // TODO: Ugly hard code...
+ // Should generate these according to the spill mask automatically.
+ // TUNING: Use ldp.
+ // Note: Must match Arm64JniCallingConvention::CoreSpillMask().
+ size_t reg_offset = frame_size;
+ reg_offset -= 8;
+ LoadFromOffset(LR, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X29, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X28, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X27, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X26, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X25, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X24, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X23, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X22, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X21, SP, reg_offset);
+ reg_offset -= 8;
+ LoadFromOffset(X20, SP, reg_offset);
+
+ // Decrease frame size to start of callee saved regs.
+ DecreaseFrameSize(frame_size);
+
// Pop callee saved and return to LR.
- ___ PopCalleeSavedRegisters();
___ Ret();
}
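The TODO in both hunks asks for generating these spills from the mask; under the assumption of a StoreToOffset-style emitter, a sketch of what that could look like (hypothetical helper, not the actual ART change):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr int SP = 31;

// Stub standing in for Arm64Assembler::StoreToOffset (hypothetical signature).
void StoreToOffset(int reg, int base, size_t offset) {
  (void)base;
  printf("str x%d, [sp, #%zu]\n", reg, offset);
}

// Walk the core spill mask from the highest register down so that LR lands
// nearest the top of the frame, mirroring the hand-written sequence above.
// RemoveFrame() would mirror this with LoadFromOffset and the same mask.
void SpillCoreRegs(uint32_t core_spill_mask, size_t frame_size) {
  size_t reg_offset = frame_size;
  for (int reg = 31; reg >= 0; --reg) {
    if ((core_spill_mask & (1u << reg)) != 0) {
      reg_offset -= 8;
      StoreToOffset(reg, SP, reg_offset);
    }
  }
}
```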
diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h
index 2a08c95654..0cbbb1eeff 100644
--- a/compiler/utils/arm64/constants_arm64.h
+++ b/compiler/utils/arm64/constants_arm64.h
@@ -29,12 +29,12 @@
namespace art {
namespace arm64 {
-constexpr unsigned int kCalleeSavedRegsSize = 20;
+constexpr unsigned int kJniRefSpillRegsSize = 11;
// Vixl buffer size.
constexpr size_t kBufferSizeArm64 = 4096*2;
-} // arm64
-} // art
+} // namespace arm64
+} // namespace art
#endif // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_