diff options
author | Zheng Xu <zheng.xu@arm.com> | 2014-07-25 11:49:42 +0800 |
---|---|---|
committer | Andreas Gampe <agampe@google.com> | 2014-07-29 00:38:14 -0700 |
commit | b551fdcda9eb128c80de37c4fb978968bec6d4b3 (patch) | |
tree | 62942f412f2275e2e9188f71c370cd95ec91e17f /compiler | |
parent | 2815f1242c6c3ea1fc2df7bb5e4bd1924f4e75f7 (diff) | |
download | android_art-b551fdcda9eb128c80de37c4fb978968bec6d4b3.tar.gz android_art-b551fdcda9eb128c80de37c4fb978968bec6d4b3.tar.bz2 android_art-b551fdcda9eb128c80de37c4fb978968bec6d4b3.zip |
AArch64: Clean up CalleeSaveMethod frame and the use of temp registers.
CalleeSaveMethod frame size changes :
SaveAll : 368 -> 176
RefOnly : 176 -> 96
RefsAndArgs : 304 -> 224
JNI register spill size changes :
160 -> 88
In the transition assembly, use registers following the rules:
1. x0-x7 as temp/argument registers.
2. IP0, IP1 as scratch registers.
3. After the correct type of callee-save frame has been set up, all registers
are scratchable (probably except xSELF and xSUSPEND).
4. When restoring the callee-save frame, IP0 and IP1 should be untouched.
5. From C to managed code, we assume all callee-save registers in AAPCS
will be restored by managed code, except x19 (SUSPEND).
In quick compiler:
1. Use IP0, IP1 as scratch registers.
2. Use IP1 as the hidden argument register (IP0 will be scratched by the
trampoline).
Change-Id: I05ed9d418b01b9e87218a7608536f57e7a286e4c
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/dex/quick/arm64/arm64_lir.h | 8 | ||||
-rw-r--r-- | compiler/dex/quick/arm64/call_arm64.cc | 28 | ||||
-rw-r--r-- | compiler/dex/quick/arm64/target_arm64.cc | 8 | ||||
-rw-r--r-- | compiler/jni/quick/arm64/calling_convention_arm64.cc | 33 | ||||
-rw-r--r-- | compiler/utils/arm64/assembler_arm64.cc | 99 | ||||
-rw-r--r-- | compiler/utils/arm64/constants_arm64.h | 6 |
6 files changed, 117 insertions, 65 deletions
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index d0633afc9e..3a8ea3f96e 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -142,6 +142,8 @@ enum A64NativeRegisterPool { rwsp = rw31, // Aliases which are not defined in "ARM Architecture Reference, register names". + rxIP0 = rx16, + rxIP1 = rx17, rxSUSPEND = rx19, rxSELF = rx18, rxLR = rx30, @@ -150,6 +152,8 @@ enum A64NativeRegisterPool { * the 64-bit view. However, for now we'll define a 32-bit view to keep these from being * allocated as 32-bit temp registers. */ + rwIP0 = rw16, + rwIP1 = rw17, rwSUSPEND = rw19, rwSELF = rw18, rwLR = rw30, @@ -165,6 +169,10 @@ A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES) constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr); constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr); +constexpr RegStorage rs_xIP0(RegStorage::kValid | rxIP0); +constexpr RegStorage rs_wIP0(RegStorage::kValid | rwIP0); +constexpr RegStorage rs_xIP1(RegStorage::kValid | rxIP1); +constexpr RegStorage rs_wIP1(RegStorage::kValid | rwIP1); // Reserved registers. 
constexpr RegStorage rs_xSUSPEND(RegStorage::kValid | rxSUSPEND); constexpr RegStorage rs_xSELF(RegStorage::kValid | rxSELF); diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 5e95500b25..e584548558 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -319,8 +319,8 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) LockTemp(rs_x5); LockTemp(rs_x6); LockTemp(rs_x7); - LockTemp(rs_x8); - LockTemp(rs_x9); + LockTemp(rs_xIP0); + LockTemp(rs_xIP1); /* * We can safely skip the stack overflow check if we're @@ -341,7 +341,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) { if (!large_frame) { // Load stack limit - LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9); + LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1); } } else { // TODO(Arm64) Implement implicit checks. @@ -386,10 +386,10 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); - m2l_->LockTemp(rs_x8); - m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_x8); - m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg()); - m2l_->FreeTemp(rs_x8); + m2l_->LockTemp(rs_xIP0); + m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0); + m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg()); + m2l_->FreeTemp(rs_xIP0); } private: @@ -399,11 +399,11 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (large_frame) { // Compare Expected SP against bottom of stack. // Branch to throw target if there is not enough room. 
- OpRegRegImm(kOpSub, rs_x9, rs_sp, frame_size_without_spills); - LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8); - LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr); + OpRegRegImm(kOpSub, rs_xIP1, rs_sp, frame_size_without_spills); + LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP0); + LIR* branch = OpCmpBranch(kCondUlt, rs_xIP1, rs_xIP0, nullptr); AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size)); - OpRegCopy(rs_sp, rs_x9); // Establish stack after checks. + OpRegCopy(rs_sp, rs_xIP1); // Establish stack after checks. } else { /* * If the frame is small enough we are guaranteed to have enough space that remains to @@ -411,7 +411,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) * Establishes stack before checks. */ OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size_without_spills); - LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_x9, nullptr); + LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_xIP1, nullptr); AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_)); } } else { @@ -431,8 +431,8 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) FreeTemp(rs_x5); FreeTemp(rs_x6); FreeTemp(rs_x7); - FreeTemp(rs_x8); - FreeTemp(rs_x9); + FreeTemp(rs_xIP0); + FreeTemp(rs_xIP1); } void Arm64Mir2Lir::GenExitSequence() { diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index dec81cb2f1..9b4546a94b 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -48,14 +48,12 @@ static constexpr RegStorage dp_regs_arr[] = rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15, rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23, rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31}; +// Note: we are not able to call to C function since rs_xSELF is a special register need to be +// preserved but would be 
scratched by native functions follow aapcs64. static constexpr RegStorage reserved_regs_arr[] = {rs_wSUSPEND, rs_wSELF, rs_wsp, rs_wLR, rs_wzr}; static constexpr RegStorage reserved64_regs_arr[] = {rs_xSUSPEND, rs_xSELF, rs_sp, rs_xLR, rs_xzr}; -// TUNING: Are there too many temp registers and too less promote target? -// This definition need to be matched with runtime.cc, quick entry assembly and JNI compiler -// Note: we are not able to call to C function directly if it un-match C ABI. -// Currently, rs_rA64_SELF is not a callee save register which does not match C ABI. static constexpr RegStorage core_temps_arr[] = {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7, rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16, @@ -132,7 +130,7 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kRet0: res_reg = rs_w0; break; case kRet1: res_reg = rs_w1; break; case kInvokeTgt: res_reg = rs_wLR; break; - case kHiddenArg: res_reg = rs_w12; break; + case kHiddenArg: res_reg = rs_wIP1; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = RegStorage::InvalidReg(); break; default: res_reg = RegStorage::InvalidReg(); diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index 0a00d7d8ac..b95dad261e 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -152,7 +152,8 @@ const ManagedRegisterEntrySpills& Arm64ManagedRuntimeCallingConvention::EntrySpi Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) { - callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X19)); + // TODO: Ugly hard code... + // Should generate these according to the spill mask automatically. 
callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X20)); callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X21)); callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X22)); @@ -164,30 +165,28 @@ Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_syn callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X28)); callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X29)); callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X30)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D8)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D9)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D10)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D11)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D12)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D13)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D14)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D15)); } uint32_t Arm64JniCallingConvention::CoreSpillMask() const { // Compute spill mask to agree with callee saves initialized in the constructor - uint32_t result = 0; - result = 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | - 1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR; - return result; + // Note: The native jni function may call to some VM runtime functions which may suspend + // or trigger GC. And the jni method frame will become top quick frame in those cases. + // So we need to satisfy GC to save LR and callee-save registers which is similar to + // CalleeSaveMethod(RefOnly) frame. + // Jni function is the native function which the java code wants to call. + // Jni method is the method that compiled by jni compiler. + // Call chain: managed code(java) --> jni method --> jni function. 
+ // Thread register(X18, scratched by aapcs64) is not saved on stack, it is saved in ETR(X21). + // Suspend register(x19) is preserved by aapcs64 and it is not used in Jni method. + return 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | 1 << X25 | + 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR; } uint32_t Arm64JniCallingConvention::FpSpillMask() const { // Compute spill mask to agree with callee saves initialized in the constructor - uint32_t result = 0; - result = 1 << D8 | 1 << D9 | 1 << D10 | 1 << D11 | 1 << D12 | 1 << D13 | - 1 << D14 | 1 << D15; - return result; + // Note: All callee-save fp registers will be preserved by aapcs64. And they are not used + // in the jni method. + return 0; } ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 5b97ba0a02..3f90f21b66 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -626,7 +626,7 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { // Move ETR(Callee saved) back to TR(Caller saved) reg. We use ETR on calls // to external functions that might trash TR. We do not need the original - // X19 saved in BuildFrame(). + // ETR(X21) saved in BuildFrame(). ___ Mov(reg_x(TR), reg_x(ETR)); ___ Blr(temp); @@ -644,20 +644,43 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // TODO: *create APCS FP - end of FP chain; // *add support for saving a different set of callee regs. - // For now we check that the size of callee regs vector is 20 - // equivalent to the APCS callee saved regs [X19, x30] [D8, D15]. - CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize); - ___ PushCalleeSavedRegisters(); - - // Move TR(Caller saved) to ETR(Callee saved). The original X19 has been - // saved by PushCalleeSavedRegisters(). This way we make sure that TR is not - // trashed by native code. 
- ___ Mov(reg_x(ETR), reg_x(TR)); - + // For now we check that the size of callee regs vector is 11. + CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); // Increase frame to required size - must be at least space to push StackReference<Method>. - CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize); - size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize); - IncreaseFrameSize(adjust); + CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize); + IncreaseFrameSize(frame_size); + + // TODO: Ugly hard code... + // Should generate these according to the spill mask automatically. + // TUNING: Use stp. + // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). + size_t reg_offset = frame_size; + reg_offset -= 8; + StoreToOffset(LR, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X29, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X28, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X27, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X26, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X25, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X24, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X23, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X22, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X21, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X20, SP, reg_offset); + + // Move TR(Caller saved) to ETR(Callee saved). The original (ETR)X21 has been saved on stack. + // This way we make sure that TR is not trashed by native code. + ___ Mov(reg_x(ETR), reg_x(TR)); // Write StackReference<Method>. 
DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>)); @@ -690,22 +713,46 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - // For now we only check that the size of the frame is greater than the - // no of APCS callee saved regs [X19, X30] [D8, D15]. - CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize); - CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize); - - // Decrease frame size to start of callee saved regs. - size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize); - DecreaseFrameSize(adjust); + // For now we only check that the size of the frame is greater than the spill size. + CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); + CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize); - // We move ETR (Callee Saved) back to TR (Caller Saved) which might have - // been trashed in the native call. The original X19 (ETR) is restored as - // part of PopCalleeSavedRegisters(). + // We move ETR(aapcs64 callee saved) back to TR(aapcs64 caller saved) which might have + // been trashed in the native call. The original ETR(X21) is restored from stack. ___ Mov(reg_x(TR), reg_x(ETR)); + // TODO: Ugly hard code... + // Should generate these according to the spill mask automatically. + // TUNING: Use ldp. + // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). 
+ size_t reg_offset = frame_size; + reg_offset -= 8; + LoadFromOffset(LR, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X29, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X28, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X27, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X26, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X25, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X24, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X23, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X22, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X21, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X20, SP, reg_offset); + + // Decrease frame size to start of callee saved regs. + DecreaseFrameSize(frame_size); + // Pop callee saved and return to LR. - ___ PopCalleeSavedRegisters(); ___ Ret(); } diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h index 2a08c95654..0cbbb1eeff 100644 --- a/compiler/utils/arm64/constants_arm64.h +++ b/compiler/utils/arm64/constants_arm64.h @@ -29,12 +29,12 @@ namespace art { namespace arm64 { -constexpr unsigned int kCalleeSavedRegsSize = 20; +constexpr unsigned int kJniRefSpillRegsSize = 11; // Vixl buffer size. constexpr size_t kBufferSizeArm64 = 4096*2; -} // arm64 -} // art +} // namespace arm64 +} // namespace art #endif // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_ |