author | Zheng Xu <zheng.xu@arm.com> | 2014-07-25 11:49:42 +0800 |
---|---|---|
committer | Andreas Gampe <agampe@google.com> | 2014-07-29 00:38:14 -0700 |
commit | b551fdcda9eb128c80de37c4fb978968bec6d4b3 (patch) | |
tree | 62942f412f2275e2e9188f71c370cd95ec91e17f | |
parent | 2815f1242c6c3ea1fc2df7bb5e4bd1924f4e75f7 (diff) | |
AArch64: Clean up CalleeSaveMethod frame and the use of temp registers.
CalleeSaveMethod frame size changes:
SaveAll : 368 -> 176
RefOnly : 176 -> 96
RefsAndArgs : 304 -> 224
JNI register spill size change:
160 -> 88 (see the derivation sketch below)
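These sizes follow mechanically from the spill masks introduced by this patch in quick_method_frame_info_arm64.h: each spilled GPR/FPR takes an 8-byte slot, a 4-byte StackReference<ArtMethod> sits at [sp], and the total is rounded up to the 16-byte AAPCS64 stack alignment. Below is a minimal stand-alone sketch of that arithmetic (not ART source; the constant and function names are invented for illustration):

```cpp
#include <cstddef>
#include <cstdio>

// All sizes in bytes.
constexpr size_t kSpillSlot  = 8;   // Each spilled GPR/FPR occupies 8 bytes.
constexpr size_t kStackAlign = 16;  // AAPCS64 requires 16-byte SP alignment.
constexpr size_t kMethodRef  = 4;   // StackReference<ArtMethod> stored at [sp].

constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1) / n * n; }

// A callee-save frame holds the spilled registers plus the ArtMethod* slot,
// rounded up to the stack alignment.
constexpr size_t FrameSize(size_t gprs, size_t fprs) {
  return RoundUp((gprs + fprs) * kSpillSlot + kMethodRef, kStackAlign);
}

static_assert(FrameSize(13, 8) == 176, "SaveAll: x18, x19, x20-x29, LR + d8-d15");
static_assert(FrameSize(11, 0) == 96,  "RefsOnly: x20-x29, LR");
static_assert(FrameSize(18, 8) == 224, "RefsAndArgs: x1-x7, x20-x29, LR + d0-d7");

int main() {
  // The JNI spill area has no ArtMethod* slot: x20-x29 + LR = 11 registers.
  printf("JNI register spill size = %zu bytes\n", 11 * kSpillSlot);
  return 0;
}
```

For comparison, the old 160-byte JNI spill area covered 20 registers (x19-x30 and d8-d15); the new masks drop the FP callee-saves and x19, leaving the 11 registers counted above.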
In the transition assembly, registers are used according to the following rules:
1. x0-x7 are temp/argument registers.
2. IP0 and IP1 are scratch registers.
3. Once the correct type of callee-save frame has been set up, all registers
are scratchable (most likely except xSELF and xSUSPEND).
4. When restoring a callee-save frame, IP0 and IP1 must be left untouched.
5. When transitioning from C to managed code, we assume that all AAPCS
callee-save registers will be restored by managed code, except x19 (SUSPEND).
In the Quick compiler:
1. Use IP0 and IP1 as scratch registers.
2. Use IP1 as the hidden argument register (IP0 will be scratched by the
trampoline).
Change-Id: I05ed9d418b01b9e87218a7608536f57e7a286e4c
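The register roles spelled out above map onto the aliases this patch adds in asm_support_arm64.S, arm64_lir.h and registers_arm64.h. As a quick reference, here is an illustrative restatement in C++ (not part of the patch; the names are invented for readability):

```cpp
// Illustrative restatement of the register conventions above (not ART source;
// the real definitions live in asm_support_arm64.S, arm64_lir.h and
// registers_arm64.h further down in this patch).
constexpr int kIP0     = 16;  // Scratch (xIP0); trashed by trampolines.
constexpr int kIP1     = 17;  // Scratch (xIP1); also the hidden argument
                              // register for art_quick_imt_conflict_trampoline.
constexpr int kSELF    = 18;  // xSELF/TR: Thread::Current(); caller-saved in
                              // AAPCS64, so it is parked in xETR around calls
                              // into native code.
constexpr int kSUSPEND = 19;  // xSUSPEND: suspend-check countdown; callee-saved
                              // in AAPCS64 but not restored by managed code, so
                              // the invoke stubs save/restore it themselves.
constexpr int kETR     = 21;  // xETR: holds the thread pointer while native
                              // code may clobber x18 (also a regular callee-save).
constexpr int kFP      = 29;  // xFP: frame pointer, callee-saved.
constexpr int kLR      = 30;  // xLR: always spilled at the top of every
                              // callee-save frame.
// x0-x7 are argument/temp registers; x20-x29 are the callee-saves spilled by
// the RefsOnly and RefsAndArgs frames.
```

In particular, moving ETR from x19 to x21 frees x19 to serve purely as the suspend register, while x18 (xSELF) is parked in xETR whenever native code may clobber it.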
-rw-r--r-- | compiler/dex/quick/arm64/arm64_lir.h | 8
-rw-r--r-- | compiler/dex/quick/arm64/call_arm64.cc | 28
-rw-r--r-- | compiler/dex/quick/arm64/target_arm64.cc | 8
-rw-r--r-- | compiler/jni/quick/arm64/calling_convention_arm64.cc | 33
-rw-r--r-- | compiler/utils/arm64/assembler_arm64.cc | 99
-rw-r--r-- | compiler/utils/arm64/constants_arm64.h | 6
-rw-r--r-- | runtime/arch/arm/quick_method_frame_info_arm.h | 16
-rw-r--r-- | runtime/arch/arm64/asm_support_arm64.S | 7
-rw-r--r-- | runtime/arch/arm64/asm_support_arm64.h | 22
-rw-r--r-- | runtime/arch/arm64/quick_entrypoints_arm64.S | 449
-rw-r--r-- | runtime/arch/arm64/quick_method_frame_info_arm64.h | 60
-rw-r--r-- | runtime/arch/arm64/registers_arm64.h | 2
-rw-r--r-- | runtime/entrypoints/quick/quick_trampoline_entrypoints.cc | 24
13 files changed, 406 insertions, 356 deletions
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index d0633afc9e..3a8ea3f96e 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -142,6 +142,8 @@ enum A64NativeRegisterPool { rwsp = rw31, // Aliases which are not defined in "ARM Architecture Reference, register names". + rxIP0 = rx16, + rxIP1 = rx17, rxSUSPEND = rx19, rxSELF = rx18, rxLR = rx30, @@ -150,6 +152,8 @@ enum A64NativeRegisterPool { * the 64-bit view. However, for now we'll define a 32-bit view to keep these from being * allocated as 32-bit temp registers. */ + rwIP0 = rw16, + rwIP1 = rw17, rwSUSPEND = rw19, rwSELF = rw18, rwLR = rw30, @@ -165,6 +169,10 @@ A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES) constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr); constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr); +constexpr RegStorage rs_xIP0(RegStorage::kValid | rxIP0); +constexpr RegStorage rs_wIP0(RegStorage::kValid | rwIP0); +constexpr RegStorage rs_xIP1(RegStorage::kValid | rxIP1); +constexpr RegStorage rs_wIP1(RegStorage::kValid | rwIP1); // Reserved registers. constexpr RegStorage rs_xSUSPEND(RegStorage::kValid | rxSUSPEND); constexpr RegStorage rs_xSELF(RegStorage::kValid | rxSELF); diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 5e95500b25..e584548558 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -319,8 +319,8 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) LockTemp(rs_x5); LockTemp(rs_x6); LockTemp(rs_x7); - LockTemp(rs_x8); - LockTemp(rs_x9); + LockTemp(rs_xIP0); + LockTemp(rs_xIP1); /* * We can safely skip the stack overflow check if we're @@ -341,7 +341,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) { if (!large_frame) { // Load stack limit - LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9); + LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1); } } else { // TODO(Arm64) Implement implicit checks. @@ -386,10 +386,10 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); - m2l_->LockTemp(rs_x8); - m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_x8); - m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg()); - m2l_->FreeTemp(rs_x8); + m2l_->LockTemp(rs_xIP0); + m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0); + m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg()); + m2l_->FreeTemp(rs_xIP0); } private: @@ -399,11 +399,11 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (large_frame) { // Compare Expected SP against bottom of stack. // Branch to throw target if there is not enough room. - OpRegRegImm(kOpSub, rs_x9, rs_sp, frame_size_without_spills); - LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8); - LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr); + OpRegRegImm(kOpSub, rs_xIP1, rs_sp, frame_size_without_spills); + LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP0); + LIR* branch = OpCmpBranch(kCondUlt, rs_xIP1, rs_xIP0, nullptr); AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size)); - OpRegCopy(rs_sp, rs_x9); // Establish stack after checks. 
+ OpRegCopy(rs_sp, rs_xIP1); // Establish stack after checks. } else { /* * If the frame is small enough we are guaranteed to have enough space that remains to @@ -411,7 +411,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) * Establishes stack before checks. */ OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size_without_spills); - LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_x9, nullptr); + LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_xIP1, nullptr); AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_)); } } else { @@ -431,8 +431,8 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) FreeTemp(rs_x5); FreeTemp(rs_x6); FreeTemp(rs_x7); - FreeTemp(rs_x8); - FreeTemp(rs_x9); + FreeTemp(rs_xIP0); + FreeTemp(rs_xIP1); } void Arm64Mir2Lir::GenExitSequence() { diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index dec81cb2f1..9b4546a94b 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -48,14 +48,12 @@ static constexpr RegStorage dp_regs_arr[] = rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15, rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23, rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31}; +// Note: we are not able to call to C function since rs_xSELF is a special register need to be +// preserved but would be scratched by native functions follow aapcs64. static constexpr RegStorage reserved_regs_arr[] = {rs_wSUSPEND, rs_wSELF, rs_wsp, rs_wLR, rs_wzr}; static constexpr RegStorage reserved64_regs_arr[] = {rs_xSUSPEND, rs_xSELF, rs_sp, rs_xLR, rs_xzr}; -// TUNING: Are there too many temp registers and too less promote target? -// This definition need to be matched with runtime.cc, quick entry assembly and JNI compiler -// Note: we are not able to call to C function directly if it un-match C ABI. -// Currently, rs_rA64_SELF is not a callee save register which does not match C ABI. static constexpr RegStorage core_temps_arr[] = {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7, rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16, @@ -132,7 +130,7 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kRet0: res_reg = rs_w0; break; case kRet1: res_reg = rs_w1; break; case kInvokeTgt: res_reg = rs_wLR; break; - case kHiddenArg: res_reg = rs_w12; break; + case kHiddenArg: res_reg = rs_wIP1; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = RegStorage::InvalidReg(); break; default: res_reg = RegStorage::InvalidReg(); diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index 0a00d7d8ac..b95dad261e 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -152,7 +152,8 @@ const ManagedRegisterEntrySpills& Arm64ManagedRuntimeCallingConvention::EntrySpi Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) { - callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X19)); + // TODO: Ugly hard code... + // Should generate these according to the spill mask automatically. 
callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X20)); callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X21)); callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X22)); @@ -164,30 +165,28 @@ Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_syn callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X28)); callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X29)); callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X30)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D8)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D9)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D10)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D11)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D12)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D13)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D14)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D15)); } uint32_t Arm64JniCallingConvention::CoreSpillMask() const { // Compute spill mask to agree with callee saves initialized in the constructor - uint32_t result = 0; - result = 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | - 1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR; - return result; + // Note: The native jni function may call to some VM runtime functions which may suspend + // or trigger GC. And the jni method frame will become top quick frame in those cases. + // So we need to satisfy GC to save LR and callee-save registers which is similar to + // CalleeSaveMethod(RefOnly) frame. + // Jni function is the native function which the java code wants to call. + // Jni method is the method that compiled by jni compiler. + // Call chain: managed code(java) --> jni method --> jni function. + // Thread register(X18, scratched by aapcs64) is not saved on stack, it is saved in ETR(X21). + // Suspend register(x19) is preserved by aapcs64 and it is not used in Jni method. + return 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | 1 << X25 | + 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR; } uint32_t Arm64JniCallingConvention::FpSpillMask() const { // Compute spill mask to agree with callee saves initialized in the constructor - uint32_t result = 0; - result = 1 << D8 | 1 << D9 | 1 << D10 | 1 << D11 | 1 << D12 | 1 << D13 | - 1 << D14 | 1 << D15; - return result; + // Note: All callee-save fp registers will be preserved by aapcs64. And they are not used + // in the jni method. + return 0; } ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 5b97ba0a02..3f90f21b66 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -626,7 +626,7 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { // Move ETR(Callee saved) back to TR(Caller saved) reg. We use ETR on calls // to external functions that might trash TR. We do not need the original - // X19 saved in BuildFrame(). + // ETR(X21) saved in BuildFrame(). ___ Mov(reg_x(TR), reg_x(ETR)); ___ Blr(temp); @@ -644,20 +644,43 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // TODO: *create APCS FP - end of FP chain; // *add support for saving a different set of callee regs. 
- // For now we check that the size of callee regs vector is 20 - // equivalent to the APCS callee saved regs [X19, x30] [D8, D15]. - CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize); - ___ PushCalleeSavedRegisters(); - - // Move TR(Caller saved) to ETR(Callee saved). The original X19 has been - // saved by PushCalleeSavedRegisters(). This way we make sure that TR is not - // trashed by native code. - ___ Mov(reg_x(ETR), reg_x(TR)); - + // For now we check that the size of callee regs vector is 11. + CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); // Increase frame to required size - must be at least space to push StackReference<Method>. - CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize); - size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize); - IncreaseFrameSize(adjust); + CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize); + IncreaseFrameSize(frame_size); + + // TODO: Ugly hard code... + // Should generate these according to the spill mask automatically. + // TUNING: Use stp. + // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). + size_t reg_offset = frame_size; + reg_offset -= 8; + StoreToOffset(LR, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X29, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X28, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X27, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X26, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X25, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X24, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X23, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X22, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X21, SP, reg_offset); + reg_offset -= 8; + StoreToOffset(X20, SP, reg_offset); + + // Move TR(Caller saved) to ETR(Callee saved). The original (ETR)X21 has been saved on stack. + // This way we make sure that TR is not trashed by native code. + ___ Mov(reg_x(ETR), reg_x(TR)); // Write StackReference<Method>. DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>)); @@ -690,22 +713,46 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - // For now we only check that the size of the frame is greater than the - // no of APCS callee saved regs [X19, X30] [D8, D15]. - CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize); - CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize); - - // Decrease frame size to start of callee saved regs. - size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize); - DecreaseFrameSize(adjust); + // For now we only check that the size of the frame is greater than the spill size. + CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); + CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize); - // We move ETR (Callee Saved) back to TR (Caller Saved) which might have - // been trashed in the native call. The original X19 (ETR) is restored as - // part of PopCalleeSavedRegisters(). + // We move ETR(aapcs64 callee saved) back to TR(aapcs64 caller saved) which might have + // been trashed in the native call. The original ETR(X21) is restored from stack. ___ Mov(reg_x(TR), reg_x(ETR)); + // TODO: Ugly hard code... + // Should generate these according to the spill mask automatically. + // TUNING: Use ldp. + // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). 
+ size_t reg_offset = frame_size; + reg_offset -= 8; + LoadFromOffset(LR, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X29, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X28, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X27, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X26, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X25, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X24, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X23, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X22, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X21, SP, reg_offset); + reg_offset -= 8; + LoadFromOffset(X20, SP, reg_offset); + + // Decrease frame size to start of callee saved regs. + DecreaseFrameSize(frame_size); + // Pop callee saved and return to LR. - ___ PopCalleeSavedRegisters(); ___ Ret(); } diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h index 2a08c95654..0cbbb1eeff 100644 --- a/compiler/utils/arm64/constants_arm64.h +++ b/compiler/utils/arm64/constants_arm64.h @@ -29,12 +29,12 @@ namespace art { namespace arm64 { -constexpr unsigned int kCalleeSavedRegsSize = 20; +constexpr unsigned int kJniRefSpillRegsSize = 11; // Vixl buffer size. constexpr size_t kBufferSizeArm64 = 4096*2; -} // arm64 -} // art +} // namespace arm64 +} // namespace art #endif // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_ diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h index 83cacac5be..7595e94e26 100644 --- a/runtime/arch/arm/quick_method_frame_info_arm.h +++ b/runtime/arch/arm/quick_method_frame_info_arm.h @@ -63,6 +63,22 @@ constexpr QuickMethodFrameInfo ArmCalleeSaveMethodFrameInfo(Runtime::CalleeSaveT ArmCalleeSaveFpSpills(type)); } +constexpr size_t ArmCalleeSaveFpr1Offset(Runtime::CalleeSaveType type) { + return ArmCalleeSaveFrameSize(type) - + (POPCOUNT(ArmCalleeSaveCoreSpills(type)) + + POPCOUNT(ArmCalleeSaveFpSpills(type))) * kArmPointerSize; +} + +constexpr size_t ArmCalleeSaveGpr1Offset(Runtime::CalleeSaveType type) { + return ArmCalleeSaveFrameSize(type) - + POPCOUNT(ArmCalleeSaveCoreSpills(type)) * kArmPointerSize; +} + +constexpr size_t ArmCalleeSaveLrOffset(Runtime::CalleeSaveType type) { + return ArmCalleeSaveFrameSize(type) - + POPCOUNT(ArmCalleeSaveCoreSpills(type) & (-(1 << LR))) * kArmPointerSize; +} + } // namespace arm } // namespace art diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S index 55de1ecfa0..be167faae6 100644 --- a/runtime/arch/arm64/asm_support_arm64.S +++ b/runtime/arch/arm64/asm_support_arm64.S @@ -24,15 +24,22 @@ // Register holding suspend check count down. // 32-bit is enough for the suspend register. #define wSUSPEND w19 +// xSUSPEND is 64-bit view of wSUSPEND. +// Used to save/restore the register scratched by managed code. +#define xSUSPEND x19 // Register holding Thread::Current(). #define xSELF x18 +// x18 is not preserved by aapcs64, save it on xETR(External Thread reg) for restore and later use. +#define xETR x21 // Frame Pointer #define xFP x29 // Link Register #define xLR x30 // Define the intraprocedural linkage temporary registers. 
#define xIP0 x16 +#define wIP0 w16 #define xIP1 x17 +#define wIP1 w17 .macro ENTRY name diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h index f353408baa..7f0f56f274 100644 --- a/runtime/arch/arm64/asm_support_arm64.h +++ b/runtime/arch/arm64/asm_support_arm64.h @@ -19,28 +19,26 @@ #include "asm_support.h" -// TODO Thread offsets need to be checked when on Aarch64. - // Note: these callee save methods loads require read barriers. -// Offset of field Runtime::callee_save_methods_[kSaveAll] +// Offset of field Runtime::callee_save_methods_[kSaveAll] verified in InitCpu #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0 -// Offset of field Runtime::callee_save_methods_[kRefsOnly] +// Offset of field Runtime::callee_save_methods_[kRefsOnly] verified in InitCpu #define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8 -// Offset of field Runtime::callee_save_methods_[kRefsAndArgs] +// Offset of field Runtime::callee_save_methods_[kRefsAndArgs] verified in InitCpu #define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 16 -// Offset of field Thread::suspend_count_ verified in InitCpu +// Offset of field Thread::suspend_count_ #define THREAD_FLAGS_OFFSET 0 -// Offset of field Thread::card_table_ verified in InitCpu +// Offset of field Thread::card_table_ #define THREAD_CARD_TABLE_OFFSET 112 -// Offset of field Thread::exception_ verified in InitCpu +// Offset of field Thread::exception_ #define THREAD_EXCEPTION_OFFSET 120 -// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu +// Offset of field Thread::thin_lock_thread_id_ #define THREAD_ID_OFFSET 12 -#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 368 -#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 176 -#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 304 +#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176 +#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96 +#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224 // Expected size of a heap reference #define HEAP_REFERENCE_SIZE 4 diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 2201b55849..ba85d32c98 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -24,183 +24,161 @@ * Runtime::CreateCalleeSaveMethod(kSaveAll) */ .macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME - adrp x9, :got:_ZN3art7Runtime9instance_E - ldr x9, [x9, #:got_lo12:_ZN3art7Runtime9instance_E] + adrp xIP0, :got:_ZN3art7Runtime9instance_E + ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E] // Our registers aren't intermixed - just spill in order. - ldr x9,[x9] // x9 = & (art::Runtime * art::Runtime.instance_) . + ldr xIP0, [xIP0] // xIP0 = & (art::Runtime * art::Runtime.instance_) . - // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs] . + // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs] . THIS_LOAD_REQUIRES_READ_BARRIER - ldr x9, [x9, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ] + ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ] - sub sp, sp, #368 - .cfi_adjust_cfa_offset 368 + sub sp, sp, #176 + .cfi_adjust_cfa_offset 176 // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 368) +#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 176) #error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM64) size not as expected." 
#endif - // FP args - stp d0, d1, [sp, #8] - stp d2, d3, [sp, #24] - stp d4, d5, [sp, #40] - stp d6, d7, [sp, #56] - // FP callee-saves - stp d8, d9, [sp, #72] - stp d10, d11, [sp, #88] - stp d12, d13, [sp, #104] - stp d14, d15, [sp, #120] - - stp d16, d17, [sp, #136] - stp d18, d19, [sp, #152] - stp d20, d21, [sp, #168] - stp d22, d23, [sp, #184] - stp d24, d25, [sp, #200] - stp d26, d27, [sp, #216] - stp d28, d29, [sp, #232] - stp d30, d31, [sp, #248] - + stp d8, d9, [sp, #8] + stp d10, d11, [sp, #24] + stp d12, d13, [sp, #40] + stp d14, d15, [sp, #56] - // Callee saved. - stp xSELF, x19, [sp, #264] - .cfi_rel_offset x18, 264 - .cfi_rel_offset x19, 272 + // Reserved registers + stp xSELF, xSUSPEND, [sp, #72] + .cfi_rel_offset x18, 72 + .cfi_rel_offset x19, 80 - stp x20, x21, [sp, #280] - .cfi_rel_offset x20, 280 - .cfi_rel_offset x21, 288 + // callee-saves + stp x20, x21, [sp, #88] + .cfi_rel_offset x20, 88 + .cfi_rel_offset x21, 96 - stp x22, x23, [sp, #296] - .cfi_rel_offset x22, 296 - .cfi_rel_offset x23, 304 + stp x22, x23, [sp, #104] + .cfi_rel_offset x22, 104 + .cfi_rel_offset x23, 112 - stp x24, x25, [sp, #312] - .cfi_rel_offset x24, 312 - .cfi_rel_offset x25, 320 + stp x24, x25, [sp, #120] + .cfi_rel_offset x24, 120 + .cfi_rel_offset x25, 128 - stp x26, x27, [sp, #328] - .cfi_rel_offset x26, 328 - .cfi_rel_offset x27, 336 + stp x26, x27, [sp, #136] + .cfi_rel_offset x26, 136 + .cfi_rel_offset x27, 144 - stp x28, xFP, [sp, #344] // Save FP. - .cfi_rel_offset x28, 344 - .cfi_rel_offset x29, 352 + stp x28, x29, [sp, #152] + .cfi_rel_offset x28, 152 + .cfi_rel_offset x29, 160 - str xLR, [sp, #360] - .cfi_rel_offset x30, 360 + str xLR, [sp, #168] + .cfi_rel_offset x30, 168 // Loads appropriate callee-save-method - str x9, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs] - + str xIP0, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs] .endm /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kRefsOnly). */ -// WIP. .macro SETUP_REF_ONLY_CALLEE_SAVE_FRAME - adrp x9, :got:_ZN3art7Runtime9instance_E - ldr x9, [x9, #:got_lo12:_ZN3art7Runtime9instance_E] + adrp xIP0, :got:_ZN3art7Runtime9instance_E + ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E] // Our registers aren't intermixed - just spill in order. - ldr x9,[x9] // x9 = & (art::Runtime * art::Runtime.instance_) . + ldr xIP0, [xIP0] // xIP0 = & (art::Runtime * art::Runtime.instance_) . - // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs] . + // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs] . THIS_LOAD_REQUIRES_READ_BARRIER - ldr x9, [x9, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ] + ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ] - sub sp, sp, #176 - .cfi_adjust_cfa_offset 176 + sub sp, sp, #96 + .cfi_adjust_cfa_offset 96 // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 176) +#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 96) #error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected." #endif - // FP callee-saves - stp d8, d9, [sp, #8] - stp d10, d11, [sp, #24] - stp d12, d13, [sp, #40] - stp d14, d15, [sp, #56] + // Callee-saves + stp x20, x21, [sp, #8] + .cfi_rel_offset x20, 8 + .cfi_rel_offset x21, 16 - // Callee saved. 
- stp xSELF, x19, [sp, #72] - .cfi_rel_offset x18, 72 - .cfi_rel_offset x19, 80 + stp x22, x23, [sp, #24] + .cfi_rel_offset x22, 24 + .cfi_rel_offset x23, 32 - stp x20, x21, [sp, #88] - .cfi_rel_offset x20, 88 - .cfi_rel_offset x21, 96 + stp x24, x25, [sp, #40] + .cfi_rel_offset x24, 40 + .cfi_rel_offset x25, 48 - stp x22, x23, [sp, #104] - .cfi_rel_offset x22, 104 - .cfi_rel_offset x23, 112 + stp x26, x27, [sp, #56] + .cfi_rel_offset x26, 56 + .cfi_rel_offset x27, 64 - stp x24, x25, [sp, #120] - .cfi_rel_offset x24, 120 - .cfi_rel_offset x25, 128 - - stp x26, x27, [sp, #136] - .cfi_rel_offset x26, 136 - .cfi_rel_offset x27, 144 + stp x28, x29, [sp, #72] + .cfi_rel_offset x28, 72 + .cfi_rel_offset x29, 80 - stp x28, xFP, [sp, #152] // Save FP. - .cfi_rel_offset x28, 152 - .cfi_rel_offset x29, 160 + // LR + str xLR, [sp, #88] + .cfi_rel_offset x30, 88 - str xLR, [sp, #168] - .cfi_rel_offset x30, 168 + // Save xSELF to xETR. + mov xETR, xSELF // Loads appropriate callee-save-method - str x9, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs] + str xIP0, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs] .endm +// TODO: Probably no need to restore registers preserved by aapcs64. .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME - // FP callee saves - ldp d8, d9, [sp, #8] - ldp d10, d11, [sp, #24] - ldp d12, d13, [sp, #40] - ldp d14, d15, [sp, #56] - - // Callee saved. - ldp xSELF, x19, [sp, #72] - .cfi_restore x18 - .cfi_restore x19 + // Restore xSELF. + mov xSELF, xETR - ldp x20, x21, [sp, #88] + // Callee-saves + ldp x20, x21, [sp, #8] .cfi_restore x20 .cfi_restore x21 - ldp x22, x23, [sp, #104] + ldp x22, x23, [sp, #24] .cfi_restore x22 .cfi_restore x23 - ldp x24, x25, [sp, #120] + ldp x24, x25, [sp, #40] .cfi_restore x24 .cfi_restore x25 - ldp x26, x27, [sp, #136] + ldp x26, x27, [sp, #56] .cfi_restore x26 .cfi_restore x27 - ldp x28, xFP, [sp, #152] // Save FP. + ldp x28, x29, [sp, #72] .cfi_restore x28 .cfi_restore x29 - ldr xLR, [sp, #168] + // LR + ldr xLR, [sp, #88] .cfi_restore x30 - add sp, sp, #176 - .cfi_adjust_cfa_offset -176 + add sp, sp, #96 + .cfi_adjust_cfa_offset -96 .endm .macro POP_REF_ONLY_CALLEE_SAVE_FRAME - add sp, sp, #176 - .cfi_adjust_cfa_offset -176 + // Restore xSELF as it might be scratched. + mov xSELF, xETR + // ETR + ldr xETR, [sp, #16] + .cfi_restore x21 + + add sp, sp, #96 + .cfi_adjust_cfa_offset -96 .endm .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN @@ -210,62 +188,61 @@ .macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL - sub sp, sp, #304 - .cfi_adjust_cfa_offset 304 + sub sp, sp, #224 + .cfi_adjust_cfa_offset 224 // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 304) +#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 224) #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected." 
#endif - stp d0, d1, [sp, #16] - stp d2, d3, [sp, #32] - stp d4, d5, [sp, #48] - stp d6, d7, [sp, #64] - stp d8, d9, [sp, #80] - stp d10, d11, [sp, #96] - stp d12, d13, [sp, #112] - stp d14, d15, [sp, #128] - - stp x1, x2, [sp, #144] - .cfi_rel_offset x1, 144 - .cfi_rel_offset x2, 152 - - stp x3, x4, [sp, #160] - .cfi_rel_offset x3, 160 - .cfi_rel_offset x4, 168 - - stp x5, x6, [sp, #176] - .cfi_rel_offset x5, 176 - .cfi_rel_offset x6, 184 - - stp x7, xSELF, [sp, #192] - .cfi_rel_offset x7, 192 - .cfi_rel_offset x18, 200 - - stp x19, x20, [sp, #208] - .cfi_rel_offset x19, 208 - .cfi_rel_offset x20, 216 - - stp x21, x22, [sp, #224] - .cfi_rel_offset x21, 224 - .cfi_rel_offset x22, 232 - - stp x23, x24, [sp, #240] - .cfi_rel_offset x23, 240 - .cfi_rel_offset x24, 248 - - stp x25, x26, [sp, #256] - .cfi_rel_offset x25, 256 - .cfi_rel_offset x26, 264 - - stp x27, x28, [sp, #272] - .cfi_rel_offset x27, 272 - .cfi_rel_offset x28, 280 - - stp xFP, xLR, [sp, #288] - .cfi_rel_offset x29, 288 - .cfi_rel_offset x30, 296 + // FP args + stp d0, d1, [sp, #16] + stp d2, d3, [sp, #32] + stp d4, d5, [sp, #48] + stp d6, d7, [sp, #64] + + // args and x20(callee-save) + stp x1, x2, [sp, #80] + .cfi_rel_offset x1, 80 + .cfi_rel_offset x2, 88 + + stp x3, x4, [sp, #96] + .cfi_rel_offset x3, 96 + .cfi_rel_offset x4, 104 + + stp x5, x6, [sp, #112] + .cfi_rel_offset x5, 112 + .cfi_rel_offset x6, 120 + + stp x7, x20, [sp, #128] + .cfi_rel_offset x7, 128 + .cfi_rel_offset x20, 136 + + // Callee-saves. + stp x21, x22, [sp, #144] + .cfi_rel_offset x21, 144 + .cfi_rel_offset x22, 152 + + stp x23, x24, [sp, #160] + .cfi_rel_offset x23, 160 + .cfi_rel_offset x24, 168 + + stp x25, x26, [sp, #176] + .cfi_rel_offset x25, 176 + .cfi_rel_offset x26, 184 + + stp x27, x28, [sp, #192] + .cfi_rel_offset x27, 192 + .cfi_rel_offset x28, 200 + + // x29(callee-save) and LR + stp x29, xLR, [sp, #208] + .cfi_rel_offset x29, 208 + .cfi_rel_offset x30, 216 + + // Save xSELF to xETR. + mov xETR, xSELF .endm /* @@ -275,75 +252,73 @@ * TODO This is probably too conservative - saving FP & LR. */ .macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME - adrp x9, :got:_ZN3art7Runtime9instance_E - ldr x9, [x9, #:got_lo12:_ZN3art7Runtime9instance_E] + adrp xIP0, :got:_ZN3art7Runtime9instance_E + ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E] // Our registers aren't intermixed - just spill in order. - ldr x9,[x9] // x9 = & (art::Runtime * art::Runtime.instance_) . + ldr xIP0, [xIP0] // xIP0 = & (art::Runtime * art::Runtime.instance_) . - // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs] . + // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs] . THIS_LOAD_REQUIRES_READ_BARRIER - ldr x9, [x9, RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ] + ldr xIP0, [xIP0, RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ] SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL - str x9, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs] + str xIP0, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs] .endm +// TODO: Probably no need to restore registers preserved by aapcs64. .macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME + // Restore xSELF. + mov xSELF, xETR - ldp d0, d1, [sp, #16] - ldp d2, d3, [sp, #32] - ldp d4, d5, [sp, #48] - ldp d6, d7, [sp, #64] - ldp d8, d9, [sp, #80] - ldp d10, d11, [sp, #96] - ldp d12, d13, [sp, #112] - ldp d14, d15, [sp, #128] - - // args. 
- ldp x1, x2, [sp, #144] + // FP args + ldp d0, d1, [sp, #16] + ldp d2, d3, [sp, #32] + ldp d4, d5, [sp, #48] + ldp d6, d7, [sp, #64] + + // args and x20(callee-save) + ldp x1, x2, [sp, #80] .cfi_restore x1 .cfi_restore x2 - ldp x3, x4, [sp, #160] + ldp x3, x4, [sp, #96] .cfi_restore x3 .cfi_restore x4 - ldp x5, x6, [sp, #176] + ldp x5, x6, [sp, #112] .cfi_restore x5 .cfi_restore x6 - ldp x7, xSELF, [sp, #192] + ldp x7, x20, [sp, #128] .cfi_restore x7 - .cfi_restore x18 - - ldp x19, x20, [sp, #208] - .cfi_restore x19 .cfi_restore x20 - ldp x21, x22, [sp, #224] + // Callee-saves. + ldp x21, x22, [sp, #144] .cfi_restore x21 .cfi_restore x22 - ldp x23, x24, [sp, #240] + ldp x23, x24, [sp, #160] .cfi_restore x23 .cfi_restore x24 - ldp x25, x26, [sp, #256] + ldp x25, x26, [sp, #176] .cfi_restore x25 .cfi_restore x26 - ldp x27, x28, [sp, #272] + ldp x27, x28, [sp, #192] .cfi_restore x27 .cfi_restore x28 - ldp xFP, xLR, [sp, #288] + // x29(callee-save) and LR + ldp x29, xLR, [sp, #208] .cfi_restore x29 .cfi_restore x30 - add sp, sp, #304 - .cfi_adjust_cfa_offset -304 + add sp, sp, #224 + .cfi_adjust_cfa_offset -224 .endm .macro RETURN_IF_RESULT_IS_ZERO @@ -381,7 +356,7 @@ .endm .macro RETURN_OR_DELIVER_PENDING_EXCEPTION - RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x9 + RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0 .endm // Same as above with x1. This is helpful in stubs that want to avoid clobbering another register. @@ -400,7 +375,7 @@ .extern \cxx_name ENTRY \c_name SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context - mov x0, xSELF // pass Thread::Current + mov x0, xSELF // pass Thread::Current mov x1, sp // pass SP b \cxx_name // \cxx_name(Thread*, SP) END \c_name @@ -410,7 +385,7 @@ END \c_name .extern \cxx_name ENTRY \c_name SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context. - mov x1, xSELF // pass Thread::Current. + mov x1, xSELF // pass Thread::Current. mov x2, sp // pass SP. b \cxx_name // \cxx_name(arg, Thread*, SP). brk 0 @@ -421,7 +396,7 @@ END \c_name .extern \cxx_name ENTRY \c_name SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context - mov x2, xSELF // pass Thread::Current + mov x2, xSELF // pass Thread::Current mov x3, sp // pass SP b \cxx_name // \cxx_name(arg1, arg2, Thread*, SP) brk 0 @@ -478,7 +453,7 @@ ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFr * * Adapted from ARM32 code. * - * Clobbers x12. + * Clobbers xIP0. */ .macro INVOKE_TRAMPOLINE c_name, cxx_name .extern \cxx_name @@ -491,10 +466,10 @@ ENTRY \c_name mov x3, xSELF // pass Thread::Current mov x4, sp bl \cxx_name // (method_idx, this, caller, Thread*, SP) - mov x12, x1 // save Method*->code_ + mov xIP0, x1 // save Method*->code_ RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME cbz x0, 1f // did we find the target? if not go to exception delivery - br x12 // tail call to target + br xIP0 // tail call to target 1: DELIVER_PENDING_EXCEPTION END \c_name @@ -511,7 +486,7 @@ INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvo .macro INVOKE_STUB_CREATE_FRAME -SAVE_SIZE=6*8 // x4, x5, x19(wSUSPEND), SP, LR & FP saved. +SAVE_SIZE=6*8 // x4, x5, xSUSPEND, SP, LR & FP saved. SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE @@ -527,7 +502,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE .cfi_def_cfa_register x10 // before this. 
.cfi_adjust_cfa_offset SAVE_SIZE - stp x9, x19, [x10, #32] // Save old stack pointer and x19(wSUSPEND) + stp x9, xSUSPEND, [x10, #32] // Save old stack pointer and xSUSPEND .cfi_rel_offset sp, 32 .cfi_rel_offset x19, 40 @@ -608,7 +583,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE str x0, [x4] .Lexit_art_quick_invoke_stub\@: - ldp x2, x19, [xFP, #32] // Restore stack pointer and x19. + ldp x2, xSUSPEND, [xFP, #32] // Restore stack pointer and xSUSPEND. .cfi_restore x19 mov sp, x2 .cfi_restore sp @@ -636,6 +611,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE * | FP'' | <- SP' * +----------------------+ * +----------------------+ + * | x19 | <- Used as wSUSPEND, won't be restored by managed code. * | SP' | * | X5 | * | X4 | Saved registers @@ -1241,8 +1217,6 @@ END \name .endm // Macros taking opportunity of code similarities for downcalls with referrer. - -// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return .extern \entrypoint ENTRY \name @@ -1256,7 +1230,6 @@ ENTRY \name END \name .endm -// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY .macro TWO_ARG_REF_DOWNCALL name, entrypoint, return .extern \entrypoint ENTRY \name @@ -1270,7 +1243,6 @@ ENTRY \name END \name .endm -// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return .extern \entrypoint ENTRY \name @@ -1363,8 +1335,8 @@ ENTRY art_quick_proxy_invoke_handler mov x2, xSELF // pass Thread::Current mov x3, sp // pass SP bl artQuickProxyInvokeHandler // (Method* proxy method, receiver, Thread*, SP) - ldr xSELF, [sp, #200] // Restore self pointer. - ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET] + // Use xETR as xSELF might be scratched by native function above. + ldr x2, [xETR, THREAD_EXCEPTION_OFFSET] cbnz x2, .Lexception_in_proxy // success if no exception is pending RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame fmov d0, x0 // Store result in d0 in case it was float or double @@ -1375,14 +1347,14 @@ ENTRY art_quick_proxy_invoke_handler END art_quick_proxy_invoke_handler /* - * Called to resolve an imt conflict. x12 is a hidden argument that holds the target method's + * Called to resolve an imt conflict. xIP1 is a hidden argument that holds the target method's * dex method index. */ ENTRY art_quick_imt_conflict_trampoline ldr w0, [sp, #0] // load caller Method* ldr w0, [x0, #METHOD_DEX_CACHE_METHODS_OFFSET] // load dex_cache_resolved_methods add x0, x0, #OBJECT_ARRAY_DATA_OFFSET // get starting address of data - ldr w0, [x0, x12, lsl 2] // load the target method + ldr w0, [x0, xIP1, lsl 2] // load the target method b art_quick_invoke_interface_trampoline END art_quick_imt_conflict_trampoline @@ -1392,10 +1364,10 @@ ENTRY art_quick_resolution_trampoline mov x3, sp bl artQuickResolutionTrampoline // (called, receiver, Thread*, SP) cbz x0, 1f - mov x9, x0 // Remember returned code pointer in x9. + mov xIP0, x0 // Remember returned code pointer in xIP0. ldr w0, [sp, #0] // artQuickResolutionTrampoline puts called method in *SP. 
RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME - br x9 + br xIP0 1: RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME DELIVER_PENDING_EXCEPTION @@ -1419,7 +1391,6 @@ END art_quick_resolution_trampoline * | X22 | callee save * | X21 | callee save * | X20 | callee save - * | X19 | callee save * | X7 | arg7 * | X6 | arg6 * | X5 | arg5 @@ -1427,14 +1398,6 @@ END art_quick_resolution_trampoline * | X3 | arg3 * | X2 | arg2 * | X1 | arg1 - * | D15 | float arg 8 - * | D14 | float arg 8 - * | D13 | float arg 8 - * | D12 | callee save - * | D11 | callee save - * | D10 | callee save - * | D9 | callee save - * | D8 | callee save * | D7 | float arg 8 * | D6 | float arg 7 * | D5 | float arg 6 @@ -1476,8 +1439,8 @@ ENTRY art_quick_generic_jni_trampoline // of the frame when the handle scope is inserted. mov xFP, sp - mov x8, #5120 - sub sp, sp, x8 + mov xIP0, #5120 + sub sp, sp, xIP0 // prepare for artQuickGenericJniTrampoline call // (Thread*, SP) @@ -1517,17 +1480,14 @@ ENTRY art_quick_generic_jni_trampoline add sp, sp, #128 - blr xIP0 // native call. - - // Restore self pointer. - ldr xSELF, [x28, #200] + blr xIP0 // native call. // result sign extension is handled in C code // prepare for artQuickGenericJniEndTrampoline call // (Thread*, result, result_f) // x0 x1 x2 <= C calling convention mov x1, x0 // Result (from saved) - mov x0, xSELF // Thread register + mov x0, xETR // Thread register, original xSELF might be scratched by native code. fmov x2, d0 // d0 will contain floating point result, but needs to go into x2 bl artQuickGenericJniEndTrampoline @@ -1536,11 +1496,9 @@ ENTRY art_quick_generic_jni_trampoline mov sp, x28 .cfi_def_cfa_register sp - // Restore self pointer. - ldr xSELF, [x28, #200] - // Pending exceptions possible. - ldr x1, [xSELF, THREAD_EXCEPTION_OFFSET] + // Use xETR as xSELF might be scratched by native code + ldr x1, [xETR, THREAD_EXCEPTION_OFFSET] cbnz x1, .Lexception_in_native // Tear down the callee-save frame. @@ -1553,7 +1511,6 @@ ENTRY art_quick_generic_jni_trampoline .Lentry_error: mov sp, x28 .cfi_def_cfa_register sp - ldr xSELF, [x28, #200] .Lexception_in_native: RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME DELIVER_PENDING_EXCEPTION @@ -1592,19 +1549,19 @@ END art_quick_to_interpreter_bridge ENTRY art_quick_instrumentation_entry SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME - mov x19, x0 // Preserve method reference in a callee-save. + mov x20, x0 // Preserve method reference in a callee-save. mov x2, xSELF mov x3, sp mov x4, xLR bl artInstrumentationMethodEntryFromCode // (Method*, Object*, Thread*, SP, LR) - mov x9, x0 // x0 = result of call. - mov x0, x19 // Reload method reference. + mov xIP0, x0 // x0 = result of call. + mov x0, x20 // Reload method reference. RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME // Note: will restore xSELF adr xLR, art_quick_instrumentation_exit - br x9 // Tail-call method with lr set to art_quick_instrumentation_exit. + br xIP0 // Tail-call method with lr set to art_quick_instrumentation_exit. END art_quick_instrumentation_entry .extern artInstrumentationMethodExitFromCode @@ -1627,18 +1584,16 @@ ENTRY art_quick_instrumentation_exit mov x0, xSELF // Pass Thread. bl artInstrumentationMethodExitFromCode // (Thread*, SP, gpr_res, fpr_res) - mov x9, x0 // Return address from instrumentation call. + mov xIP0, x0 // Return address from instrumentation call. mov xLR, x1 // r1 is holding link register if we're to bounce to deoptimize ldr d0, [sp, #8] // Restore floating-point result. ldr x0, [sp], 16 // Restore integer result, and drop stack area. 
.cfi_adjust_cfa_offset 16 - // Need to restore x18. - ldr xSELF, [sp, #72] POP_REF_ONLY_CALLEE_SAVE_FRAME - br x9 // Tail-call out. + br xIP0 // Tail-call out. END art_quick_instrumentation_exit /* @@ -1703,15 +1658,15 @@ ENTRY art_quick_indexof .Lindexof_loop4: ldrh w6, [x0, #2]! ldrh w7, [x0, #2]! - ldrh w8, [x0, #2]! - ldrh w9, [x0, #2]! + ldrh wIP0, [x0, #2]! + ldrh wIP1, [x0, #2]! cmp w6, w1 b.eq .Lmatch_0 cmp w7, w1 b.eq .Lmatch_1 - cmp w8, w1 + cmp wIP0, w1 b.eq .Lmatch_2 - cmp w9, w1 + cmp wIP1, w1 b.eq .Lmatch_3 subs w2, w2, #4 b.ge .Lindexof_loop4 @@ -1855,17 +1810,17 @@ ENTRY art_quick_string_compareto ret .Ldo_memcmp16: - mov x14, x0 // Save x0 and LR. __memcmp16 does not use these temps. - mov x15, xLR // TODO: Codify and check that? + mov xIP0, x0 // Save x0 and LR. __memcmp16 does not use these temps. + mov xIP1, xLR // TODO: Codify and check that? mov x0, x2 uxtw x2, w3 bl __memcmp16 - mov xLR, x15 // Restore LR. + mov xLR, xIP1 // Restore LR. cmp x0, #0 // Check the memcmp difference. - csel x0, x0, x14, ne // x0 := x0 != 0 ? x14(prev x0=length diff) : x1. + csel x0, x0, xIP0, ne // x0 := x0 != 0 ? xIP0(prev x0=length diff) : x1. ret END art_quick_string_compareto diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h index cb830acdea..15c6c07592 100644 --- a/runtime/arch/arm64/quick_method_frame_info_arm64.h +++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h @@ -20,53 +20,53 @@ #include "quick/quick_method_frame_info.h" #include "registers_arm64.h" #include "runtime.h" // for Runtime::CalleeSaveType. +#include "utils.h" // for POPCOUNT namespace art { namespace arm64 { +// Registers need to be restored but not preserved by aapcs64. +static constexpr uint32_t kArm64CalleeSaveAlwaysSpills = + // Note: ArtMethod::GetReturnPcOffsetInBytes() rely on the assumption that + // LR is always saved on the top of the frame for all targets. + // That is, lr = *(sp + framesize - pointsize). + (1 << art::arm64::LR); // Callee saved registers static constexpr uint32_t kArm64CalleeSaveRefSpills = - (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) | - (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) | - (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) | - (1 << art::arm64::X28); + (1 << art::arm64::X20) | (1 << art::arm64::X21) | (1 << art::arm64::X22) | + (1 << art::arm64::X23) | (1 << art::arm64::X24) | (1 << art::arm64::X25) | + (1 << art::arm64::X26) | (1 << art::arm64::X27) | (1 << art::arm64::X28) | + (1 << art::arm64::X29); // X0 is the method pointer. Not saved. static constexpr uint32_t kArm64CalleeSaveArgSpills = (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) | (1 << art::arm64::X4) | (1 << art::arm64::X5) | (1 << art::arm64::X6) | (1 << art::arm64::X7); -// TODO This is conservative. Only ALL should include the thread register. -// The thread register is not preserved by the aapcs64. -// LR is always saved. -static constexpr uint32_t kArm64CalleeSaveAllSpills = 0; // (1 << art::arm64::LR); +static constexpr uint32_t kArm64CalleeSaveAllSpills = + // Thread register. + (1 << art::arm64::X18) | + // Suspend register. + 1 << art::arm64::X19; -// Save callee-saved floating point registers. Rest are scratch/parameters. 
+static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0; +static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0; static constexpr uint32_t kArm64CalleeSaveFpArgSpills = (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) | (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) | (1 << art::arm64::D6) | (1 << art::arm64::D7); -static constexpr uint32_t kArm64CalleeSaveFpRefSpills = +static constexpr uint32_t kArm64FpAllSpills = (1 << art::arm64::D8) | (1 << art::arm64::D9) | (1 << art::arm64::D10) | (1 << art::arm64::D11) | (1 << art::arm64::D12) | (1 << art::arm64::D13) | (1 << art::arm64::D14) | (1 << art::arm64::D15); -static constexpr uint32_t kArm64FpAllSpills = - kArm64CalleeSaveFpArgSpills | - (1 << art::arm64::D16) | (1 << art::arm64::D17) | (1 << art::arm64::D18) | - (1 << art::arm64::D19) | (1 << art::arm64::D20) | (1 << art::arm64::D21) | - (1 << art::arm64::D22) | (1 << art::arm64::D23) | (1 << art::arm64::D24) | - (1 << art::arm64::D25) | (1 << art::arm64::D26) | (1 << art::arm64::D27) | - (1 << art::arm64::D28) | (1 << art::arm64::D29) | (1 << art::arm64::D30) | - (1 << art::arm64::D31); constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { - return kArm64CalleeSaveRefSpills | + return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) | - (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0) | (1 << art::arm64::FP) | - (1 << art::arm64::X18) | (1 << art::arm64::LR); + (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0); } constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) { - return kArm64CalleeSaveFpRefSpills | + return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills | (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) | (type == Runtime::kSaveAll ? kArm64FpAllSpills : 0); } @@ -83,6 +83,22 @@ constexpr QuickMethodFrameInfo Arm64CalleeSaveMethodFrameInfo(Runtime::CalleeSav Arm64CalleeSaveFpSpills(type)); } +constexpr size_t Arm64CalleeSaveFpr1Offset(Runtime::CalleeSaveType type) { + return Arm64CalleeSaveFrameSize(type) - + (POPCOUNT(Arm64CalleeSaveCoreSpills(type)) + + POPCOUNT(Arm64CalleeSaveFpSpills(type))) * kArm64PointerSize; +} + +constexpr size_t Arm64CalleeSaveGpr1Offset(Runtime::CalleeSaveType type) { + return Arm64CalleeSaveFrameSize(type) - + POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * kArm64PointerSize; +} + +constexpr size_t Arm64CalleeSaveLrOffset(Runtime::CalleeSaveType type) { + return Arm64CalleeSaveFrameSize(type) - + POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) * kArm64PointerSize; +} + } // namespace arm64 } // namespace art diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h index ea346e0ffa..9ccab70bb9 100644 --- a/runtime/arch/arm64/registers_arm64.h +++ b/runtime/arch/arm64/registers_arm64.h @@ -57,7 +57,7 @@ enum Register { X30 = 30, X31 = 31, TR = 18, // ART Thread Register - Managed Runtime (Caller Saved Reg) - ETR = 19, // ART Thread Register - External Calls (Callee Saved Reg) + ETR = 21, // ART Thread Register - External Calls (Callee Saved Reg) IP0 = 16, // Used as scratch by VIXL. IP1 = 17, // Used as scratch by ART JNI Assembler. 
FP = 29, diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 8bc3707894..fa198d7ef5 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -58,9 +58,12 @@ class QuickArgumentVisitor { static constexpr bool kQuickSoftFloatAbi = true; // This is a soft float ABI. static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs. static constexpr size_t kNumQuickFprArgs = 0; // 0 arguments passed in FPRs. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0; // Offset of first FPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 8; // Offset of first GPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 44; // Offset of return address. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = + arm::ArmCalleeSaveFpr1Offset(Runtime::kRefsAndArgs); // Offset of first FPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = + arm::ArmCalleeSaveGpr1Offset(Runtime::kRefsAndArgs); // Offset of first GPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = + arm::ArmCalleeSaveLrOffset(Runtime::kRefsAndArgs); // Offset of return address. static size_t GprIndexToGprOffset(uint32_t gpr_index) { return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA); } @@ -74,13 +77,13 @@ class QuickArgumentVisitor { // | arg1 spill | | // | Method* | --- // | LR | - // | X28 | + // | X29 | // | : | - // | X19 | + // | X20 | // | X7 | // | : | // | X1 | - // | D15 | + // | D7 | // | : | // | D0 | // | | padding @@ -88,9 +91,12 @@ class QuickArgumentVisitor { static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI. static constexpr size_t kNumQuickGprArgs = 7; // 7 arguments passed in GPRs. static constexpr size_t kNumQuickFprArgs = 8; // 8 arguments passed in FPRs. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 144; // Offset of first GPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 296; // Offset of return address. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = + arm64::Arm64CalleeSaveFpr1Offset(Runtime::kRefsAndArgs); // Offset of first FPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = + arm64::Arm64CalleeSaveGpr1Offset(Runtime::kRefsAndArgs); // Offset of first GPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = + arm64::Arm64CalleeSaveLrOffset(Runtime::kRefsAndArgs); // Offset of return address. static size_t GprIndexToGprOffset(uint32_t gpr_index) { return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA); } |
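The final hunk replaces the hard-coded QuickArgumentVisitor offsets in quick_trampoline_entrypoints.cc with the new constexpr helpers. As a sanity check, here is a stand-alone sketch (not ART source) that reproduces the AArch64 RefsAndArgs numbers the same way Arm64CalleeSaveFpr1Offset/Gpr1Offset/LrOffset do, and shows they agree with the stp offsets in SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL above:

```cpp
#include <cstddef>

constexpr size_t kFrameSize  = 224;  // FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
constexpr size_t kCoreSpills = 18;   // x1-x7, x20-x29, LR
constexpr size_t kFpSpills   = 8;    // d0-d7
constexpr size_t kPtr        = 8;    // Spill slot size.

// FPRs are spilled lowest, then GPRs, with LR at the very top of the frame.
constexpr size_t kFpr1Offset = kFrameSize - (kCoreSpills + kFpSpills) * kPtr;  // 16
constexpr size_t kGpr1Offset = kFrameSize - kCoreSpills * kPtr;                // 80
constexpr size_t kLrOffset   = kFrameSize - 1 * kPtr;                          // 216

// These match the assembly in this patch: "stp d0, d1, [sp, #16]",
// "stp x1, x2, [sp, #80]" and ".cfi_rel_offset x30, 216".
static_assert(kFpr1Offset == 16, "first FPR arg offset");
static_assert(kGpr1Offset == 80, "first GPR arg offset");
static_assert(kLrOffset == 216, "return address offset");
```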