author    Serguei Katkov <serguei.i.katkov@intel.com>  2014-07-08 17:21:53 +0700
committer Chao-ying Fu <chao-ying.fu@intel.com>        2014-07-10 13:31:47 -0700
commit    c380191f3048db2a3796d65db8e5d5a5e7b08c65 (patch)
tree      56f7f5fc60f8445ead63cd43faf06b9e1dfda6b2 /runtime/arch
parent    cba6b1fc88fd54c35211fd49a7a7501cfcdaa170 (diff)
x86_64: Enable fp-reg promotion
The patch introduces four registers, XMM12-XMM15, available for promotion of FP virtual registers.

Change-Id: I3f89ad07fc8ae98b70f550eada09be7b693ffb67
Signed-off-by: Serguei Katkov <serguei.i.katkov@intel.com>
Signed-off-by: Chao-ying Fu <chao-ying.fu@intel.com>
Diffstat (limited to 'runtime/arch')
-rw-r--r--  runtime/arch/x86_64/asm_support_x86_64.h             |   6
-rw-r--r--  runtime/arch/x86_64/context_x86_64.cc                |  49
-rw-r--r--  runtime/arch/x86_64/entrypoints_init_x86_64.cc       |   4
-rw-r--r--  runtime/arch/x86_64/jni_entrypoints_x86_64.S         |  16
-rw-r--r--  runtime/arch/x86_64/quick_entrypoints_x86_64.S       | 172
-rw-r--r--  runtime/arch/x86_64/quick_method_frame_info_x86_64.h |   6
-rw-r--r--  runtime/arch/x86_64/registers_x86_64.cc              |   9
7 files changed, 204 insertions(+), 58 deletions(-)
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index bff8501cf2..05d0ef8761 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -35,9 +35,9 @@
// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
#define THREAD_ID_OFFSET 12
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8
// Expected size of a heap reference
#define HEAP_REFERENCE_SIZE 4
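
Each callee-save frame grows by four 8-byte spill slots for XMM12-XMM15. A standalone sketch (plain C++, not ART code) of the arithmetic behind the new macro bodies:

    #include <cstdint>

    // Four extra FP callee saves (XMM12-XMM15), one 8-byte movq slot each.
    constexpr uint32_t kExtraFpSpillBytes = 4 * 8;

    // Old frame sizes plus the new spill area.
    static_assert(64 + kExtraFpSpillBytes == 96,
                  "SAVE_ALL and REFS_ONLY frames grow from 64 to 96 bytes");
    static_assert(176 + kExtraFpSpillBytes == 208,
                  "REFS_AND_ARGS frame grows from 176 to 208 bytes");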
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index e1f47ee3d4..7699eaf9d4 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -78,6 +78,18 @@ void X86_64Context::SmashCallerSaves() {
gprs_[R9] = nullptr;
gprs_[R10] = nullptr;
gprs_[R11] = nullptr;
+ fprs_[XMM0] = nullptr;
+ fprs_[XMM1] = nullptr;
+ fprs_[XMM2] = nullptr;
+ fprs_[XMM3] = nullptr;
+ fprs_[XMM4] = nullptr;
+ fprs_[XMM5] = nullptr;
+ fprs_[XMM6] = nullptr;
+ fprs_[XMM7] = nullptr;
+ fprs_[XMM8] = nullptr;
+ fprs_[XMM9] = nullptr;
+ fprs_[XMM10] = nullptr;
+ fprs_[XMM11] = nullptr;
}
bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
@@ -102,41 +114,26 @@ bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) {
}
}
+extern "C" void art_quick_do_long_jump(uintptr_t*, uintptr_t*);
+
void X86_64Context::DoLongJump() {
#if defined(__x86_64__)
- // Array of GPR values, filled from the context backward for the long jump pop. We add a slot at
- // the top for the stack pointer that doesn't get popped in a pop-all.
- volatile uintptr_t gprs[kNumberOfCpuRegisters + 1];
+ uintptr_t gprs[kNumberOfCpuRegisters + 1];
+ uintptr_t fprs[kNumberOfFloatRegisters];
+
for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86_64Context::kBadGprBase + i;
}
+ for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
+ fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : X86_64Context::kBadFprBase + i;
+ }
+
// We want to load the stack pointer one slot below so that the ret will pop rip.
uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize;
gprs[kNumberOfCpuRegisters] = rsp;
*(reinterpret_cast<uintptr_t*>(rsp)) = rip_;
- __asm__ __volatile__(
- "movq %0, %%rsp\n\t" // RSP points to gprs.
- "popq %%r15\n\t" // Load all registers except RSP and RIP with values in gprs.
- "popq %%r14\n\t"
- "popq %%r13\n\t"
- "popq %%r12\n\t"
- "popq %%r11\n\t"
- "popq %%r10\n\t"
- "popq %%r9\n\t"
- "popq %%r8\n\t"
- "popq %%rdi\n\t"
- "popq %%rsi\n\t"
- "popq %%rbp\n\t"
- "addq $8, %%rsp\n\t"
- "popq %%rbx\n\t"
- "popq %%rdx\n\t"
- "popq %%rcx\n\t"
- "popq %%rax\n\t"
- "popq %%rsp\n\t" // Load stack pointer.
- "ret\n\t" // From higher in the stack pop rip.
- : // output.
- : "g"(&gprs[0]) // input.
- :); // clobber.
+
+ art_quick_do_long_jump(gprs, fprs);
#else
UNIMPLEMENTED(FATAL);
#endif
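
The inline-asm long jump is replaced by a call into the new assembly stub, which also restores the FP registers. A simplified standalone sketch of the C++ side (hypothetical register index and value arrays; the real code above dereferences per-register pointers with a bad-value fallback) shows the pop-order trick: GPR values are stored in reverse so a run of popq instructions consumes them, and the return address is planted one word below the target RSP so the stub's final ret pops it into RIP:

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kNumberOfCpuRegisters = 16;
    constexpr size_t kNumberOfFloatRegisters = 16;
    constexpr size_t kWordSize = 8;
    constexpr size_t RSP = 4;  // illustrative index of RSP in the GPR enum

    extern "C" void art_quick_do_long_jump(uintptr_t* gprs, uintptr_t* fprs);

    void DoLongJump(const uintptr_t* gpr_values, const uintptr_t* fpr_values,
                    uintptr_t rip) {
      uintptr_t gprs[kNumberOfCpuRegisters + 1];  // +1 slot for the final RSP
      uintptr_t fprs[kNumberOfFloatRegisters];
      for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
        gprs[kNumberOfCpuRegisters - i - 1] = gpr_values[i];  // reversed: popq order
      }
      for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
        fprs[i] = fpr_values[i];  // restored by indexed movq, so kept in order
      }
      // Load RSP one slot low and store the target RIP there, so that the
      // stub's trailing `ret` pops it.
      uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize;
      gprs[kNumberOfCpuRegisters] = rsp;
      *reinterpret_cast<uintptr_t*>(rsp) = rip;
      art_quick_do_long_jump(gprs, fprs);
    }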
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 609d1c6500..204d52c723 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -35,7 +35,7 @@ extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
// Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
+extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
const mirror::Class* ref_class);
extern "C" void art_quick_check_cast(void*, void*);
@@ -129,7 +129,7 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
ResetQuickAllocEntryPoints(qpoints);
// Cast
- qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
+ qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
qpoints->pCheckCast = art_quick_check_cast;
// DexCache
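
The entrypoint swap is the visible half of the fix: pInstanceofNonTrivial now points at an assembly wrapper rather than the raw C++ helper, because the native SysV ABI treats all XMM registers as caller-saved, so artIsAssignableFromCode may clobber the newly promoted XMM12-XMM15. A minimal sketch of the idea (simplified signatures, not ART's real headers):

    #include <cstdint>

    extern "C" uint32_t artIsAssignableFromCode(const void* klass,
                                                const void* ref_class);
    // Assembly wrapper that spills/restores xmm12-xmm15 around the call above.
    extern "C" uint32_t art_quick_assignable_from_code(const void* klass,
                                                       const void* ref_class);

    struct QuickEntryPoints {
      uint32_t (*pInstanceofNonTrivial)(const void*, const void*);
    };

    void InitCastEntrypoints(QuickEntryPoints* qpoints) {
      // Was: qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
      qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
    }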
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index d668797ba4..f6736df11f 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -28,8 +28,8 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub
PUSH rdx // Arg.
PUSH rcx // Arg.
// Create space for FPR args, plus padding for alignment
- subq LITERAL(72), %rsp
- CFI_ADJUST_CFA_OFFSET(72)
+ subq LITERAL(72 + 4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(72 + 4 * 8)
// Save FPRs.
movq %xmm0, 0(%rsp)
movq %xmm1, 8(%rsp)
@@ -39,6 +39,10 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub
movq %xmm5, 40(%rsp)
movq %xmm6, 48(%rsp)
movq %xmm7, 56(%rsp)
+ movq %xmm12, 64(%rsp)
+ movq %xmm13, 72(%rsp)
+ movq %xmm14, 80(%rsp)
+ movq %xmm15, 88(%rsp)
// prepare call
movq %gs:THREAD_SELF_OFFSET, %rdi // RDI := Thread::Current()
// call
@@ -52,8 +56,12 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub
movq 40(%rsp), %xmm5
movq 48(%rsp), %xmm6
movq 56(%rsp), %xmm7
- addq LITERAL(72), %rsp
- CFI_ADJUST_CFA_OFFSET(-72)
+ movq 64(%rsp), %xmm12
+ movq 72(%rsp), %xmm13
+ movq 80(%rsp), %xmm14
+ movq 88(%rsp), %xmm15
+ addq LITERAL(72 + 4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-72 - 4 * 8)
POP rcx // Arg.
POP rdx // Arg.
POP rsi // Arg.
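
The dlsym lookup stub's FPR spill area grows from 72 to 104 bytes: the eight argument registers keep their old offsets and the four new callee saves follow. An illustrative struct (not ART code) pins down the layout:

    #include <cstddef>
    #include <cstdint>

    struct JniDlsymFprSpillArea {
      uint64_t xmm_args[8];    // xmm0..xmm7   at rsp+0  .. rsp+56
      uint64_t xmm_callee[4];  // xmm12..xmm15 at rsp+64 .. rsp+88
      uint64_t alignment_pad;  // keeps the stack 16-byte aligned
    };

    static_assert(offsetof(JniDlsymFprSpillArea, xmm_callee) == 64,
                  "xmm12 lands at the first new slot");
    static_assert(sizeof(JniDlsymFprSpillArea) == 72 + 4 * 8,
                  "subq LITERAL(72 + 4 * 8), %rsp reserves 104 bytes");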
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8fa947c9b3..7f7226c0ad 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -16,6 +16,26 @@
#include "asm_support_x86_64.S"
+MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
+ // Create space for ART FP callee-saved registers
+ subq LITERAL(4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(4 * 8)
+ movq %xmm12, 0(%rsp)
+ movq %xmm13, 8(%rsp)
+ movq %xmm14, 16(%rsp)
+ movq %xmm15, 24(%rsp)
+END_MACRO
+
+MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
+ // Restore ART FP callee-saved registers
+ movq 0(%rsp), %xmm12
+ movq 8(%rsp), %xmm13
+ movq 16(%rsp), %xmm14
+ movq 24(%rsp), %xmm15
+ addq LITERAL(4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(- 4 * 8)
+END_MACRO
+
// For x86, the CFA is esp+4, the address above the pushed return address on the stack.
/*
@@ -37,6 +57,14 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
PUSH r12 // Callee save.
PUSH rbp // Callee save.
PUSH rbx // Callee save.
+ // Create space for the FP callee-save registers.
+ subq LITERAL(4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(4 * 8)
+ // Save FPRs.
+ movq %xmm12, 0(%rsp)
+ movq %xmm13, 8(%rsp)
+ movq %xmm14, 16(%rsp)
+ movq %xmm15, 24(%rsp)
subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame).
CFI_ADJUST_CFA_OFFSET(8)
// R10 := ArtMethod* for save all callee save frame method.
@@ -46,7 +74,7 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
// Ugly compile-time check, but we only have the preprocessor.
// Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 8 + 8)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif // __APPLE__
@@ -71,8 +99,14 @@ MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
PUSH r12 // Callee save.
PUSH rbp // Callee save.
PUSH rbx // Callee save.
- subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame).
- CFI_ADJUST_CFA_OFFSET(8)
+ // Create space for the FP callee saves and the ArtMethod* slot (also aligns the frame).
+ subq LITERAL(8 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(8 + 4*8)
+ // Save FPRs.
+ movq %xmm12, 8(%rsp)
+ movq %xmm13, 16(%rsp)
+ movq %xmm14, 24(%rsp)
+ movq %xmm15, 32(%rsp)
// R10 := ArtMethod* for refs only callee save frame method.
movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
// Store ArtMethod* to bottom of stack.
@@ -80,15 +114,19 @@ MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
// Ugly compile-time check, but we only have the preprocessor.
// Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 8 + 8)
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif // __APPLE__
END_MACRO
MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
- addq MACRO_LITERAL(8), %rsp
- CFI_ADJUST_CFA_OFFSET(-8)
+ movq 8(%rsp), %xmm12
+ movq 16(%rsp), %xmm13
+ movq 24(%rsp), %xmm14
+ movq 32(%rsp), %xmm15
+ addq LITERAL(8 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
// TODO: optimize by not restoring callee-saves restored by the ABI
POP rbx
POP rbp
@@ -123,8 +161,8 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
PUSH rdx // Quick arg 2.
PUSH rcx // Quick arg 3.
// Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
- subq MACRO_LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(80)
+ subq MACRO_LITERAL(80 + 4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
// R10 := ArtMethod* for ref and args callee save frame method.
movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
// Save FPRs.
@@ -136,12 +174,16 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
movq %xmm5, 56(%rsp)
movq %xmm6, 64(%rsp)
movq %xmm7, 72(%rsp)
+ movq %xmm12, 80(%rsp)
+ movq %xmm13, 88(%rsp)
+ movq %xmm14, 96(%rsp)
+ movq %xmm15, 104(%rsp)
// Store ArtMethod* to bottom of stack.
movq %r10, 0(%rsp)
// Ugly compile-time check, but we only have the preprocessor.
// Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 80 + 8)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif // __APPLE__
@@ -157,8 +199,12 @@ MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
movq 56(%rsp), %xmm5
movq 64(%rsp), %xmm6
movq 72(%rsp), %xmm7
- addq MACRO_LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(-80)
+ movq 80(%rsp), %xmm12
+ movq 88(%rsp), %xmm13
+ movq 96(%rsp), %xmm14
+ movq 104(%rsp), %xmm15
+ addq MACRO_LITERAL(80 + 4 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
// Restore callee and GPR args, mixed together to agree with core spills bitmap.
POP rcx
POP rdx
@@ -536,6 +582,58 @@ DEFINE_FUNCTION art_quick_invoke_static_stub
#endif // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
+ /*
+ * Long jump stub.
+ * On entry:
+ * rdi = gprs
+ * rsi = fprs
+ */
+DEFINE_FUNCTION art_quick_do_long_jump
+#if defined(__APPLE__)
+ int3
+ int3
+#else
+ // Restore FPRs.
+ movq 0(%rsi), %xmm0
+ movq 8(%rsi), %xmm1
+ movq 16(%rsi), %xmm2
+ movq 24(%rsi), %xmm3
+ movq 32(%rsi), %xmm4
+ movq 40(%rsi), %xmm5
+ movq 48(%rsi), %xmm6
+ movq 56(%rsi), %xmm7
+ movq 64(%rsi), %xmm8
+ movq 72(%rsi), %xmm9
+ movq 80(%rsi), %xmm10
+ movq 88(%rsi), %xmm11
+ movq 96(%rsi), %xmm12
+ movq 104(%rsi), %xmm13
+ movq 112(%rsi), %xmm14
+ movq 120(%rsi), %xmm15
+ // Restore GPRs.
+ movq %rdi, %rsp // RSP points to gprs.
+ // Load all registers except RSP and RIP with values in gprs.
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rdi
+ popq %rsi
+ popq %rbp
+ addq LITERAL(8), %rsp // Skip rsp
+ popq %rbx
+ popq %rdx
+ popq %rcx
+ popq %rax
+ popq %rsp // Load stack pointer.
+ ret // From higher in the stack pop rip.
+#endif // __APPLE__
+END_FUNCTION art_quick_do_long_jump
+
MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name, 0)
SETUP_REF_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC
@@ -820,13 +918,17 @@ END_FUNCTION art_quick_unlock_object
DEFINE_FUNCTION art_quick_check_cast
PUSH rdi // Save args for exc
PUSH rsi
+ SETUP_FP_CALLEE_SAVE_FRAME
call PLT_SYMBOL(artIsAssignableFromCode) // (Class* klass, Class* ref_klass)
testq %rax, %rax
jz 1f // jump forward if not assignable
+ RESTORE_FP_CALLEE_SAVE_FRAME
addq LITERAL(16), %rsp // pop arguments
CFI_ADJUST_CFA_OFFSET(-16)
+
ret
1:
+ RESTORE_FP_CALLEE_SAVE_FRAME
POP rsi // Pop arguments
POP rdi
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context
@@ -907,6 +1009,7 @@ DEFINE_FUNCTION art_quick_aput_obj
PUSH rdx
subq LITERAL(8), %rsp // Alignment padding.
CFI_ADJUST_CFA_OFFSET(8)
+ SETUP_FP_CALLEE_SAVE_FRAME
// "Uncompress" = do nothing, as already zero-extended on load.
movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
@@ -918,6 +1021,7 @@ DEFINE_FUNCTION art_quick_aput_obj
testq %rax, %rax
jz .Lthrow_array_store_exception
+ RESTORE_FP_CALLEE_SAVE_FRAME
// Restore arguments.
addq LITERAL(8), %rsp
CFI_ADJUST_CFA_OFFSET(-8)
@@ -934,6 +1038,7 @@ DEFINE_FUNCTION art_quick_aput_obj
// movb %dl, (%rdx, %rdi)
ret
.Lthrow_array_store_exception:
+ RESTORE_FP_CALLEE_SAVE_FRAME
// Restore arguments.
addq LITERAL(8), %rsp
CFI_ADJUST_CFA_OFFSET(-8)
@@ -1012,8 +1117,8 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler
PUSH rdx // Quick arg 2.
PUSH rcx // Quick arg 3.
// Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
- subq LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(80)
+ subq LITERAL(80 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(80 + 4*8)
// Save FPRs.
movq %xmm0, 16(%rsp)
movq %xmm1, 24(%rsp)
@@ -1023,14 +1128,18 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler
movq %xmm5, 56(%rsp)
movq %xmm6, 64(%rsp)
movq %xmm7, 72(%rsp)
+ movq %xmm12, 80(%rsp)
+ movq %xmm13, 88(%rsp)
+ movq %xmm14, 96(%rsp)
+ movq %xmm15, 104(%rsp)
// Store proxy method to bottom of stack.
movq %rdi, 0(%rsp)
movq %gs:THREAD_SELF_OFFSET, %rdx // Pass Thread::Current().
movq %rsp, %rcx // Pass SP.
call PLT_SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
movq %rax, %xmm0 // Copy return value in case of float returns.
- addq LITERAL(168), %rsp // Pop arguments.
- CFI_ADJUST_CFA_OFFSET(-168)
+ addq LITERAL(168 + 4*8), %rsp // Pop arguments.
+ CFI_ADJUST_CFA_OFFSET(-168 - 4*8)
RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
@@ -1156,8 +1265,8 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
PUSH rdx // Quick arg 2.
PUSH rcx // Quick arg 3.
// Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
- subq LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(80)
+ subq LITERAL(80 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(80 + 4*8)
// Save FPRs.
movq %xmm0, 16(%rsp)
movq %xmm1, 24(%rsp)
@@ -1167,6 +1276,10 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
movq %xmm5, 56(%rsp)
movq %xmm6, 64(%rsp)
movq %xmm7, 72(%rsp)
+ movq %xmm12, 80(%rsp)
+ movq %xmm13, 88(%rsp)
+ movq %xmm14, 96(%rsp)
+ movq %xmm15, 104(%rsp)
movq %rdi, 0(%rsp) // Store native ArtMethod* to bottom of stack.
movq %rsp, %rbp // save SP at (old) callee-save frame
CFI_DEF_CFA_REGISTER(rbp)
@@ -1260,9 +1373,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
movq 56(%rsp), %xmm5
movq 64(%rsp), %xmm6
movq 72(%rsp), %xmm7
+ movq 80(%rsp), %xmm12
+ movq 88(%rsp), %xmm13
+ movq 96(%rsp), %xmm14
+ movq 104(%rsp), %xmm15
// was 80 bytes
- addq LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(-80)
+ addq LITERAL(80 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
// Restore callee and GPR args, mixed together to agree with core spills bitmap.
POP rcx // Arg.
POP rdx // Arg.
@@ -1292,9 +1409,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
movq 56(%rsp), %xmm5
movq 64(%rsp), %xmm6
movq 72(%rsp), %xmm7
- // was 80 bytes
- addq LITERAL(80), %rsp
- CFI_ADJUST_CFA_OFFSET(-80)
+ movq 80(%rsp), %xmm12
+ movq 88(%rsp), %xmm13
+ movq 96(%rsp), %xmm14
+ movq 104(%rsp), %xmm15
+ // was 80 + 32 bytes
+ addq LITERAL(80 + 4*8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
// Restore callee and GPR args, mixed together to agree with core spills bitmap.
POP rcx // Arg.
POP rdx // Arg.
@@ -1450,3 +1571,10 @@ DEFINE_FUNCTION art_quick_string_compareto
END_FUNCTION art_quick_string_compareto
UNIMPLEMENTED art_quick_memcmp16
+
+DEFINE_FUNCTION art_quick_assignable_from_code
+ SETUP_FP_CALLEE_SAVE_FRAME
+ call PLT_SYMBOL(artIsAssignableFromCode) // (const mirror::Class*, const mirror::Class*)
+ RESTORE_FP_CALLEE_SAVE_FRAME
+ ret
+END_FUNCTION art_quick_assignable_from_code
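
Note the discipline the new macros impose: art_quick_check_cast and art_quick_aput_obj must run RESTORE_FP_CALLEE_SAVE_FRAME on the success path and again on the exception path, since either exit crosses back into managed code that expects XMM12-XMM15 intact. In C++ terms the pairing behaves like a scope guard; a conceptual sketch (hypothetical, nothing here actually touches registers, and it links against the runtime's helper):

    #include <cstdint>

    extern "C" uint32_t artIsAssignableFromCode(const void*, const void*);

    // Stand-in for SETUP_FP_CALLEE_SAVE_FRAME / RESTORE_FP_CALLEE_SAVE_FRAME:
    // construction "spills" xmm12-xmm15, destruction "restores" them on every
    // exit path, including the exception path.
    struct FpCalleeSaveScope {
      // In the real stub: subq $32, %rsp; movq %xmm12..%xmm15 onto the stack.
      FpCalleeSaveScope() {}
      // In the real stub: movq back from the stack; addq $32, %rsp.
      ~FpCalleeSaveScope() {}
    };

    bool CheckCast(const void* klass, const void* ref_klass) {
      FpCalleeSaveScope fp_guard;  // spans the native call on all paths
      return artIsAssignableFromCode(klass, ref_klass) != 0;
    }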
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 618390903b..53aa212a88 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -34,6 +34,9 @@ static constexpr uint32_t kX86_64CalleeSaveFpArgSpills =
(1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
(1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
(1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
+static constexpr uint32_t kX86_64CalleeSaveFpSpills =
+ (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) |
+ (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15);
constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
return kX86_64CalleeSaveRefSpills |
@@ -42,7 +45,8 @@ constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
}
constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
- return (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
+ return kX86_64CalleeSaveFpSpills |
+ (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
}
constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
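
The new kX86_64CalleeSaveFpSpills mask is merged into every frame type's FP spill set, which is what makes all three FRAME_SIZE_* constants grow by 4*8 bytes. A standalone arithmetic check (hypothetical PopCount helper, not ART's headers):

    #include <cstdint>

    constexpr uint32_t XMM12 = 12, XMM13 = 13, XMM14 = 14, XMM15 = 15;
    constexpr uint32_t kX86_64CalleeSaveFpSpills =
        (1u << XMM12) | (1u << XMM13) | (1u << XMM14) | (1u << XMM15);

    constexpr uint32_t PopCount(uint32_t x) {
      return x == 0u ? 0u : (x & 1u) + PopCount(x >> 1);
    }

    static_assert(PopCount(kX86_64CalleeSaveFpSpills) == 4,
                  "four new FP callee saves in every frame type");
    static_assert(PopCount(kX86_64CalleeSaveFpSpills) * 8 == 32,
                  "hence the 4*8 bytes added to each FRAME_SIZE_* constant");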
diff --git a/runtime/arch/x86_64/registers_x86_64.cc b/runtime/arch/x86_64/registers_x86_64.cc
index 38f3494502..f29c42652b 100644
--- a/runtime/arch/x86_64/registers_x86_64.cc
+++ b/runtime/arch/x86_64/registers_x86_64.cc
@@ -34,5 +34,14 @@ std::ostream& operator<<(std::ostream& os, const Register& rhs) {
return os;
}
+std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs) {
+ if (rhs >= XMM0 && rhs <= XMM15) {
+ os << "xmm" << static_cast<int>(rhs);
+ } else {
+ os << "Register[" << static_cast<int>(rhs) << "]";
+ }
+ return os;
+}
+
} // namespace x86_64
} // namespace art
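
For reference, the new FloatRegister stream operator can be exercised like this (a trimmed standalone demo mirroring the code above, not the actual ART header):

    #include <iostream>

    namespace demo {
    enum FloatRegister : int { XMM0 = 0, XMM12 = 12, XMM15 = 15 };

    std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs) {
      if (rhs >= XMM0 && rhs <= XMM15) {
        os << "xmm" << static_cast<int>(rhs);
      } else {
        os << "Register[" << static_cast<int>(rhs) << "]";
      }
      return os;
    }
    }  // namespace demo

    int main() {
      std::cout << demo::XMM12 << "\n";                           // prints "xmm12"
      std::cout << static_cast<demo::FloatRegister>(42) << "\n";  // prints "Register[42]"
    }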