diff options
author | Vladimir Marko <vmarko@google.com> | 2015-01-27 10:48:44 +0000 |
---|---|---|
committer | Vladimir Marko <vmarko@google.com> | 2015-01-27 13:41:29 +0000 |
commit | 949c91fb91f40a4a80b2b492913cf8541008975e (patch) | |
tree | 45c840d1d6fd0ab71d96cb6c61931f468b3a0adf | |
parent | aeb47bb12420e65b4b5f61164e6396ea93734a0a (diff) | |
download | android_art-949c91fb91f40a4a80b2b492913cf8541008975e.tar.gz android_art-949c91fb91f40a4a80b2b492913cf8541008975e.tar.bz2 android_art-949c91fb91f40a4a80b2b492913cf8541008975e.zip |
Revert "ART: Implement X86 hard float (Quick/JNI/Baseline)"
And the 3 Mac build fixes. Fix conflicts in context_x86.*.
This reverts commits
3d2c8e74c27efee58e24ec31441124f3f21384b9,
34eda1dd66b92a361797c63d57fa19e83c08a1b4,
f601d1954348b71186fa160a0ae6a1f4f1c5aee6,
bc503348a1da573488503cc2819c9e30807bea31.
Bug: 19150481
Change-Id: I6650ee30a7d261159380fe2119e14379e4dc9970
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 18 | ||||
-rwxr-xr-x | compiler/dex/quick/x86/target_x86.cc | 47 | ||||
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 18 | ||||
-rw-r--r-- | compiler/jni/quick/x86/calling_convention_x86.cc | 59 | ||||
-rw-r--r-- | compiler/jni/quick/x86/calling_convention_x86.h | 6 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 68 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 11 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 27 | ||||
-rw-r--r-- | runtime/arch/stub_test.cc | 2 | ||||
-rw-r--r-- | runtime/arch/x86/asm_support_x86.h | 4 | ||||
-rw-r--r-- | runtime/arch/x86/context_x86.cc | 52 | ||||
-rw-r--r-- | runtime/arch/x86/context_x86.h | 28 | ||||
-rw-r--r-- | runtime/arch/x86/quick_entrypoints_x86.S | 379 | ||||
-rw-r--r-- | runtime/arch/x86/quick_method_frame_info_x86.h | 21 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_trampoline_entrypoints.cc | 14 | ||||
-rw-r--r-- | runtime/mirror/art_method.cc | 4 | ||||
-rw-r--r-- | runtime/oat.h | 2 |
17 files changed, 167 insertions, 593 deletions
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 811d4f5d7b..3815722f10 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -41,15 +41,22 @@ class X86Mir2Lir : public Mir2Lir { } protected: Mir2Lir* m2l_; + private: size_t cur_core_reg_; size_t cur_fp_reg_; }; - class InToRegStorageX86Mapper : public InToRegStorageX86_64Mapper { + class InToRegStorageX86Mapper : public InToRegStorageMapper { public: - explicit InToRegStorageX86Mapper(Mir2Lir* m2l) - : InToRegStorageX86_64Mapper(m2l) { } + explicit InToRegStorageX86Mapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {} virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + } + protected: + Mir2Lir* m2l_; + private: + size_t cur_core_reg_; }; InToRegStorageX86_64Mapper in_to_reg_storage_x86_64_mapper_; @@ -113,12 +120,9 @@ class X86Mir2Lir : public Mir2Lir { if (cu_->target64) { return As64BitReg(TargetReg32(symbolic_reg)); } else { - if (symbolic_reg >= kFArg0 && symbolic_reg <= kFArg3) { - // We want an XMM, not a pair. - return As64BitReg(TargetReg32(symbolic_reg)); - } // x86: construct a pair. 
DCHECK((kArg0 <= symbolic_reg && symbolic_reg < kArg3) || + (kFArg0 <= symbolic_reg && symbolic_reg < kFArg3) || (kRet0 == symbolic_reg)); return RegStorage::MakeRegPair(TargetReg32(symbolic_reg), TargetReg32(static_cast<SpecialTargetRegister>(symbolic_reg + 1))); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 03370963a7..bc64aadd67 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -180,10 +180,10 @@ static const RegStorage RegStorage32FromSpecialTargetRegister_Target32[] { RegStorage::InvalidReg(), // kArg5 RegStorage::InvalidReg(), // kArg6 RegStorage::InvalidReg(), // kArg7 - rs_fr0, // kFArg0 - rs_fr1, // kFArg1 - rs_fr2, // kFArg2 - rs_fr3, // kFArg3 + rs_rAX, // kFArg0 + rs_rCX, // kFArg1 + rs_rDX, // kFArg2 + rs_rBX, // kFArg3 RegStorage::InvalidReg(), // kFArg4 RegStorage::InvalidReg(), // kFArg5 RegStorage::InvalidReg(), // kFArg6 @@ -200,7 +200,7 @@ static const RegStorage RegStorage32FromSpecialTargetRegister_Target32[] { rs_rDX, // kRet1 rs_rAX, // kInvokeTgt rs_rAX, // kHiddenArg - used to hold the method index before copying to fr0. 
- rs_fr7, // kHiddenFpArg + rs_fr0, // kHiddenFpArg rs_rCX, // kCount }; @@ -545,13 +545,13 @@ void X86Mir2Lir::LockCallTemps() { LockTemp(TargetReg32(kArg1)); LockTemp(TargetReg32(kArg2)); LockTemp(TargetReg32(kArg3)); - LockTemp(TargetReg32(kFArg0)); - LockTemp(TargetReg32(kFArg1)); - LockTemp(TargetReg32(kFArg2)); - LockTemp(TargetReg32(kFArg3)); if (cu_->target64) { LockTemp(TargetReg32(kArg4)); LockTemp(TargetReg32(kArg5)); + LockTemp(TargetReg32(kFArg0)); + LockTemp(TargetReg32(kFArg1)); + LockTemp(TargetReg32(kFArg2)); + LockTemp(TargetReg32(kFArg3)); LockTemp(TargetReg32(kFArg4)); LockTemp(TargetReg32(kFArg5)); LockTemp(TargetReg32(kFArg6)); @@ -566,13 +566,13 @@ void X86Mir2Lir::FreeCallTemps() { FreeTemp(TargetReg32(kArg2)); FreeTemp(TargetReg32(kArg3)); FreeTemp(TargetReg32(kHiddenArg)); - FreeTemp(TargetReg32(kFArg0)); - FreeTemp(TargetReg32(kFArg1)); - FreeTemp(TargetReg32(kFArg2)); - FreeTemp(TargetReg32(kFArg3)); if (cu_->target64) { FreeTemp(TargetReg32(kArg4)); FreeTemp(TargetReg32(kArg5)); + FreeTemp(TargetReg32(kFArg0)); + FreeTemp(TargetReg32(kFArg1)); + FreeTemp(TargetReg32(kFArg2)); + FreeTemp(TargetReg32(kFArg3)); FreeTemp(TargetReg32(kFArg4)); FreeTemp(TargetReg32(kFArg5)); FreeTemp(TargetReg32(kFArg6)); @@ -2460,23 +2460,14 @@ RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(ShortyArg arg) { RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) { const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3}; const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); - const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3}; - const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg); RegStorage result = RegStorage::InvalidReg(); - if (arg.IsFP()) { - if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { - return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], - arg.IsWide() ? 
kWide : kNotWide); - } - } else { - if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { - result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], - arg.IsRef() ? kRef : kNotWide); - if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { - result = RegStorage::MakeRegPair( - result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide)); - } + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], + arg.IsRef() ? kRef : kNotWide); + if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = RegStorage::MakeRegPair( + result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide)); } } return result; diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 7dea09a579..bc4cb5a9d8 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -57,15 +57,15 @@ namespace art { * x86-64/x32 gs: holds it. 
* * For floating point we don't support CPUs without SSE2 support (ie newer than PIII): - * Native: x86 | x86-64 / x32 | ART x86 | ART x86-64 - * XMM0: caller | caller, arg1 | caller, arg1, float return value | caller, arg1, float return value - * XMM1: caller | caller, arg2 | caller, arg2, scratch | caller, arg2, scratch - * XMM2: caller | caller, arg3 | caller, arg3, scratch | caller, arg3, scratch - * XMM3: caller | caller, arg4 | caller, arg4, scratch | caller, arg4, scratch - * XMM4: caller | caller, arg5 | caller, scratch | caller, arg5, scratch - * XMM5: caller | caller, arg6 | caller, scratch | caller, arg6, scratch - * XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch - * XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch + * Native: x86 | x86-64 / x32 | ART x86 | ART x86-64 + * XMM0: caller | caller, arg1 | caller, float return value | caller, arg1, float return value + * XMM1: caller | caller, arg2 | caller, scratch | caller, arg2, scratch + * XMM2: caller | caller, arg3 | caller, scratch | caller, arg3, scratch + * XMM3: caller | caller, arg4 | caller, scratch | caller, arg4, scratch + * XMM4: caller | caller, arg5 | caller, scratch | caller, arg5, scratch + * XMM5: caller | caller, arg6 | caller, scratch | caller, arg6, scratch + * XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch + * XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch * --- x86-64/x32 registers * XMM8 .. 11: caller save available as scratch registers for ART. * XMM12 .. 15: callee save available as promoted registers for ART. 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index fc72e88c00..a5686e1ac7 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -77,34 +77,12 @@ bool X86ManagedRuntimeCallingConvention::IsCurrentParamInRegister() { } bool X86ManagedRuntimeCallingConvention::IsCurrentParamOnStack() { - // We assume all parameters are on stack, args coming via registers are spilled as entry_spills. - return true; + return true; // Everything is passed by stack } ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamRegister() { - ManagedRegister res = ManagedRegister::NoRegister(); - if (!IsCurrentParamAFloatOrDouble()) { - switch (gpr_arg_count_) { - case 0: res = X86ManagedRegister::FromCpuRegister(ECX); break; - case 1: res = X86ManagedRegister::FromCpuRegister(EDX); break; - case 2: res = X86ManagedRegister::FromCpuRegister(EBX); break; - } - } else if (itr_float_and_doubles_ < 4) { - // First four float parameters are passed via XMM0..XMM3 - res = X86ManagedRegister::FromXmmRegister( - static_cast<XmmRegister>(XMM0 + itr_float_and_doubles_)); - } - return res; -} - -ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamHighLongRegister() { - ManagedRegister res = ManagedRegister::NoRegister(); - DCHECK(IsCurrentParamALong()); - switch (gpr_arg_count_) { - case 0: res = X86ManagedRegister::FromCpuRegister(EDX); break; - case 1: res = X86ManagedRegister::FromCpuRegister(EBX); break; - } - return res; + LOG(FATAL) << "Should not reach here"; + return ManagedRegister::NoRegister(); } FrameOffset X86ManagedRuntimeCallingConvention::CurrentParamStackOffset() { @@ -117,32 +95,15 @@ const ManagedRegisterEntrySpills& X86ManagedRuntimeCallingConvention::EntrySpill // We spill the argument registers on X86 to free them up for scratch use, we then assume // all arguments are on the stack. 
if (entry_spills_.size() == 0) { - ResetIterator(FrameOffset(0)); - while (HasNext()) { - ManagedRegister in_reg = CurrentParamRegister(); - if (!in_reg.IsNoRegister()) { - int32_t size = IsParamADouble(itr_args_) ? 8 : 4; - int32_t spill_offset = CurrentParamStackOffset().Uint32Value(); - ManagedRegisterSpill spill(in_reg, size, spill_offset); - entry_spills_.push_back(spill); - if (IsCurrentParamALong() && !IsCurrentParamAReference()) { // Long. - // special case, as we may need a second register here. - in_reg = CurrentParamHighLongRegister(); - if (!in_reg.IsNoRegister()) { - // We have to spill the second half of the long. - ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4); - entry_spills_.push_back(spill2); - // Long was allocated in 2 registers. - gpr_arg_count_++; - } - } - - // Keep track of the number of GPRs allocated. - if (!IsCurrentParamAFloatOrDouble()) { - gpr_arg_count_++; + size_t num_spills = NumArgs() + NumLongOrDoubleArgs(); + if (num_spills > 0) { + entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(ECX)); + if (num_spills > 1) { + entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(EDX)); + if (num_spills > 2) { + entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(EBX)); } } - Next(); } } return entry_spills_; diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h index b1b3598a8e..025eb6d40e 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.h +++ b/compiler/jni/quick/x86/calling_convention_x86.h @@ -28,8 +28,7 @@ class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCon public: explicit X86ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) - : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize), - gpr_arg_count_(0) {} + : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {} 
~X86ManagedRuntimeCallingConvention() OVERRIDE {} // Calling convention ManagedRegister ReturnRegister() OVERRIDE; @@ -41,10 +40,7 @@ class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCon ManagedRegister CurrentParamRegister() OVERRIDE; FrameOffset CurrentParamStackOffset() OVERRIDE; const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE; - private: - int gpr_arg_count_; - ManagedRegister CurrentParamHighLongRegister(); ManagedRegisterEntrySpills entry_spills_; DISALLOW_COPY_AND_ASSIGN(X86ManagedRuntimeCallingConvention); }; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 66f1d5e58d..c0fdcaa8aa 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -36,9 +36,8 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = - arraysize(kRuntimeParameterFpuRegisters); +static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = 0; static constexpr int kC2ConditionMask = 0x400; @@ -505,49 +504,30 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimFloat: case Primitive::kPrimNot: { uint32_t index = gp_index_++; - stack_index_++; if (index < calling_convention.GetNumberOfRegisters()) { return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); } else { - return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); + return 
Location::StackSlot(calling_convention.GetStackOffsetOf(index)); } } - case Primitive::kPrimLong: { + case Primitive::kPrimLong: + case Primitive::kPrimDouble: { uint32_t index = gp_index_; gp_index_ += 2; - stack_index_ += 2; if (index + 1 < calling_convention.GetNumberOfRegisters()) { X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair( calling_convention.GetRegisterPairAt(index)); return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); } else if (index + 1 == calling_convention.GetNumberOfRegisters()) { - // stack_index_ is the right offset for the memory. - return Location::QuickParameter(index, stack_index_ - 2); - } else { - return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); - } - } - - case Primitive::kPrimFloat: { - uint32_t index = fp_index_++; - stack_index_++; - if (index < calling_convention.GetNumberOfFpuRegisters()) { - return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); - } else { - return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); - } - } - - case Primitive::kPrimDouble: { - uint32_t index = fp_index_++; - stack_index_ += 2; - if (index < calling_convention.GetNumberOfFpuRegisters()) { - return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); + // On X86, the register index and stack index of a quick parameter is the same, since + // we are passing floating pointer values in core registers. + return Location::QuickParameter(index, index); } else { - return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -1206,7 +1186,7 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { HandleInvoke(invoke); // Add the hidden argument. 
- invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7)); + invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM0)); } void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -1408,17 +1388,31 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - // Processing a Dex `float-to-long' or 'double-to-long' instruction. + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-long' instruction. InvokeRuntimeCallingConvention calling_convention; - XmmRegister parameter = calling_convention.GetFpuRegisterAt(0); - locations->SetInAt(0, Location::FpuRegisterLocation(parameter)); + // Note that on x86 floating-point parameters are passed + // through core registers (here, EAX). + locations->SetInAt(0, Location::RegisterLocation( + calling_convention.GetRegisterAt(0))); + // The runtime helper puts the result in EAX, EDX. + locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); + break; + } + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-long' instruction. + InvokeRuntimeCallingConvention calling_convention; + // Note that on x86 floating-point parameters are passed + // through core registers (here, EAX and ECX). + locations->SetInAt(0, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(0), + calling_convention.GetRegisterAt(1))); // The runtime helper puts the result in EAX, EDX. 
locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); + break; } - break; + break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 55d71e39c4..73b647c1c4 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -36,8 +36,8 @@ class SlowPathCodeX86; static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX }; static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); -static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 }; -static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); +static constexpr XmmRegister kParameterFpuRegisters[] = { }; +static constexpr size_t kParameterFpuRegistersLength = 0; class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> { public: @@ -58,18 +58,13 @@ class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegiste class InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitor() : gp_index_(0) {} Location GetNextLocation(Primitive::Type type); private: InvokeDexCallingConvention calling_convention; - // The current index for cpu registers. uint32_t gp_index_; - // The current index for fpu registers. - uint32_t fp_index_; - // The current stack index. 
- uint32_t stack_index_; DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 1f0dba5fc9..3f266fecfc 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1537,12 +1537,8 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, uint32_t reg_offset = 1; CHECK_ALIGNED(frame_size, kStackAlignment); - int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - pushl(spill.AsCpuRegister()); - gpr_count++; + pushl(spill_regs.at(i).AsX86().AsCpuRegister()); // DW_CFA_advance_loc DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); @@ -1556,7 +1552,7 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, } // return address then method on stack - int32_t adjust = frame_size - (gpr_count * kFramePointerSize) - + int32_t adjust = frame_size - (spill_regs.size() * kFramePointerSize) - sizeof(StackReference<mirror::ArtMethod>) /*method*/ - kFramePointerSize /*return address*/; addl(ESP, Immediate(-adjust)); @@ -1576,18 +1572,9 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); for (size_t i = 0; i < entry_spills.size(); ++i) { - ManagedRegisterSpill spill = entry_spills.at(i); - if (spill.AsX86().IsCpuRegister()) { - movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister()); - } else { - DCHECK(spill.AsX86().IsXmmRegister()); - if (spill.getSize() == 8) { - movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister()); - } else { - CHECK_EQ(spill.getSize(), 4); - movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister()); - } - } + movl(Address(ESP, frame_size + sizeof(StackReference<mirror::ArtMethod>) + + (i * 
kFramePointerSize)), + entry_spills.at(i).AsX86().AsCpuRegister()); } } @@ -1597,9 +1584,7 @@ void X86Assembler::RemoveFrame(size_t frame_size, addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - sizeof(StackReference<mirror::ArtMethod>))); for (size_t i = 0; i < spill_regs.size(); ++i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - popl(spill.AsCpuRegister()); + popl(spill_regs.at(i).AsX86().AsCpuRegister()); } ret(); } diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 6acc2a7829..4b67c835df 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -302,7 +302,7 @@ class StubTest : public CommonRuntimeTest { #if defined(__i386__) // TODO: Set the thread? __asm__ __volatile__( - "movd %[hidden], %%xmm7\n\t" + "movd %[hidden], %%xmm0\n\t" "subl $12, %%esp\n\t" // Align stack. "pushl %[referrer]\n\t" // Store referrer "call *%%edi\n\t" // Call the stub diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h index b0a6017b47..5a88f8012e 100644 --- a/runtime/arch/x86/asm_support_x86.h +++ b/runtime/arch/x86/asm_support_x86.h @@ -21,8 +21,6 @@ #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 32 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32 - -// 32 bytes for GPRs and 32 bytes for FPRs. -#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (32 + 32) +#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 32 #endif // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_ diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc index 4ea4684f06..2a6ff14c75 100644 --- a/runtime/arch/x86/context_x86.cc +++ b/runtime/arch/x86/context_x86.cc @@ -30,9 +30,6 @@ void X86Context::Reset() { for (size_t i = 0; i < kNumberOfCpuRegisters; i++) { gprs_[i] = nullptr; } - for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) { - fprs_[i] = nullptr; - } gprs_[ESP] = &esp_; // Initialize registers with easy to spot debug values. 
esp_ = X86Context::kBadGprBase + ESP; @@ -43,7 +40,7 @@ void X86Context::FillCalleeSaves(const StackVisitor& fr) { mirror::ArtMethod* method = fr.GetMethod(); const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo(); size_t spill_count = POPCOUNT(frame_info.CoreSpillMask()); - size_t fp_spill_count = POPCOUNT(frame_info.FpSpillMask()); + DCHECK_EQ(frame_info.FpSpillMask(), 0u); if (spill_count > 0) { // Lowest number spill is farthest away, walk registers and fill into context. int j = 2; // Offset j to skip return address spill. @@ -54,24 +51,6 @@ void X86Context::FillCalleeSaves(const StackVisitor& fr) { } } } - if (fp_spill_count > 0) { - // Lowest number spill is farthest away, walk registers and fill into context. - size_t j = 2; // Offset j to skip return address spill. - size_t fp_spill_size_in_words = fp_spill_count * 2; - for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) { - if (((frame_info.FpSpillMask() >> i) & 1) != 0) { - // There are 2 pieces to each XMM register, to match VR size. - fprs_[2*i] = reinterpret_cast<uint32_t*>( - fr.CalleeSaveAddress(spill_count + fp_spill_size_in_words - j, - frame_info.FrameSizeInBytes())); - fprs_[2*i+1] = reinterpret_cast<uint32_t*>( - fr.CalleeSaveAddress(spill_count + fp_spill_size_in_words - j - 1, - frame_info.FrameSizeInBytes())); - // Two void* per XMM register. 
- j += 2; - } - } - } } void X86Context::SmashCallerSaves() { @@ -80,7 +59,6 @@ void X86Context::SmashCallerSaves() { gprs_[EDX] = const_cast<uintptr_t*>(&gZero); gprs_[ECX] = nullptr; gprs_[EBX] = nullptr; - memset(&fprs_[0], '\0', sizeof(fprs_)); } void X86Context::SetGPR(uint32_t reg, uintptr_t value) { @@ -90,11 +68,14 @@ void X86Context::SetGPR(uint32_t reg, uintptr_t value) { *gprs_[reg] = value; } -void X86Context::SetFPR(uint32_t reg, uintptr_t value) { - CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters)); - DCHECK(IsAccessibleFPR(reg)); - CHECK_NE(fprs_[reg], reinterpret_cast<const uint32_t*>(&gZero)); - *fprs_[reg] = value; +uintptr_t X86Context::GetFPR(uint32_t reg ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Floating-point registers are all caller save in X86"; + UNREACHABLE(); +} + +void X86Context::SetFPR(uint32_t reg ATTRIBUTE_UNUSED, uintptr_t value ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Floating-point registers are all caller save in X86"; + UNREACHABLE(); } void X86Context::DoLongJump() { @@ -105,30 +86,17 @@ void X86Context::DoLongJump() { for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86Context::kBadGprBase + i; } - uint32_t fprs[kNumberOfFloatRegisters]; - for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) { - fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : X86Context::kBadFprBase + i; - } // We want to load the stack pointer one slot below so that the ret will pop eip. uintptr_t esp = gprs[kNumberOfCpuRegisters - ESP - 1] - sizeof(intptr_t); gprs[kNumberOfCpuRegisters] = esp; *(reinterpret_cast<uintptr_t*>(esp)) = eip_; __asm__ __volatile__( - "movl %1, %%ebx\n\t" // Address base of FPRs. - "movsd 0(%%ebx), %%xmm0\n\t" // Load up XMM0-XMM7. 
- "movsd 8(%%ebx), %%xmm1\n\t" - "movsd 16(%%ebx), %%xmm2\n\t" - "movsd 24(%%ebx), %%xmm3\n\t" - "movsd 32(%%ebx), %%xmm4\n\t" - "movsd 40(%%ebx), %%xmm5\n\t" - "movsd 48(%%ebx), %%xmm6\n\t" - "movsd 56(%%ebx), %%xmm7\n\t" "movl %0, %%esp\n\t" // ESP points to gprs. "popal\n\t" // Load all registers except ESP and EIP with values in gprs. "popl %%esp\n\t" // Load stack pointer. "ret\n\t" // From higher in the stack pop eip. : // output. - : "g"(&gprs[0]), "g"(&fprs[0]) // input. + : "g"(&gprs[0]) // input. :); // clobber. #else UNIMPLEMENTED(FATAL); diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h index c66a9dcb19..8b7804d616 100644 --- a/runtime/arch/x86/context_x86.h +++ b/runtime/arch/x86/context_x86.h @@ -61,16 +61,11 @@ class X86Context : public Context { void SetGPR(uint32_t reg, uintptr_t value) OVERRIDE; - bool IsAccessibleFPR(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters)); - return fprs_[reg] != nullptr; + bool IsAccessibleFPR(uint32_t reg ATTRIBUTE_UNUSED) OVERRIDE { + return false; } - uintptr_t GetFPR(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters)); - DCHECK(IsAccessibleFPR(reg)); - return *fprs_[reg]; - } + uintptr_t GetFPR(uint32_t reg) OVERRIDE; void SetFPR(uint32_t reg, uintptr_t value) OVERRIDE; @@ -78,22 +73,9 @@ class X86Context : public Context { void DoLongJump() OVERRIDE; private: - // Pretend XMM registers are made of uin32_t pieces, because they are manipulated - // in uint32_t chunks. - enum { - XMM0_0 = 0, XMM0_1, - XMM1_0, XMM1_1, - XMM2_0, XMM2_1, - XMM3_0, XMM3_1, - XMM4_0, XMM4_1, - XMM5_0, XMM5_1, - XMM6_0, XMM6_1, - XMM7_0, XMM7_1, - kNumberOfFloatRegisters}; - - // Pointers to register locations. Values are initialized to NULL or the special registers below. + // Pointers to register locations, floating point registers are all caller save. Values are + // initialized to NULL or the special registers below. 
uintptr_t* gprs_[kNumberOfCpuRegisters]; - uint32_t* fprs_[kNumberOfFloatRegisters]; // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index b245dc3fec..71534030c4 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -90,15 +90,6 @@ MACRO2(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME, got_reg, temp_reg) PUSH ebx // Save args PUSH edx PUSH ecx - // Create space for FPR args. - subl MACRO_LITERAL(4 * 8), %esp - CFI_ADJUST_CFA_OFFSET(4 * 8) - // Save FPRs. - movsd %xmm0, 0(%esp) - movsd %xmm1, 8(%esp) - movsd %xmm2, 16(%esp) - movsd %xmm3, 24(%esp) - SETUP_GOT_NOSAVE VAR(got_reg, 0) // Load Runtime::instance_ from GOT. movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg, 0)), REG_VAR(temp_reg, 1) @@ -111,7 +102,7 @@ MACRO2(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME, got_reg, temp_reg) // Ugly compile-time check, but we only have the preprocessor. // Last +4: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 7*4 + 4*8 + 4) +#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 7*4 + 4) #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86) size not as expected." #endif END_MACRO @@ -121,39 +112,20 @@ END_MACRO * Runtime::CreateCalleeSaveMethod(kRefsAndArgs) where the method is passed in EAX. */ MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_EAX) - // Save callee and GPR args, mixed together to agree with core spills bitmap. PUSH edi // Save callee saves PUSH esi PUSH ebp PUSH ebx // Save args PUSH edx PUSH ecx - - // Create space for FPR args. - subl MACRO_LITERAL(32), %esp - CFI_ADJUST_CFA_OFFSET(32) - - // Save FPRs. 
- movsd %xmm0, 0(%esp) - movsd %xmm1, 8(%esp) - movsd %xmm2, 16(%esp) - movsd %xmm3, 24(%esp) - PUSH eax // Store the ArtMethod reference at the bottom of the stack. // Store esp as the stop quick frame. movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET END_MACRO MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME) - // Restore FPRs. EAX is still on the stack. - movsd 4(%esp), %xmm0 - movsd 12(%esp), %xmm1 - movsd 20(%esp), %xmm2 - movsd 28(%esp), %xmm3 - - addl MACRO_LITERAL(36), %esp // Remove FPRs and EAX. - CFI_ADJUST_CFA_OFFSET(-36) - + addl MACRO_LITERAL(4), %esp // Remove padding + CFI_ADJUST_CFA_OFFSET(-4) POP ecx // Restore args except eax POP edx POP ebx @@ -162,30 +134,6 @@ MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME) POP edi END_MACRO -// Restore register and jump to routine -// Inputs: EDI contains pointer to code. -// Notes: Need to pop EAX too (restores Method*) -MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP) - POP eax // Restore Method* - - // Restore FPRs. - movsd 0(%esp), %xmm0 - movsd 8(%esp), %xmm1 - movsd 16(%esp), %xmm2 - movsd 24(%esp), %xmm3 - - addl MACRO_LITERAL(32), %esp // Remove FPRs. - CFI_ADJUST_CFA_OFFSET(-32) - - POP ecx // Restore args except eax - POP edx - POP ebx - POP ebp // Restore callee saves - POP esi - xchgl 0(%esp),%edi // restore EDI and place code pointer as only value on stack - ret -END_MACRO - /* * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending * exception is Thread::Current()->exception_. 
@@ -295,14 +243,13 @@ MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name) DEFINE_FUNCTION RAW_VAR(c_name, 0) SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME ebx, ebx movl %esp, %edx // remember SP - // Outgoing argument set up subl MACRO_LITERAL(12), %esp // alignment padding CFI_ADJUST_CFA_OFFSET(12) PUSH edx // pass SP pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - pushl 32+32(%edx) // pass caller Method* + pushl 32(%edx) // pass caller Method* CFI_ADJUST_CFA_OFFSET(4) PUSH ecx // pass arg2 PUSH eax // pass arg1 @@ -310,17 +257,6 @@ MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name) movl %edx, %edi // save code pointer in EDI addl MACRO_LITERAL(36), %esp // Pop arguments skip eax CFI_ADJUST_CFA_OFFSET(-36) - - // Restore FPRs. - movsd 0(%esp), %xmm0 - movsd 8(%esp), %xmm1 - movsd 16(%esp), %xmm2 - movsd 24(%esp), %xmm3 - - // Remove space for FPR args. - addl MACRO_LITERAL(4 * 8), %esp - CFI_ADJUST_CFA_OFFSET(-4 * 8) - POP ecx // Restore args except eax POP edx POP ebx @@ -348,63 +284,7 @@ INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvoke INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck /* - * Helper for quick invocation stub to set up XMM registers. Assumes EBX == shorty, - * ECX == arg_array. Clobbers EBX, ECX and al. Branches to xmm_setup_finished if it encounters - * the end of the shorty. - */ -MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished) -1: // LOOP - movb (%ebx), %al // al := *shorty - addl MACRO_LITERAL(1), %ebx // shorty++ - cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto xmm_setup_finished - je RAW_VAR(finished, 1) - cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE - je 2f - cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT - je 3f - addl MACRO_LITERAL(4), %ecx // arg_array++ - // Handle extra space in arg array taken by a long. 
- cmpb MACRO_LITERAL(74), %al // if (al != 'J') goto LOOP - jne 1b - addl MACRO_LITERAL(4), %ecx // arg_array++ - jmp 1b // goto LOOP -2: // FOUND_DOUBLE - movsd (%ecx), REG_VAR(xmm_reg, 0) - addl MACRO_LITERAL(8), %ecx // arg_array+=2 - jmp 4f -3: // FOUND_FLOAT - movss (%ecx), REG_VAR(xmm_reg, 0) - addl MACRO_LITERAL(4), %ecx // arg_array++ -4: -END_MACRO - - /* - * Helper for quick invocation stub to set up GPR registers. Assumes ESI == shorty, - * EDI == arg_array. Clobbers ESI, EDI and al. Branches to gpr_setup_finished if it encounters - * the end of the shorty. - */ -MACRO1(SKIP_OVER_FLOATS, finished) -1: // LOOP - movb (%esi), %al // al := *shorty - addl MACRO_LITERAL(1), %esi // shorty++ - cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto gpr_setup_finished - je RAW_VAR(finished, 0) - cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT - je 3f - cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE - je 4f - jmp 5f -3: // SKIP_FLOAT - addl MACRO_LITERAL(4), %edi // arg_array++ - jmp 1b -4: // SKIP_DOUBLE - addl MACRO_LITERAL(8), %edi // arg_array+=2 - jmp 1b -5: -END_MACRO - - /* - * Quick invocation stub (non-static). + * Quick invocation stub. * On entry: * [sp] = return address * [sp + 4] = method pointer @@ -415,17 +295,6 @@ END_MACRO * [sp + 24] = shorty */ DEFINE_FUNCTION art_quick_invoke_stub - // Set up argument XMM registers. - mov 24(%esp), %ebx // EBX := shorty + 1 ; ie skip return arg character. - addl LITERAL(1), %ebx - mov 8(%esp), %ecx // ECX := arg_array + 4 ; ie skip this pointer. 
- addl LITERAL(4), %ecx - LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished - LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished - LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished - LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished - .balign 16 -.Lxmm_setup_finished: PUSH ebp // save ebp PUSH ebx // save ebx PUSH esi // save esi @@ -439,41 +308,18 @@ DEFINE_FUNCTION art_quick_invoke_stub andl LITERAL(0xFFFFFFF0), %ebx subl LITERAL(20), %ebx // remove space for return address, ebx, ebp, esi and edi subl %ebx, %esp // reserve stack space for argument array - + SETUP_GOT_NOSAVE ebx // clobbers ebx (harmless here) + lea 4(%esp), %eax // use stack pointer + method ptr as dest for memcpy + pushl 28(%ebp) // push size of region to memcpy + pushl 24(%ebp) // push arg array as source of memcpy + pushl %eax // push stack pointer as destination of memcpy + call PLT_SYMBOL(memcpy) // (void*, const void*, size_t) + addl LITERAL(12), %esp // pop arguments to memcpy movl LITERAL(0), (%esp) // store NULL for method* - - // Copy arg array into stack. - movl 28(%ebp), %ecx // ECX = size of args - movl 24(%ebp), %esi // ESI = argument array - leal 4(%esp), %edi // EDI = just after Method* in stack arguments - rep movsb // while (ecx--) { *edi++ = *esi++ } - - mov 40(%ebp), %esi // ESI := shorty + 1 ; ie skip return arg character. - addl LITERAL(1), %esi - mov 24(%ebp), %edi // EDI := arg_array - mov 0(%edi), %ecx // ECX := this pointer - addl LITERAL(4), %edi // EDI := arg_array + 4 ; ie skip this pointer. - - // Enumerate the possible cases for loading GPRS. - // edx (and maybe ebx): - SKIP_OVER_FLOATS .Lgpr_setup_finished - cmpb LITERAL(74), %al // if (al == 'J') goto FOUND_LONG - je .LfirstLong - // Must be an integer value. - movl (%edi), %edx - addl LITERAL(4), %edi // arg_array++ - - // Now check ebx - SKIP_OVER_FLOATS .Lgpr_setup_finished - // Must be first word of a long, or an integer. 
- movl (%edi), %ebx - jmp .Lgpr_setup_finished -.LfirstLong: - movl (%edi), %edx - movl 4(%edi), %ebx - // Nothing left to load. -.Lgpr_setup_finished: mov 20(%ebp), %eax // move method pointer into eax + mov 4(%esp), %ecx // copy arg1 into ecx + mov 8(%esp), %edx // copy arg2 into edx + mov 12(%esp), %ebx // copy arg3 into ebx call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32(%eax) // call the method mov %ebp, %esp // restore stack pointer CFI_DEF_CFA_REGISTER(esp) @@ -498,119 +344,6 @@ DEFINE_FUNCTION art_quick_invoke_stub ret END_FUNCTION art_quick_invoke_stub - /* - * Quick invocation stub (static). - * On entry: - * [sp] = return address - * [sp + 4] = method pointer - * [sp + 8] = argument array or NULL for no argument methods - * [sp + 12] = size of argument array in bytes - * [sp + 16] = (managed) thread pointer - * [sp + 20] = JValue* result - * [sp + 24] = shorty - */ -DEFINE_FUNCTION art_quick_invoke_static_stub - // Set up argument XMM registers. - mov 24(%esp), %ebx // EBX := shorty + 1 ; ie skip return arg character. 
- addl LITERAL(1), %ebx - mov 8(%esp), %ecx // ECX := arg_array - LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2 - LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2 - LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2 - LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2 - .balign 16 -.Lxmm_setup_finished2: - PUSH ebp // save ebp - PUSH ebx // save ebx - PUSH esi // save esi - PUSH edi // save edi - mov %esp, %ebp // copy value of stack pointer into base pointer - CFI_DEF_CFA_REGISTER(ebp) - mov 28(%ebp), %ebx // get arg array size - // reserve space for return addr, method*, ebx, ebp, esi, and edi in frame - addl LITERAL(36), %ebx - // align frame size to 16 bytes - andl LITERAL(0xFFFFFFF0), %ebx - subl LITERAL(20), %ebx // remove space for return address, ebx, ebp, esi and edi - subl %ebx, %esp // reserve stack space for argument array - - movl LITERAL(0), (%esp) // store NULL for method* - - // Copy arg array into stack. - movl 28(%ebp), %ecx // ECX = size of args - movl 24(%ebp), %esi // ESI = argument array - leal 4(%esp), %edi // EDI = just after Method* in stack arguments - rep movsb // while (ecx--) { *edi++ = *esi++ } - - mov 40(%ebp), %esi // ESI := shorty + 1 ; ie skip return arg character. - addl LITERAL(1), %esi - mov 24(%ebp), %edi // EDI := arg_array - - // Enumerate the possible cases for loading GPRS. - // ecx (and maybe edx) - SKIP_OVER_FLOATS .Lgpr_setup_finished2 - cmpb LITERAL(74), %al // if (al == 'J') goto FOUND_LONG - je .LfirstLong2 - // Must be an integer value. Load into ECX. - movl (%edi), %ecx - addl LITERAL(4), %edi // arg_array++ - - // Now check edx (and maybe ebx). - SKIP_OVER_FLOATS .Lgpr_setup_finished2 - cmpb LITERAL(74), %al // if (al == 'J') goto FOUND_LONG - je .LSecondLong2 - // Must be an integer. Load into EDX. - movl (%edi), %edx - addl LITERAL(4), %edi // arg_array++ - - // Is there anything for ebx? - SKIP_OVER_FLOATS .Lgpr_setup_finished2 - // First word of long or integer. Load into EBX. 
- movl (%edi), %ebx - jmp .Lgpr_setup_finished2 -.LSecondLong2: - // EDX:EBX is long. That is all. - movl (%edi), %edx - movl 4(%edi), %ebx - jmp .Lgpr_setup_finished2 -.LfirstLong2: - // ECX:EDX is a long - movl (%edi), %ecx - movl 4(%edi), %edx - addl LITERAL(8), %edi // arg_array += 2 - - // Anything for EBX? - SKIP_OVER_FLOATS .Lgpr_setup_finished2 - // First word of long or integer. Load into EBX. - movl (%edi), %ebx - jmp .Lgpr_setup_finished2 - // Nothing left to load. -.Lgpr_setup_finished2: - mov 20(%ebp), %eax // move method pointer into eax - call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32(%eax) // call the method - mov %ebp, %esp // restore stack pointer - CFI_DEF_CFA_REGISTER(esp) - POP edi // pop edi - POP esi // pop esi - POP ebx // pop ebx - POP ebp // pop ebp - mov 20(%esp), %ecx // get result pointer - mov %eax, (%ecx) // store the result assuming its a long, int or Object* - mov %edx, 4(%ecx) // store the other half of the result - mov 24(%esp), %edx // get the shorty - cmpb LITERAL(68), (%edx) // test if result type char == 'D' - je .Lreturn_double_quick2 - cmpb LITERAL(70), (%edx) // test if result type char == 'F' - je .Lreturn_float_quick2 - ret -.Lreturn_double_quick2: - movsd %xmm0, (%ecx) // store the floating point result - ret -.Lreturn_float_quick2: - movss %xmm0, (%ecx) // store the floating point result - ret -END_FUNCTION art_quick_invoke_static_stub - MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro) DEFINE_FUNCTION RAW_VAR(c_name, 0) SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC @@ -1109,20 +842,20 @@ END_FUNCTION art_quick_memcpy NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret DEFINE_FUNCTION art_quick_d2l - subl LITERAL(12), %esp // alignment padding, room for argument - CFI_ADJUST_CFA_OFFSET(12) - movsd %xmm0, 0(%esp) // arg a - call SYMBOL(art_d2l) // (jdouble a) + PUSH eax // alignment padding + PUSH ecx // pass arg2 a.hi + PUSH eax // pass arg1 a.lo + call 
SYMBOL(art_d2l) // (jdouble a) addl LITERAL(12), %esp // pop arguments CFI_ADJUST_CFA_OFFSET(-12) ret END_FUNCTION art_quick_d2l DEFINE_FUNCTION art_quick_f2l - subl LITERAL(12), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(12) - movss %xmm0, 0(%esp) // arg a - call SYMBOL(art_f2l) // (jfloat a) + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) + PUSH eax // pass arg1 a + call SYMBOL(art_f2l) // (jfloat a) addl LITERAL(12), %esp // pop arguments CFI_ADJUST_CFA_OFFSET(-12) ret @@ -1282,8 +1015,8 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler movd %eax, %xmm0 // place return value also into floating point return value movd %edx, %xmm1 punpckldq %xmm1, %xmm0 - addl LITERAL(76), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-76) + addl LITERAL(44), %esp // pop arguments + CFI_ADJUST_CFA_OFFSET(-44) RETURN_OR_DELIVER_PENDING_EXCEPTION // return or deliver exception END_FUNCTION art_quick_proxy_invoke_handler @@ -1295,7 +1028,7 @@ DEFINE_FUNCTION art_quick_imt_conflict_trampoline PUSH ecx movl 8(%esp), %eax // load caller Method* movl MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET(%eax), %eax // load dex_cache_resolved_methods - movd %xmm7, %ecx // get target method index stored in xmm0 + movd %xmm0, %ecx // get target method index stored in xmm0 movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4), %eax // load the target method POP ecx jmp SYMBOL(art_quick_invoke_interface_trampoline) @@ -1314,7 +1047,14 @@ DEFINE_FUNCTION art_quick_resolution_trampoline addl LITERAL(16), %esp // pop arguments test %eax, %eax // if code pointer is NULL goto deliver pending exception jz 1f - RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP + POP eax // called method + POP ecx // restore args + POP edx + POP ebx + POP ebp // restore callee saves except EDI + POP esi + xchgl 0(%esp),%edi // restore EDI and place code pointer as only value on stack + ret // tail call into method 1: RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME DELIVER_PENDING_EXCEPTION @@ -1348,6 +1088,7 
@@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movl %edx, %esp // On x86 there are no registers passed, so nothing to pop here. + // Native call. call *%eax @@ -1374,10 +1115,8 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline jnz .Lexception_in_native // Tear down the callee-save frame. - // Remove space for FPR args and EAX - addl LITERAL(4 + 4 * 8), %esp - CFI_ADJUST_CFA_OFFSET(-(4 + 4 * 8)) - + addl LITERAL(4), %esp // Remove padding + CFI_ADJUST_CFA_OFFSET(-4) POP ecx addl LITERAL(4), %esp // Avoid edx, as it may be part of the result. CFI_ADJUST_CFA_OFFSET(-4) @@ -1407,21 +1146,12 @@ DEFINE_FUNCTION art_quick_to_interpreter_bridge CFI_ADJUST_CFA_OFFSET(4) PUSH eax // pass method call SYMBOL(artQuickToInterpreterBridge) // (method, Thread*, SP) - addl LITERAL(16), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-16) - - // Return eax:edx in xmm0 also. - movd %eax, %xmm0 + movd %eax, %xmm0 // place return value also into floating point return value movd %edx, %xmm1 punpckldq %xmm1, %xmm0 - - addl LITERAL(48), %esp // Remove FPRs and EAX, ECX, EDX, EBX. - CFI_ADJUST_CFA_OFFSET(-48) - - POP ebp // Restore callee saves - POP esi - POP edi - + addl LITERAL(16), %esp // pop arguments + CFI_ADJUST_CFA_OFFSET(-16) + RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME RETURN_OR_DELIVER_PENDING_EXCEPTION // return or deliver exception END_FUNCTION art_quick_to_interpreter_bridge @@ -1441,25 +1171,18 @@ DEFINE_FUNCTION art_quick_instrumentation_entry PUSH eax // Pass Method*. call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR) addl LITERAL(28), %esp // Pop arguments upto saved Method*. - movl 60(%esp), %edi // Restore edi. - movl %eax, 60(%esp) // Place code* over edi, just under return pc. + movl 28(%esp), %edi // Restore edi. + movl %eax, 28(%esp) // Place code* over edi, just under return pc. movl SYMBOL(art_quick_instrumentation_exit)@GOT(%ebx), %ebx // Place instrumentation exit as return pc. ebx holds the GOT computed on entry. 
- movl %ebx, 64(%esp) - movl 0(%esp), %eax // Restore eax. - // Restore FPRs (extra 4 bytes of offset due to EAX push at top). - movsd 8(%esp), %xmm0 - movsd 16(%esp), %xmm1 - movsd 24(%esp), %xmm2 - movsd 32(%esp), %xmm3 - - // Restore GPRs. - movl 40(%esp), %ecx // Restore ecx. - movl 48(%esp), %edx // Restore edx. - movl 48(%esp), %ebx // Restore ebx. - movl 52(%esp), %ebp // Restore ebp. - movl 56(%esp), %esi // Restore esi. - addl LITERAL(60), %esp // Wind stack back upto code*. + movl %ebx, 32(%esp) + movl (%esp), %eax // Restore eax. + movl 8(%esp), %ecx // Restore ecx. + movl 12(%esp), %edx // Restore edx. + movl 16(%esp), %ebx // Restore ebx. + movl 20(%esp), %ebp // Restore ebp. + movl 24(%esp), %esi // Restore esi. + addl LITERAL(28), %esp // Wind stack back upto code*. ret // Call method (and pop). END_FUNCTION art_quick_instrumentation_entry diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h index 9bba531638..b9dc0d8b19 100644 --- a/runtime/arch/x86/quick_method_frame_info_x86.h +++ b/runtime/arch/x86/quick_method_frame_info_x86.h @@ -24,44 +24,25 @@ namespace art { namespace x86 { -enum XMM { - XMM0 = 0, - XMM1 = 1, - XMM2 = 2, - XMM3 = 3, - XMM4 = 4, - XMM5 = 5, - XMM6 = 6, - XMM7 = 7, -}; - static constexpr uint32_t kX86CalleeSaveRefSpills = (1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI); static constexpr uint32_t kX86CalleeSaveArgSpills = (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX); -static constexpr uint32_t kX86CalleeSaveFpArgSpills = - (1 << art::x86::XMM0) | (1 << art::x86::XMM1) | - (1 << art::x86::XMM2) | (1 << art::x86::XMM3); constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { return kX86CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? 
kX86CalleeSaveArgSpills : 0) | (1 << art::x86::kNumberOfCpuRegisters); // fake return address callee save } -constexpr uint32_t X86CalleeSaveFpSpills(Runtime::CalleeSaveType type) { - return type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0; -} - constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) { return RoundUp((POPCOUNT(X86CalleeSaveCoreSpills(type)) /* gprs */ + - 2 * POPCOUNT(X86CalleeSaveFpSpills(type)) /* fprs */ + 1 /* Method* */) * kX86PointerSize, kStackAlignment); } constexpr QuickMethodFrameInfo X86CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) { return QuickMethodFrameInfo(X86CalleeSaveFrameSize(type), X86CalleeSaveCoreSpills(type), - X86CalleeSaveFpSpills(type)); + 0u); } } // namespace x86 diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 8ab90eb7fd..9947b55e2f 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -198,20 +198,16 @@ class QuickArgumentVisitor { // | EBX | arg3 // | EDX | arg2 // | ECX | arg1 - // | XMM3 | float arg 4 - // | XMM2 | float arg 3 - // | XMM1 | float arg 2 - // | XMM0 | float arg 1 // | EAX/Method* | <- sp static constexpr bool kAlignPairRegister = false; - static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI. + static constexpr bool kQuickSoftFloatAbi = true; // This is a soft float ABI. static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false; static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs. - static constexpr size_t kNumQuickFprArgs = 4; // 4 arguments passed in FPRs. + static constexpr size_t kNumQuickFprArgs = 0; // 0 arguments passed in FPRs. static constexpr bool kGprFprLockstep = false; - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 4; // Offset of first FPR arg. 
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4 + 4*8; // Offset of first GPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 28 + 4*8; // Offset of return address. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0; // Offset of first FPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4; // Offset of first GPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 28; // Offset of return address. static size_t GprIndexToGprOffset(uint32_t gpr_index) { return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA); } diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc index b2016dcc82..288f6a60bb 100644 --- a/runtime/mirror/art_method.cc +++ b/runtime/mirror/art_method.cc @@ -41,7 +41,7 @@ namespace mirror { extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, const char*); -#if defined(__LP64__) || defined(__arm__) || defined(__i386__) +#if defined(__LP64__) || defined(__arm__) extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, const char*); #endif @@ -415,7 +415,7 @@ void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* << "Don't call compiled code when -Xint " << PrettyMethod(this); } -#if defined(__LP64__) || defined(__arm__) || defined(__i386__) +#if defined(__LP64__) || defined(__arm__) if (!IsStatic()) { (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty); } else { diff --git a/runtime/oat.h b/runtime/oat.h index 3e2860602e..8e63d3ae8d 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '5', '4', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '5', '3', '\0' }; static constexpr const char* 
kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; |