author     Nicolas Geoffray <ngeoffray@google.com>   2015-01-08 14:52:29 +0000
committer  Nicolas Geoffray <ngeoffray@google.com>   2015-01-12 14:08:55 +0000
commit     425f239c291d435f519a1cf4bdd9ccc9a2c0c070 (patch)
tree       6c4ec2cef8fd0caf45712191bcbc5d72ed0d318b
parent     11adb76fbc2dc3d8cbb6665945ff5d6733e2a8e6 (diff)
Fix handling of long argument spanning register/memory.

Comment in arm_lir.h says:

 * If a 64-bit argument would span the register/memory argument
 * boundary, it will instead be fully passed in the frame.

This change implements such logic for all platforms. We still need to pass
the low part in register as well because I haven't ported the jni compilers
(x86 and mips) to it. Once the jni compilers are updated, we can remove the
register assignment.

Note that this greatly simplifies optimizing's register allocator by not
having to understand a long spanning register and memory.

Change-Id: I59706ca5d47269fc46e5489ac99bd6576e87e7f3
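
For readers unfamiliar with the convention, here is a minimal standalone C++
sketch of the placement rule the commit enforces. This is not ART code; the
register count, slot size, and helper names are illustrative assumptions.

    // Sketch: a 64-bit argument that would span the last core argument
    // register and the stack is instead passed fully in the frame.
    #include <cstdio>

    constexpr int kCoreArgRegisters = 4;  // assumption: four core argument registers
    constexpr int kSlotBytes = 4;         // assumption: 4-byte argument slots

    struct ArgLocation {
      bool in_registers;  // true: register pair; false: fully in the out frame
      int index;          // first register index, or byte offset in the frame
    };

    ArgLocation NextLongLocation(int* gpr_index, int* stack_offset) {
      if (*gpr_index + 1 < kCoreArgRegisters) {
        ArgLocation loc{true, *gpr_index};
        *gpr_index += 2;                      // the pair fits entirely in registers
        return loc;
      }
      *gpr_index = kCoreArgRegisters;         // skip the dangling register
      ArgLocation loc{false, *stack_offset};  // both halves go to the frame
      *stack_offset += 2 * kSlotBytes;
      return loc;
    }

    int main() {
      int gpr = kCoreArgRegisters - 1, stack = 0;  // only one free register left
      ArgLocation loc = NextLongLocation(&gpr, &stack);
      std::printf("in_registers=%d index=%d\n", loc.in_registers, loc.index);
      return 0;
    }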
-rwxr-xr-x  compiler/dex/quick/gen_invoke.cc                             14
-rw-r--r--  compiler/jni/quick/arm/calling_convention_arm.cc              6
-rw-r--r--  compiler/optimizing/code_generator_arm.cc                    39
-rw-r--r--  compiler/optimizing/code_generator_x86.cc                     3
-rw-r--r--  runtime/entrypoints/quick/quick_trampoline_entrypoints.cc    11
5 files changed, 18 insertions(+), 55 deletions(-)
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index c99be641a9..4139b51a61 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -410,7 +410,7 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
// If the wide input appeared as single, flush it and go
// as it comes from memory.
if (t_loc->wide && reg.Valid() && !reg.Is64Bit()) {
- StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, k32, kNotVolatile);
+ // The memory already holds the half. Don't do anything.
reg = RegStorage::InvalidReg();
}
@@ -881,21 +881,23 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
if (rl_arg.wide) {
// if reg is not 64-bit (it is half of 64-bit) then handle it separately.
if (!reg.Is64Bit()) {
- // TODO: REVISIT: This adds a spill of low part while we could just copy it.
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
if (rl_arg.location == kLocPhysReg) {
int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
- // Dump it to memory and then load only low part
+ // Dump it to memory.
StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
LoadBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
} else {
- int out_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set);
+ int high_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set);
// First, use target reg for high part.
LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + 1), reg, k32,
kNotVolatile);
- StoreBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
- // Now load target reg with low part.
+ StoreBaseDisp(TargetPtrReg(kSp), high_offset, reg, k32, kNotVolatile);
+ // Now, use target reg for low part.
LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low), reg, k32, kNotVolatile);
+ int low_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+ // And store it to the expected memory location.
+ StoreBaseDisp(TargetPtrReg(kSp), low_offset, reg, k32, kNotVolatile);
}
} else {
LoadValueDirectWideFixed(rl_arg, reg);
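
As a rough illustration of the copy pattern in the new else branch above,
the following plain C++ sketch stages both halves through one 32-bit scratch
register (these are not the Mir2Lir APIs; the vreg and out-area layouts are
simplified assumptions):

    #include <cstdint>

    // The scratch register is left holding the low half, matching the
    // temporary register assignment the commit message mentions.
    void CopyWideArgToOuts(const uint32_t* vregs, int s_reg_low,
                           uint32_t* outs, int out_slot, uint32_t* scratch) {
      *scratch = vregs[s_reg_low + 1];  // load high half from its vreg
      outs[out_slot + 1] = *scratch;    // store high half to the high out slot
      *scratch = vregs[s_reg_low];      // load low half from its vreg
      outs[out_slot] = *scratch;        // store low half to the low out slot
    }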
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index a3323e133a..fd207150f7 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -168,9 +168,13 @@ const ManagedRegisterEntrySpills& ArmManagedRuntimeCallingConvention::EntrySpill
} else {
// FIXME: Pointer this returns as both reference and long.
if (IsCurrentParamALong() && !IsCurrentParamAReference()) { // Long.
- if (gpr_index < arraysize(kHFCoreArgumentRegisters)) {
+ // If it spans register and memory, we must use the value in memory.
+ if (gpr_index < arraysize(kHFCoreArgumentRegisters) - 1) {
entry_spills_.push_back(
ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index++]));
+ } else if (gpr_index == arraysize(kHFCoreArgumentRegisters) - 1) {
+ gpr_index++;
+ entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
} else {
entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
}
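
The check added above boils down to something like this sketch (an
illustrative helper, not the calling-convention API):

    #include <cstddef>

    // A long needs two core registers; if only the last one is free, taking it
    // would split the value across register and memory, so the register is
    // skipped and the value is read from the spilled frame instead.
    bool LongMayUseCoreRegisters(size_t gpr_index, size_t num_core_arg_registers) {
      return gpr_index + 1 < num_core_arg_registers;
    }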
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d0a72bb42a..1cc2dcc9b8 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -593,8 +593,6 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
ArmManagedRegister pair = ArmManagedRegister::FromRegisterPair(
calling_convention.GetRegisterPairAt(index));
return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
- return Location::QuickParameter(index, stack_index);
} else {
return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
}
@@ -711,16 +709,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
} else if (source.IsFpuRegister()) {
UNIMPLEMENTED(FATAL);
- } else if (source.IsQuickParameter()) {
- uint16_t register_index = source.GetQuickParameterRegisterIndex();
- uint16_t stack_index = source.GetQuickParameterStackIndex();
- InvokeDexCallingConvention calling_convention;
- EmitParallelMoves(
- Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
- Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
- Location::StackSlot(
- calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
- Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()));
} else {
// No conflict possible, so just do the moves.
DCHECK(source.IsDoubleStackSlot());
@@ -741,22 +729,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
} else {
UNIMPLEMENTED(FATAL);
}
- } else if (destination.IsQuickParameter()) {
- InvokeDexCallingConvention calling_convention;
- uint16_t register_index = destination.GetQuickParameterRegisterIndex();
- uint16_t stack_index = destination.GetQuickParameterStackIndex();
- if (source.IsRegisterPair()) {
- UNIMPLEMENTED(FATAL);
- } else if (source.IsFpuRegister()) {
- UNIMPLEMENTED(FATAL);
- } else {
- DCHECK(source.IsDoubleStackSlot());
- EmitParallelMoves(
- Location::StackSlot(source.GetStackIndex()),
- Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
- Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
- Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
- }
} else {
DCHECK(destination.IsDoubleStackSlot());
if (source.IsRegisterPair()) {
@@ -769,17 +741,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
__ StoreToOffset(kStoreWordPair, source.AsRegisterPairLow<Register>(),
SP, destination.GetStackIndex());
}
- } else if (source.IsQuickParameter()) {
- InvokeDexCallingConvention calling_convention;
- uint16_t register_index = source.GetQuickParameterRegisterIndex();
- uint16_t stack_index = source.GetQuickParameterStackIndex();
- // Just move the low part. The only time a source is a quick parameter is
- // when moving the parameter to its stack locations. And the (Java) caller
- // of this method has already done that.
- __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(register_index),
- SP, destination.GetStackIndex());
- DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
- static_cast<size_t>(destination.GetHighStackIndex(kArmWordSize)));
} else if (source.IsFpuRegisterPair()) {
__ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
SP,
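
With the QuickParameter case removed, long-argument placement in the
optimizing backend reduces to a two-way choice, roughly like the sketch below
(types and names are illustrative, not the ART Location API):

    #include <cstdint>

    struct LongLocation {
      enum Kind { kRegisterPair, kDoubleStackSlot } kind;
      uint32_t payload;  // register-pair index, or stack offset in bytes
    };

    // Either the whole value fits in a register pair, or the whole value
    // lives in a double stack slot; there is no half-and-half location.
    LongLocation NextLongLocation(uint32_t pair_index, uint32_t num_register_pairs,
                                  uint32_t stack_offset) {
      if (pair_index < num_register_pairs) {
        return LongLocation{LongLocation::kRegisterPair, pair_index};
      }
      return LongLocation{LongLocation::kDoubleStackSlot, stack_offset};
    }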
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d377cb57c1..04e36cc58a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -643,9 +643,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
DCHECK(source.IsDoubleStackSlot());
EmitParallelMoves(
Location::StackSlot(source.GetStackIndex()),
- Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+ Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)),
Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
+ __ movl(calling_convention.GetRegisterAt(register_index), Address(ESP, source.GetStackIndex()));
}
} else if (destination.IsFpuRegister()) {
if (source.IsDoubleStackSlot()) {
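
In plain C++ terms the x86 hunk above does roughly the following (a sketch
assuming 4-byte argument slots; these are not the ART assembler calls):

    #include <cstdint>

    // The full long is copied to its two out slots; the low half is also
    // placed in the argument register only because the jni compilers still
    // read it from there, as noted in the commit message.
    void MoveLongToOutgoingArg(const uint32_t* frame, int src_slot,
                               uint32_t* out_args, int dst_slot,
                               uint32_t* low_half_register) {
      out_args[dst_slot] = frame[src_slot];          // low half to the stack
      out_args[dst_slot + 1] = frame[src_slot + 1];  // high half to the stack
      *low_half_register = frame[src_slot];          // transitional register copy
    }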
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 9db1646f5f..4bec70acd6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -304,14 +304,9 @@ class QuickArgumentVisitor {
}
uint64_t ReadSplitLongParam() const {
- DCHECK(IsSplitLongOrDouble());
- // Read low half from register.
- uint64_t low_half = *reinterpret_cast<uint32_t*>(GetParamAddress());
- // Read high half from the stack. As current stack_index_ indexes the argument, the high part
- // index should be (stack_index_ + 1).
- uint64_t high_half = *reinterpret_cast<uint32_t*>(stack_args_
- + (stack_index_ + 1) * kBytesStackArgLocation);
- return (low_half & 0xffffffffULL) | (high_half << 32);
+ // The split long is always available through the stack.
+ return *reinterpret_cast<uint64_t*>(stack_args_
+ + stack_index_ * kBytesStackArgLocation);
}
void VisitArguments() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
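
A standalone sketch of what ReadSplitLongParam now relies on: with the long
fully materialized in the stack argument area, both 32-bit halves sit in
adjacent slots and can be read back as one 64-bit value. memcpy is used here
to sidestep alignment concerns; the real code casts directly, and the slot
size is an assumption.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    uint64_t ReadLongFromStackArgs(const uint8_t* stack_args, size_t stack_index,
                                   size_t bytes_per_slot) {
      uint64_t value;
      std::memcpy(&value, stack_args + stack_index * bytes_per_slot, sizeof(value));
      return value;
    }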