diff options
author | Roland Levillain <rpl@google.com> | 2015-04-20 15:14:36 +0100 |
---|---|---|
committer | Roland Levillain <rpl@google.com> | 2015-04-20 15:53:12 +0100 |
commit | 232ade0b9401404ad4b61b1003551b58b96195a8 (patch) | |
tree | 54fe7cc37674246dead84f883a4c8be2123e7d26 /compiler/optimizing/code_generator_x86.cc | |
parent | 2e0f89b1b61685f7c322a4c6ec3e3b4839e76d64 (diff) | |
download | art-232ade0b9401404ad4b61b1003551b58b96195a8.tar.gz art-232ade0b9401404ad4b61b1003551b58b96195a8.tar.bz2 art-232ade0b9401404ad4b61b1003551b58b96195a8.zip |
Revert "Revert "Optimizing: Fix long-to-fp conversion on x86.""
This reverts commit 386ce406f150645158d6067c4e0a36565aefc44f.
Bug: 20413424
Change-Id: I6e93ff132907f2653f1ae12d6676ff2298f62ca1
Diffstat (limited to 'compiler/optimizing/code_generator_x86.cc')
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 158 |
1 files changed, 85 insertions, 73 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0f1175563e..a1475000ee 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1547,10 +1547,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::Any()); break; case Primitive::kPrimDouble: @@ -1580,10 +1578,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: // Processing a Dex `long-to-double' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::Any()); break; case Primitive::kPrimFloat: @@ -1804,37 +1800,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: { // Processing a Dex `long-to-float' instruction. - Register low = in.AsRegisterPairLow<Register>(); - Register high = in.AsRegisterPairHigh<Register>(); - XmmRegister result = out.AsFpuRegister<XmmRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - - // Operations use doubles for precision reasons (each 32-bit - // half of a long fits in the 53-bit mantissa of a double, - // but not in the 24-bit mantissa of a float). This is - // especially important for the low bits. The result is - // eventually converted to float. - - // low = low - 2^31 (to prevent bit 31 of `low` to be - // interpreted as a sign bit) - __ subl(low, Immediate(0x80000000)); - // temp = int-to-double(high) - __ cvtsi2sd(temp, high); - // temp = temp * 2^32 - __ LoadLongConstant(constant, k2Pow32EncodingForDouble); - __ mulsd(temp, constant); - // result = int-to-double(low) - __ cvtsi2sd(result, low); - // result = result + 2^31 (restore the original value of `low`) - __ LoadLongConstant(constant, k2Pow31EncodingForDouble); - __ addsd(result, constant); - // result = result + temp - __ addsd(result, temp); - // result = double-to-float(result) - __ cvtsd2ss(result, result); - // Restore low. - __ addl(low, Immediate(0x80000000)); + size_t adjustment = 0; + + // Create stack space for the call to + // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below. + // TODO: enhance register allocator to ask for stack temporaries. + if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) { + adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + __ subl(ESP, Immediate(adjustment)); + } + + // Load the value to the FP stack, using temporaries if needed. + PushOntoFPStack(in, 0, adjustment, false, true); + + if (out.IsStackSlot()) { + __ fstps(Address(ESP, out.GetStackIndex() + adjustment)); + } else { + __ fstps(Address(ESP, 0)); + Location stack_temp = Location::StackSlot(0); + codegen_->Move32(out, stack_temp); + } + + // Remove the temporary stack space we allocated. + if (adjustment != 0) { + __ addl(ESP, Immediate(adjustment)); + } break; } @@ -1863,29 +1853,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: { // Processing a Dex `long-to-double' instruction. - Register low = in.AsRegisterPairLow<Register>(); - Register high = in.AsRegisterPairHigh<Register>(); - XmmRegister result = out.AsFpuRegister<XmmRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - - // low = low - 2^31 (to prevent bit 31 of `low` to be - // interpreted as a sign bit) - __ subl(low, Immediate(0x80000000)); - // temp = int-to-double(high) - __ cvtsi2sd(temp, high); - // temp = temp * 2^32 - __ LoadLongConstant(constant, k2Pow32EncodingForDouble); - __ mulsd(temp, constant); - // result = int-to-double(low) - __ cvtsi2sd(result, low); - // result = result + 2^31 (restore the original value of `low`) - __ LoadLongConstant(constant, k2Pow31EncodingForDouble); - __ addsd(result, constant); - // result = result + temp - __ addsd(result, temp); - // Restore low. - __ addl(low, Immediate(0x80000000)); + size_t adjustment = 0; + + // Create stack space for the call to + // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below. + // TODO: enhance register allocator to ask for stack temporaries. + if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) { + adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + __ subl(ESP, Immediate(adjustment)); + } + + // Load the value to the FP stack, using temporaries if needed. + PushOntoFPStack(in, 0, adjustment, false, true); + + if (out.IsDoubleStackSlot()) { + __ fstpl(Address(ESP, out.GetStackIndex() + adjustment)); + } else { + __ fstpl(Address(ESP, 0)); + Location stack_temp = Location::DoubleStackSlot(0); + codegen_->Move64(out, stack_temp); + } + + // Remove the temporary stack space we allocated. + if (adjustment != 0) { + __ addl(ESP, Immediate(adjustment)); + } break; } @@ -2225,24 +2217,43 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } } -void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset, - uint32_t stack_adjustment, bool is_float) { +void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, + uint32_t temp_offset, + uint32_t stack_adjustment, + bool is_fp, + bool is_wide) { if (source.IsStackSlot()) { - DCHECK(is_float); - __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + DCHECK(!is_wide); + if (is_fp) { + __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } } else if (source.IsDoubleStackSlot()) { - DCHECK(!is_float); - __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + DCHECK(is_wide); + if (is_fp) { + __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } } else { // Write the value to the temporary location on the stack and load to FP stack. - if (is_float) { + if (!is_wide) { Location stack_temp = Location::StackSlot(temp_offset); codegen_->Move32(stack_temp, source); - __ flds(Address(ESP, temp_offset)); + if (is_fp) { + __ flds(Address(ESP, temp_offset)); + } else { + __ filds(Address(ESP, temp_offset)); + } } else { Location stack_temp = Location::DoubleStackSlot(temp_offset); codegen_->Move64(stack_temp, source); - __ fldl(Address(ESP, temp_offset)); + if (is_fp) { + __ fldl(Address(ESP, temp_offset)); + } else { + __ fildl(Address(ESP, temp_offset)); + } } } } @@ -2261,8 +2272,9 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { __ subl(ESP, Immediate(2 * elem_size)); // Load the values to the FP stack in reverse order, using temporaries if needed. - PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); - PushOntoFPStack(first, 0, 2 * elem_size, is_float); + const bool is_wide = !is_float; + PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide); + PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide); // Loop doing FPREM until we stabilize. Label retry; |