diff options
author | Guillaume Sanchez <guillaumesa@google.com> | 2015-03-30 17:55:45 +0100 |
---|---|---|
committer | Guillaume Sanchez <guillaumesa@google.com> | 2015-04-09 12:24:10 +0100 |
commit | 0f88e87085b7cf6544dadff3f555773966a6853e (patch) | |
tree | ea0ae17c712b995e258f1f749a1b8cd98b1fa34b /compiler | |
parent | c4bd0e6a7f4839ea99222f06979cc2369cb9bf10 (diff) | |
download | android_art-0f88e87085b7cf6544dadff3f555773966a6853e.tar.gz android_art-0f88e87085b7cf6544dadff3f555773966a6853e.tar.bz2 android_art-0f88e87085b7cf6544dadff3f555773966a6853e.zip |
Speedup div/rem by constants on x86 and x86_64
This is done using the algorithms in Hacker's Delight chapter 10.
Change-Id: I7bacefe10067569769ed31a1f7834f796fb41119
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/Android.mk | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_utils.cc | 93 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_utils.h | 25 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 190 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 3 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 309 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 3 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 8 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 1 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64_test.cc | 4 |
10 files changed, 593 insertions, 44 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 904f117a5a..c663fcbf89 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -96,6 +96,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/code_generator_arm64.cc \ optimizing/code_generator_x86.cc \ optimizing/code_generator_x86_64.cc \ + optimizing/code_generator_utils.cc \ optimizing/constant_folding.cc \ optimizing/dead_code_elimination.cc \ optimizing/graph_checker.cc \ diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc new file mode 100644 index 0000000000..26cab2ff09 --- /dev/null +++ b/compiler/optimizing/code_generator_utils.cc @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_utils.h" + +#include "base/logging.h" + +void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, + int64_t* magic, int* shift) { + // It does not make sense to calculate magic and shift for zero divisor. + DCHECK_NE(divisor, 0); + + /* According to implementation from H.S.Warren's "Hacker's Delight" (Addison Wesley, 2002) + * Chapter 10 and T,Grablund, P.L.Montogomery's "Division by Invariant Integers Using + * Multiplication" (PLDI 1994). + * The magic number M and shift S can be calculated in the following way: + * Let nc be the most positive value of numerator(n) such that nc = kd - 1, + * where divisor(d) >= 2. + * Let nc be the most negative value of numerator(n) such that nc = kd + 1, + * where divisor(d) <= -2. + * Thus nc can be calculated like: + * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long + * nc = -exp + (exp + 1) % d, where d >= 2 and exp = 2^31 for int or 2^63 for long + * + * So the shift p is the smallest p satisfying + * 2^p > nc * (d - 2^p % d), where d >= 2 + * 2^p > nc * (d + 2^p % d), where d <= -2. + * + * The magic number M is calcuated by + * M = (2^p + d - 2^p % d) / d, where d >= 2 + * M = (2^p - d - 2^p % d) / d, where d <= -2. + * + * Notice that p is always bigger than or equal to 32 (resp. 64), so we just return 32-p + * (resp. 64 - p) as the shift number S. + */ + + int64_t p = is_long ? 63 : 31; + const uint64_t exp = is_long ? (UINT64_C(1) << 63) : (UINT32_C(1) << 31); + + // Initialize the computations. + uint64_t abs_d = (divisor >= 0) ? divisor : -divisor; + uint64_t tmp = exp + (is_long ? static_cast<uint64_t>(divisor) >> 63 : + static_cast<uint32_t>(divisor) >> 31); + uint64_t abs_nc = tmp - 1 - tmp % abs_d; + uint64_t quotient1 = exp / abs_nc; + uint64_t remainder1 = exp % abs_nc; + uint64_t quotient2 = exp / abs_d; + uint64_t remainder2 = exp % abs_d; + + /* + * To avoid handling both positive and negative divisor, "Hacker's Delight" + * introduces a method to handle these 2 cases together to avoid duplication. + */ + uint64_t delta; + do { + p++; + quotient1 = 2 * quotient1; + remainder1 = 2 * remainder1; + if (remainder1 >= abs_nc) { + quotient1++; + remainder1 = remainder1 - abs_nc; + } + quotient2 = 2 * quotient2; + remainder2 = 2 * remainder2; + if (remainder2 >= abs_d) { + quotient2++; + remainder2 = remainder2 - abs_d; + } + delta = abs_d - remainder2; + } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); + + *magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); + + if (!is_long) { + *magic = static_cast<int>(*magic); + } + + *shift = is_long ? p - 64 : p - 32; +} + diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h new file mode 100644 index 0000000000..742d67565a --- /dev/null +++ b/compiler/optimizing/code_generator_utils.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ + +#include <cstdint> + +// Computes the magic number and the shift needed in the div/rem by constant algorithm +void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift); + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8d0ca0beb9..dac7221bc4 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -16,6 +16,7 @@ #include "code_generator_x86.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -2222,6 +2223,134 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { __ addl(ESP, Immediate(2 * elem_size)); } + +void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(1).IsConstant()); + + Register out_register = locations->Out().AsRegister<Register>(); + Register input_register = locations->InAt(0).AsRegister<Register>(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ xorl(out_register, out_register); + } else { + __ movl(out_register, input_register); + if (imm == -1) { + __ negl(out_register); + } + } +} + + +void InstructionCodeGeneratorX86::DivByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv()); + + LocationSummary* locations = instruction->GetLocations(); + + Register out_register = locations->Out().AsRegister<Register>(); + Register input_register = locations->InAt(0).AsRegister<Register>(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + DCHECK(instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))); + Register num = locations->GetTemp(0).AsRegister<Register>(); + + __ leal(num, Address(input_register, std::abs(imm) - 1)); + __ testl(input_register, input_register); + __ cmovl(kGreaterEqual, num, input_register); + int shift = CTZ(imm); + __ sarl(num, Immediate(shift)); + + if (imm < 0) { + __ negl(num); + } + + __ movl(out_register, num); +} + +void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + Register eax = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + Register num; + Register edx; + + if (instruction->IsDiv()) { + edx = locations->GetTemp(0).AsRegister<Register>(); + num = locations->GetTemp(1).AsRegister<Register>(); + } else { + edx = locations->Out().AsRegister<Register>(); + num = locations->GetTemp(0).AsRegister<Register>(); + } + + DCHECK_EQ(EAX, eax); + DCHECK_EQ(EDX, edx); + if (instruction->IsDiv()) { + DCHECK_EQ(EAX, out); + } else { + DCHECK_EQ(EDX, out); + } + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + Label ndiv; + Label end; + // If numerator is 0, the result is 0, no computation needed. + __ testl(eax, eax); + __ j(kNotEqual, &ndiv); + + __ xorl(out, out); + __ jmp(&end); + + __ Bind(&ndiv); + + // Save the numerator. + __ movl(num, eax); + + // EAX = magic + __ movl(eax, Immediate(magic)); + + // EDX:EAX = magic * numerator + __ imull(num); + + if (imm > 0 && magic < 0) { + // EDX += num + __ addl(edx, num); + } else if (imm < 0 && magic > 0) { + __ subl(edx, num); + } + + // Shift if needed. + if (shift != 0) { + __ sarl(edx, Immediate(shift)); + } + + // EDX += 1 if EDX < 0 + __ movl(eax, edx); + __ shrl(edx, Immediate(31)); + __ addl(edx, eax); + + if (instruction->IsRem()) { + __ movl(eax, num); + __ imull(edx, Immediate(imm)); + __ subl(eax, edx); + __ movl(edx, eax); + } else { + __ movl(eax, edx); + } + __ Bind(&end); +} + void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); @@ -2233,28 +2362,42 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr switch (instruction->GetResultType()) { case Primitive::kPrimInt: { - Register second_reg = second.AsRegister<Register>(); DCHECK_EQ(EAX, first.AsRegister<Register>()); DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>()); - SlowPathCodeX86* slow_path = + if (second.IsConstant()) { + int imm = second.GetConstant()->AsIntConstant()->GetValue(); + + if (imm == 0) { + // Do not generate anything for 0. DivZeroCheck would forbid any generated code. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (is_div && IsPowerOfTwo(std::abs(imm))) { + DivByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(), - is_div); - codegen_->AddSlowPath(slow_path); + is_div); + codegen_->AddSlowPath(slow_path); - // 0x80000000/-1 triggers an arithmetic exception! - // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so - // it's safe to just use negl instead of more complex comparisons. - - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); + Register second_reg = second.AsRegister<Register>(); + // 0x80000000/-1 triggers an arithmetic exception! + // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so + // it's safe to just use negl instead of more complex comparisons. - // edx:eax <- sign-extended of eax - __ cdq(); - // eax = quotient, edx = remainder - __ idivl(second_reg); + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // edx:eax <- sign-extended of eax + __ cdq(); + // eax = quotient, edx = remainder + __ idivl(second_reg); + __ Bind(slow_path->GetExitLabel()); + } break; } @@ -2294,10 +2437,16 @@ void LocationsBuilderX86::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); // Intel uses edx:eax as the dividend. locations->AddTemp(Location::RegisterLocation(EDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as + // output and request another temp. + if (div->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimLong: { @@ -2355,6 +2504,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong) ? LocationSummary::kCall : LocationSummary::kNoCall; @@ -2363,8 +2513,14 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { switch (type) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RegisterLocation(EDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as + // output and request another temp. + if (rem->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimLong: { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 6a4d42dd01..a78f56450b 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -163,6 +163,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem *rem); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register shifter); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ef60280016..1f24046c5e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -16,6 +16,7 @@ #include "code_generator_x86_64.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" @@ -2203,6 +2204,228 @@ void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); } +void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); + CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>(); + int64_t imm; + if (second.GetConstant()->IsLongConstant()) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } + + DCHECK(imm == 1 || imm == -1); + + switch (instruction->GetResultType()) { + case Primitive::kPrimInt: { + if (instruction->IsRem()) { + __ xorl(output_register, output_register); + } else { + __ movl(output_register, input_register); + if (imm == -1) { + __ negl(output_register); + } + } + break; + } + + case Primitive::kPrimLong: { + if (instruction->IsRem()) { + __ xorq(output_register, output_register); + } else { + __ movq(output_register, input_register); + if (imm == -1) { + __ negq(output_register); + } + } + break; + } + + default: + LOG(FATAL) << "Unreachable"; + } +} + +void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); + CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); + + int64_t imm; + if (instruction->GetResultType() == Primitive::kPrimLong) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } + + DCHECK(IsPowerOfTwo(std::abs(imm))); + + CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + if (instruction->GetResultType() == Primitive::kPrimInt) { + __ leal(tmp, Address(numerator, std::abs(imm) - 1)); + __ testl(numerator, numerator); + __ cmov(kGreaterEqual, tmp, numerator); + int shift = CTZ(imm); + __ sarl(tmp, Immediate(shift)); + + if (imm < 0) { + __ negl(tmp); + } + + __ movl(output_register, tmp); + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); + + __ movq(rdx, Immediate(std::abs(imm) - 1)); + __ addq(rdx, numerator); + __ testq(numerator, numerator); + __ cmov(kGreaterEqual, rdx, numerator); + int shift = CTZ(imm); + __ sarq(rdx, Immediate(shift)); + + if (imm < 0) { + __ negq(rdx); + } + + __ movq(output_register, rdx); + } +} + +void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>() + : locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>() + : locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + DCHECK_EQ(RAX, eax.AsRegister()); + DCHECK_EQ(RDX, edx.AsRegister()); + if (instruction->IsDiv()) { + DCHECK_EQ(RAX, out.AsRegister()); + } else { + DCHECK_EQ(RDX, out.AsRegister()); + } + + int64_t magic; + int shift; + + // TODO: can these branch be written as one? + if (instruction->GetResultType() == Primitive::kPrimInt) { + int imm = second.GetConstant()->AsIntConstant()->GetValue(); + + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + __ movl(numerator, eax); + + Label no_div; + Label end; + __ testl(eax, eax); + __ j(kNotEqual, &no_div); + + __ xorl(out, out); + __ jmp(&end); + + __ Bind(&no_div); + + __ movl(eax, Immediate(magic)); + __ imull(numerator); + + if (imm > 0 && magic < 0) { + __ addl(edx, numerator); + } else if (imm < 0 && magic > 0) { + __ subl(edx, numerator); + } + + if (shift != 0) { + __ sarl(edx, Immediate(shift)); + } + + __ movl(eax, edx); + __ shrl(edx, Immediate(31)); + __ addl(edx, eax); + + if (instruction->IsRem()) { + __ movl(eax, numerator); + __ imull(edx, Immediate(imm)); + __ subl(eax, edx); + __ movl(edx, eax); + } else { + __ movl(eax, edx); + } + __ Bind(&end); + } else { + int64_t imm = second.GetConstant()->AsLongConstant()->GetValue(); + + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + + CpuRegister rax = eax; + CpuRegister rdx = edx; + + CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift); + + // Save the numerator. + __ movq(numerator, rax); + + // RAX = magic + __ movq(rax, Immediate(magic)); + + // RDX:RAX = magic * numerator + __ imulq(numerator); + + if (imm > 0 && magic < 0) { + // RDX += numeratorerator + __ addq(rdx, numerator); + } else if (imm < 0 && magic > 0) { + // RDX -= numerator + __ subq(rdx, numerator); + } + + // Shift if needed. + if (shift != 0) { + __ sarq(rdx, Immediate(shift)); + } + + // RDX += 1 if RDX < 0 + __ movq(rax, rdx); + __ shrq(rdx, Immediate(63)); + __ addq(rdx, rax); + + if (instruction->IsRem()) { + __ movq(rax, numerator); + + if (IsInt<32>(imm)) { + __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); + } else { + __ movq(numerator, Immediate(imm)); + __ imulq(rdx, numerator); + } + + __ subq(rax, rdx); + __ movq(rdx, rax); + } else { + __ movq(rax, rdx); + } + } +} + void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); @@ -2211,37 +2434,57 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); - CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>(); - CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location second = locations->InAt(1); DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister()); - DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister()); + DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister()); - SlowPathCodeX86_64* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( - out_reg.AsRegister(), type, is_div); - codegen_->AddSlowPath(slow_path); + if (second.IsConstant()) { + int64_t imm; + if (second.GetConstant()->AsLongConstant()) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } - // 0x80000000(00000000)/-1 triggers an arithmetic exception! - // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) - // so it's safe to just use negl instead of more complex comparisons. - if (type == Primitive::kPrimInt) { - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - // edx:eax <- sign-extended of eax - __ cdq(); - // eax = quotient, edx = remainder - __ idivl(second_reg); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) { + DivByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } } else { - __ cmpq(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - // rdx:rax <- sign-extended of rax - __ cqo(); - // rax = quotient, rdx = remainder - __ idivq(second_reg); - } + SlowPathCodeX86_64* slow_path = + new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( + out.AsRegister(), type, is_div); + codegen_->AddSlowPath(slow_path); - __ Bind(slow_path->GetExitLabel()); + CpuRegister second_reg = second.AsRegister<CpuRegister>(); + // 0x80000000(00000000)/-1 triggers an arithmetic exception! + // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) + // so it's safe to just use negl instead of more complex comparisons. + if (type == Primitive::kPrimInt) { + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); + // edx:eax <- sign-extended of eax + __ cdq(); + // eax = quotient, edx = remainder + __ idivl(second_reg); + } else { + __ cmpq(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); + // rdx:rax <- sign-extended of rax + __ cqo(); + // rax = quotient, rdx = remainder + __ idivq(second_reg); + } + __ Bind(slow_path->GetExitLabel()); + } } void LocationsBuilderX86_64::VisitDiv(HDiv* div) { @@ -2251,10 +2494,16 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); // Intel uses edx:eax as the dividend. locations->AddTemp(Location::RegisterLocation(RDX)); + // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way + // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as + // output and request another temp. + if (div->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } @@ -2309,9 +2558,15 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); // Intel uses rdx:rax as the dividend and puts the remainder in rdx locations->SetOut(Location::RegisterLocation(RDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as + // output and request another temp. + if (rem->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index a380b6a04c..03bf0b8f98 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -173,6 +173,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem *rem); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index bd155ed788..b8c757c05d 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1597,6 +1597,14 @@ void X86_64Assembler::imull(CpuRegister reg) { } +void X86_64Assembler::imulq(CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg); + EmitUint8(0xF7); + EmitOperand(5, Operand(reg)); +} + + void X86_64Assembler::imull(const Address& address) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(address); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 495f74f498..e2fd5fbb80 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -465,6 +465,7 @@ class X86_64Assembler FINAL : public Assembler { void imull(CpuRegister reg, const Immediate& imm); void imull(CpuRegister reg, const Address& address); + void imulq(CpuRegister src); void imulq(CpuRegister dst, CpuRegister src); void imulq(CpuRegister reg, const Immediate& imm); void imulq(CpuRegister reg, const Address& address); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 00f508b23f..c2052c7732 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -269,6 +269,10 @@ TEST_F(AssemblerX86_64Test, AddlImm) { DriverStr(Repeatri(&x86_64::X86_64Assembler::addl, 4U, "add ${imm}, %{reg}"), "addli"); } +TEST_F(AssemblerX86_64Test, ImulqReg1) { + DriverStr(RepeatR(&x86_64::X86_64Assembler::imulq, "imulq %{reg}"), "imulq"); +} + TEST_F(AssemblerX86_64Test, ImulqRegs) { DriverStr(RepeatRR(&x86_64::X86_64Assembler::imulq, "imulq %{reg2}, %{reg1}"), "imulq"); } |