From fb8d279bc011b31d0765dc7ca59afea324fd0d0c Mon Sep 17 00:00:00 2001 From: Mark Mendell Date: Tue, 31 Mar 2015 22:16:59 -0400 Subject: [optimizing] Implement x86/x86_64 math intrinsics Implement floor/ceil/round/RoundFloat on x86 and x86_64. Implement RoundDouble on x86_64. Add support for roundss and roundsd on both architectures. Support them in the disassembler as well. Add the instruction set features for x86, as the 'round' instruction is only supported if SSE4.1 is supported. Fix the tests to handle the addition of passing the instruction set features to x86 and x86_64. Add assembler tests for roundsd and roundss to x86_64 assembler tests. Change-Id: I9742d5930befb0bbc23f3d6c83ce0183ed9fe04f Signed-off-by: Mark Mendell --- compiler/utils/assembler_test.h | 61 ++++++++++++++++++++++++++ compiler/utils/x86/assembler_x86.cc | 22 ++++++++++ compiler/utils/x86/assembler_x86.h | 3 ++ compiler/utils/x86_64/assembler_x86_64.cc | 24 ++++++++++ compiler/utils/x86_64/assembler_x86_64.h | 3 ++ compiler/utils/x86_64/assembler_x86_64_test.cc | 8 ++++ 6 files changed, 121 insertions(+) (limited to 'compiler/utils') diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 6f8b3012a4..b13edb68bf 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -123,6 +123,16 @@ class AssemblerTest : public testing::Test { fmt); } + std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, std::string fmt) { + return RepeatTemplatedRegistersImm(f, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + imm_bytes, + fmt); + } + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) { return RepeatTemplatedRegisters(f, GetFPRegisters(), @@ -448,6 +458,57 @@ class AssemblerTest : public testing::Test { return str; } + template + std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&), + const std::vector reg1_registers, + const std::vector reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + size_t imm_bytes, + std::string fmt) { + std::vector imms = CreateImmediateValues(imm_bytes); + WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); + + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + Imm new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*reg1, *reg2, new_imm); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + size_t imm_index = base.find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm; + std::string imm_string = sreg.str(); + base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + template std::string GetRegName(const Reg& reg) { std::ostringstream sreg; diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 5773459ff5..b3a1376727 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -695,6 +695,28 @@ void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) { } +void X86Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0B); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + +void X86Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0A); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + void X86Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 6ccf2e365d..bdf88435a4 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -312,6 +312,9 @@ class X86Assembler FINAL : public Assembler { void ucomiss(XmmRegister a, XmmRegister b); void ucomisd(XmmRegister a, XmmRegister b); + void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm); + void sqrtsd(XmmRegister dst, XmmRegister src); void sqrtss(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index bd155ed788..e82d90c5c8 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -796,6 +796,30 @@ void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) { } +void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0B); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + +void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0A); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 495f74f498..39f781cb1c 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -353,6 +353,9 @@ class X86_64Assembler FINAL : public Assembler { void ucomiss(XmmRegister a, XmmRegister b); void ucomisd(XmmRegister a, XmmRegister b); + void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm); + void sqrtsd(XmmRegister dst, XmmRegister src); void sqrtss(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 00f508b23f..4402dfcb37 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -692,6 +692,14 @@ TEST_F(AssemblerX86_64Test, Sqrtsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtsd, "sqrtsd %{reg2}, %{reg1}"), "sqrtsd"); } +TEST_F(AssemblerX86_64Test, Roundss) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundss, 1, "roundss ${imm}, %{reg2}, %{reg1}"), "roundss"); +} + +TEST_F(AssemblerX86_64Test, Roundsd) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundsd, 1, "roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd"); +} + TEST_F(AssemblerX86_64Test, Xorps) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorps, "xorps %{reg2}, %{reg1}"), "xorps"); } -- cgit v1.2.3