| author | Mark Mendell <mark.p.mendell@intel.com> | 2014-04-29 16:55:20 -0400 |
|---|---|---|
| committer | buzbee <buzbee@google.com> | 2014-05-16 11:04:27 -0700 |
| commit | d65c51a556e6649db4e18bd083c8fec37607a442 (patch) | |
| tree | 97fcb17ae74a587c6ef756dda6f4b03db5e9950f /compiler | |
| parent | 1e97c4a4ab9f17d1394b952882d59d894b1e3c74 (diff) | |
| download | art-d65c51a556e6649db4e18bd083c8fec37607a442.tar.gz art-d65c51a556e6649db4e18bd083c8fec37607a442.tar.bz2 art-d65c51a556e6649db4e18bd083c8fec37607a442.zip | |
ART: Add support for constant vector literals
Add some vector instructions. Implement the ConstVector
instruction, which takes 4 words of data and loads them into
an XMM register.
Initially, only the ConstVector MIR opcode is implemented. Others will
be added after this one goes in.
Change-Id: I5c79bc8b7de9030ef1c213fc8b227debc47f6337
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
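For illustration only (not part of the change): a minimal, self-contained C++ sketch of the data a ConstVector-style MIR carries. The struct and helper below are hypothetical stand-ins, not ART's MIR class; only the field meanings (vA = number of bits, vB = destination register, arg[0..3] = constant data) come from this commit.

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the MIR payload described in the commit message:
// vA = number of bits in the vector register, vB = destination register,
// arg[0..3] = up to 128 bits of initialization data.
struct ConstVectorPayload {
  uint32_t vA;                  // 128 for a full XMM register
  uint32_t vB;                  // destination vector register number
  std::array<uint32_t, 4> arg;  // packed constant data, low word first
};

// Pack four 32-bit lanes into the payload words.
ConstVectorPayload MakeInt32x4(uint32_t dest_reg,
                               uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) {
  return ConstVectorPayload{128, dest_reg, {x0, x1, x2, x3}};
}

int main() {
  ConstVectorPayload p = MakeInt32x4(1, 0x1, 0x2, 0x3, 0x4);
  std::printf("vA=%u vB=%u data=%08x %08x %08x %08x\n",
              p.vA, p.vB, p.arg[0], p.arg[1], p.arg[2], p.arg[3]);
  return 0;
}
```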
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/dex/compiler_enums.h | 98 |
| -rw-r--r-- | compiler/dex/mir_graph.cc | 50 |
| -rw-r--r-- | compiler/dex/quick/codegen_util.cc | 4 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir.cc | 11 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 8 |
| -rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 25 |
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 32 |
| -rw-r--r-- | compiler/dex/quick/x86/target_x86.cc | 123 |
| -rw-r--r-- | compiler/dex/quick/x86/utility_x86.cc | 3 |
| -rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 2 |
10 files changed, 334 insertions, 22 deletions
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 05ab8ca3fb..5b4492f48c 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -126,6 +126,104 @@ enum ExtendedMIROpcode {
   kMirOpCheck,
   kMirOpCheckPart2,
   kMirOpSelect,
+
+  // Vector opcodes:
+  // TypeSize is an encoded field giving the element type and the vector size.
+  // It is encoded as OpSize << 16 | (number of bits in vector)
+  //
+  // Destination and source are integers that will be interpreted by the
+  // backend that supports Vector operations.  Backends are permitted to support only
+  // certain vector register sizes.
+  //
+  // At this point, only two operand instructions are supported.  Three operand instructions
+  // could be supported by using a bit in TypeSize and arg[0] where needed.
+
+  // @brief MIR to move constant data to a vector register
+  // vA: number of bits in register
+  // vB: destination
+  // args[0]~args[3]: up to 128 bits of data for initialization
+  kMirOpConstVector,
+
+  // @brief MIR to move a vectorized register to another
+  // vA: TypeSize
+  // vB: destination
+  // vC: source
+  kMirOpMoveVector,
+
+  // @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedMultiply,
+
+  // @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedAddition,
+
+  // @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedSubtract,
+
+  // @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedShiftLeft,
+
+  // @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedSignedShiftRight,
+
+  // @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedUnsignedShiftRight,
+
+  // @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedAnd,
+
+  // @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedOr,
+
+  // @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedXor,
+
+  // @brief Reduce a 128-bit packed element into a single VR by taking lower bits
+  // @details Instruction does a horizontal addition of the packed elements and then adds it to VR
+  // vA: TypeSize
+  // vB: destination and source VR (not vector register)
+  // vC: source (vector register)
+  kMirOpPackedAddReduce,
+
+  // @brief Extract a packed element into a single VR.
+  // vA: TypeSize
+  // vB: destination VR (not vector register)
+  // vC: source (vector register)
+  // arg[0]: The index to use for extraction from vector register (which packed element)
+  kMirOpPackedReduce,
+
+  // @brief Create a vector value, with all TypeSize values equal to vC
+  // vA: TypeSize
+  // vB: destination vector register
+  // vC: source VR (not vector register)
+  kMirOpPackedSet,
+
   kMirOpLast,
 };
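As an aside (not part of the commit), a small standalone sketch of the TypeSize encoding described in the comment above — OpSize << 16 | vector-width-in-bits — and of the decode the x86 backend performs later with `type_size & 0xFFFF`. The element-size tags here are illustrative placeholders, not ART's OpSize enum.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative element-size tags; the real backend uses its OpSize enum values.
enum IllustrativeOpSize : uint32_t { kSize8 = 0, kSize16 = 1, kSize32 = 2, kSize64 = 3 };

// Encode as described in the comment: OpSize << 16 | (number of bits in vector).
constexpr uint32_t EncodeTypeSize(uint32_t op_size, uint32_t vector_bits) {
  return (op_size << 16) | vector_bits;
}

int main() {
  uint32_t type_size = EncodeTypeSize(kSize32, 128);
  // The low 16 bits carry the vector width; this is the check GenConst128 performs later.
  assert((type_size & 0xFFFF) == 128);
  std::printf("op_size=%u vector_bits=%u\n", type_size >> 16, type_size & 0xFFFF);
  return 0;
}
```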
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index ca90a833cc..ba4224ea78 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -45,6 +45,20 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = {
   "Check1",
   "Check2",
   "Select",
+  "ConstVector",
+  "MoveVector",
+  "PackedMultiply",
+  "PackedAddition",
+  "PackedSubtract",
+  "PackedShiftLeft",
+  "PackedSignedShiftRight",
+  "PackedUnsignedShiftRight",
+  "PackedAnd",
+  "PackedOr",
+  "PackedXor",
+  "PackedAddReduce",
+  "PackedReduce",
+  "PackedSet",
 };
 
 MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
@@ -798,13 +812,35 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff
                 bb->first_mir_insn ? " | " : " ");
       for (mir = bb->first_mir_insn; mir; mir = mir->next) {
           int opcode = mir->dalvikInsn.opcode;
-          fprintf(file, "    {%04x %s %s %s\\l}%s\\\n", mir->offset,
-                  mir->ssa_rep ? GetDalvikDisassembly(mir) :
-                  (opcode < kMirOpFirst) ? Instruction::Name(mir->dalvikInsn.opcode) :
-                  extended_mir_op_names_[opcode - kMirOpFirst],
-                  (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
-                  (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
-                  mir->next ? " | " : " ");
+          if (opcode > kMirOpSelect && opcode < kMirOpLast) {
+            if (opcode == kMirOpConstVector) {
+              fprintf(file, "    {%04x %s %d %d %d %d %d %d\\l}%s\\\n", mir->offset,
+                      extended_mir_op_names_[kMirOpConstVector - kMirOpFirst],
+                      mir->dalvikInsn.vA,
+                      mir->dalvikInsn.vB,
+                      mir->dalvikInsn.arg[0],
+                      mir->dalvikInsn.arg[1],
+                      mir->dalvikInsn.arg[2],
+                      mir->dalvikInsn.arg[3],
+                      mir->next ? " | " : " ");
+            } else {
+              fprintf(file, "    {%04x %s %d %d %d\\l}%s\\\n", mir->offset,
+                      extended_mir_op_names_[opcode - kMirOpFirst],
+                      mir->dalvikInsn.vA,
+                      mir->dalvikInsn.vB,
+                      mir->dalvikInsn.vC,
+                      mir->next ? " | " : " ");
+            }
+          } else {
+            fprintf(file, "    {%04x %s %s %s\\l}%s\\\n", mir->offset,
+                    mir->ssa_rep ? GetDalvikDisassembly(mir) :
+                    (opcode < kMirOpFirst) ?
+                    Instruction::Name(mir->dalvikInsn.opcode) :
+                    extended_mir_op_names_[opcode - kMirOpFirst],
+                    (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
+                    (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
+                    mir->next ? " | " : " ");
+          }
       }
       fprintf(file, "  }\"];\n\n");
     } else if (bb->block_type == kExceptionHandling) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 784dfaf96a..6f812384e4 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1203,4 +1203,8 @@ RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) {
   return loc;
 }
 
+void Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
+  LOG(FATAL) << "Unknown MIR opcode not supported on this architecture";
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 2c4ca8885a..10c245976a 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -975,7 +975,18 @@ void Mir2Lir::HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
     case kMirOpSelect:
       GenSelect(bb, mir);
       break;
+    case kMirOpPhi:
+    case kMirOpNop:
+    case kMirOpNullCheck:
+    case kMirOpRangeCheck:
+    case kMirOpDivZeroCheck:
+    case kMirOpCheck:
+    case kMirOpCheckPart2:
+      // Ignore these known opcodes
+      break;
     default:
+      // Give the backends a chance to handle unknown extended MIR opcodes.
+      GenMachineSpecificExtendedMethodMIR(bb, mir);
       break;
   }
 }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 507da0e458..3201b60d12 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1180,6 +1180,14 @@ class Mir2Lir : public Backend {
     virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                      bool is_double) = 0;
     virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
+    /*
+     * @brief Handle Machine Specific MIR Extended opcodes.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is not standard extended MIR.
+     * @note Base class implementation will abort for unknown opcodes.
+     */
+    virtual void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+
     /**
      * @brief Lowers the kMirOpSelect MIR into LIR.
      * @param bb The basic block in which the MIR is from.
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index c0c60d779e..92001065d9 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -320,6 +320,11 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
   { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "FstpsM", "[!0r,!1d]" },
   { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
 
+  EXT_0F_ENCODING_MAP(Mova128, 0x66, 0x6F, REG_DEF0),
+  { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128MR", "[!0r+!1d],!2r" },
+  { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014,    { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+
   EXT_0F_ENCODING_MAP(Movups, 0x0, 0x10, REG_DEF0),
   { kX86MovupsMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
   { kX86MovupsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014,    { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
@@ -1508,6 +1513,26 @@ int X86Mir2Lir::AssignInsnOffsets() {
 void X86Mir2Lir::AssignOffsets() {
   int offset = AssignInsnOffsets();
 
+  if (const_vectors_ != nullptr) {
+    /* assign offsets to vector literals */
+
+    // First, get offset to 12 mod 16 to align to 16 byte boundary.
+    // This will ensure that the vector is 16 byte aligned, as the procedure is
+    // always aligned at at 4 mod 16.
+    int align_size = (16-4) - (offset & 0xF);
+    if (align_size < 0) {
+      align_size += 16;
+    }
+
+    offset += align_size;
+
+    // Now assign each literal the right offset.
+    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+      p->offset = offset;
+      offset += 16;
+    }
+  }
+
   /* Const values have to be word aligned */
   offset = RoundUp(offset, 4);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 47d17925a4..cc0e1f23c0 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -408,6 +408,22 @@ class X86Mir2Lir FINAL : public Mir2Lir {
   bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
 
   /*
+   * @brief Load 128 bit constant into vector register.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is kMirConstVector
+   * @note vA is the TypeSize for the register.
+   * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values.
+   */
+  void GenConst128(BasicBlock* bb, MIR* mir);
+
+  /*
+   * @brief Generate code for a vector opcode.
+   * @param bb The basic block in which the MIR is from.
+   * @param mir The MIR whose opcode is a non-standard opcode.
+   */
+  void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+
+  /*
    * @brief Return the correct x86 opcode for the Dex operation
    * @param op Dex opcode for the operation
    * @param loc Register location of the operand
@@ -613,6 +629,22 @@ class X86Mir2Lir FINAL : public Mir2Lir {
   // 64-bit mode
   bool gen64bit_;
+
+  // The list of const vector literals.
+  LIR *const_vectors_;
+
+  /*
+   * @brief Search for a matching vector literal
+   * @param mir A kMirOpConst128b MIR instruction to match.
+   * @returns pointer to matching LIR constant, or nullptr if not found.
+   */
+  LIR *ScanVectorLiteral(MIR *mir);
+
+  /*
+   * @brief Add a constant vector literal
+   * @param mir A kMirOpConst128b MIR instruction to match.
+   */
+  LIR *AddVectorLiteral(MIR *mir);
 };
 
 }  // namespace art
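Before the target_x86.cc changes, a quick standalone check (illustrative only) of the 12 mod 16 alignment arithmetic that AssignOffsets above and InstallLiteralPools below both rely on. The assumption, taken from the code comments, is that the method body itself starts at 4 mod 16, so a literal placed at offset 12 mod 16 ends up 16-byte aligned in memory.

```cpp
#include <cassert>
#include <cstdio>

// Round a code offset up so that (offset % 16) == 12, mirroring the logic above.
int AlignTo12Mod16(int offset) {
  int align_size = (16 - 4) - (offset & 0xF);
  if (align_size < 0) {
    align_size += 16;
  }
  return offset + align_size;
}

int main() {
  for (int offset = 0; offset < 32; ++offset) {
    int aligned = AlignTo12Mod16(offset);
    assert(aligned >= offset);
    assert(aligned % 16 == 12);
    // If the method starts at an address that is 4 mod 16, the literal is 16-byte aligned.
    assert((4 + aligned) % 16 == 0);
  }
  std::printf("all offsets align to 12 mod 16\n");
  return 0;
}
```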
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 2e6bfdeff5..237c68c188 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -641,13 +641,15 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator*
       method_address_insns_(arena, 100, kGrowableArrayMisc),
       class_type_address_insns_(arena, 100, kGrowableArrayMisc),
       call_method_insns_(arena, 100, kGrowableArrayMisc),
-      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit) {
+      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
+      const_vectors_(nullptr) {
+  store_method_addr_used_ = false;
   if (kIsDebugBuild) {
     for (int i = 0; i < kX86Last; i++) {
       if (X86Mir2Lir::EncodingMap[i].opcode != i) {
         LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
-            << " is wrong: expecting " << i << ", seeing "
-            << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
+                   << " is wrong: expecting " << i << ", seeing "
+                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
       }
     }
   }
@@ -838,12 +840,46 @@ LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, Invok
   return call;
 }
 
+/*
+ * @brief Enter a 32 bit quantity into a buffer
+ * @param buf buffer.
+ * @param data Data value.
+ */
+
+static void PushWord(std::vector<uint8_t>&buf, int32_t data) {
+  buf.push_back(data & 0xff);
+  buf.push_back((data >> 8) & 0xff);
+  buf.push_back((data >> 16) & 0xff);
+  buf.push_back((data >> 24) & 0xff);
+}
+
 void X86Mir2Lir::InstallLiteralPools() {
   // These are handled differently for x86.
   DCHECK(code_literal_list_ == nullptr);
   DCHECK(method_literal_list_ == nullptr);
   DCHECK(class_literal_list_ == nullptr);
 
+  // Align to 16 byte boundary.  We have implicit knowledge that the start of the method is
+  // on a 4 byte boundary.  How can I check this if it changes (other than aligned loads
+  // will fail at runtime)?
+  if (const_vectors_ != nullptr) {
+    int align_size = (16-4) - (code_buffer_.size() & 0xF);
+    if (align_size < 0) {
+      align_size += 16;
+    }
+
+    while (align_size > 0) {
+      code_buffer_.push_back(0);
+      align_size--;
+    }
+    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+      PushWord(code_buffer_, p->operands[0]);
+      PushWord(code_buffer_, p->operands[1]);
+      PushWord(code_buffer_, p->operands[2]);
+      PushWord(code_buffer_, p->operands[3]);
+    }
+  }
+
   // Handle the fixups for methods.
   for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
       LIR* p = method_address_insns_.Get(i);
@@ -1074,18 +1110,6 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
 }
 
 /*
- * @brief Enter a 32 bit quantity into the FDE buffer
- * @param buf FDE buffer.
- * @param data Data value.
- */
-static void PushWord(std::vector<uint8_t>&buf, int data) {
-  buf.push_back(data & 0xff);
-  buf.push_back((data >> 8) & 0xff);
-  buf.push_back((data >> 16) & 0xff);
-  buf.push_back((data >> 24) & 0xff);
-}
-
-/*
  * @brief Enter an 'advance LOC' into the FDE buffer
  * @param buf FDE buffer.
  * @param increment Amount by which to increase the current location.
@@ -1235,4 +1259,73 @@ std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
   return cfi_info;
 }
 
+void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
+  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
+    case kMirOpConstVector:
+      GenConst128(bb, mir);
+      break;
+    default:
+      break;
+  }
+}
+
+void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
+  int type_size = mir->dalvikInsn.vA;
+  // We support 128 bit vectors.
+  DCHECK_EQ(type_size & 0xFFFF, 128);
+  int reg = mir->dalvikInsn.vB;
+  DCHECK_LT(reg, 8);
+  uint32_t *args = mir->dalvikInsn.arg;
+  // Check for all 0 case.
+  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
+    NewLIR2(kX86XorpsRR, reg, reg);
+    return;
+  }
+  // Okay, load it from the constant vector area.
+  LIR *data_target = ScanVectorLiteral(mir);
+  if (data_target == nullptr) {
+    data_target = AddVectorLiteral(mir);
+  }
+
+  // Address the start of the method.
+  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+  rl_method = LoadValue(rl_method, kCoreReg);
+
+  // Load the proper value from the literal area.
+  // We don't know the proper offset for the value, so pick one that will force
+  // 4 byte offset.  We will fix this up in the assembler later to have the right
+  // value.
+  LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+  load->flags.fixup = kFixupLoad;
+  load->target = data_target;
+  SetMemRefType(load, true, kLiteral);
+}
+
+LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
+  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
+  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
+        args[2] == p->operands[2] && args[3] == p->operands[3]) {
+      return p;
+    }
+  }
+  return nullptr;
+}
+
+LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
+  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
+  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
+  new_value->operands[0] = args[0];
+  new_value->operands[1] = args[1];
+  new_value->operands[2] = args[2];
+  new_value->operands[3] = args[3];
+  new_value->next = const_vectors_;
+  if (const_vectors_ == nullptr) {
+    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
+  }
+  estimated_native_code_size_ += 16;  // Space for one vector.
+  const_vectors_ = new_value;
+  return new_value;
+}
+
 }  // namespace art
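The sketch below (simplified types, not the Mir2Lir classes) mirrors two pieces of the target_x86.cc changes above: scanning the linked list of 128-bit literals for a duplicate before adding a new one, and emitting each 32-bit word into the code buffer least-significant byte first, as PushWord does.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified literal node; the real code reuses the LIR struct's operands[0..3].
struct VectorLiteral {
  uint32_t words[4];
  VectorLiteral* next;
};

// Return an existing literal with the same 128-bit value, or nullptr (ScanVectorLiteral analog).
VectorLiteral* Scan(VectorLiteral* head, const uint32_t (&w)[4]) {
  for (VectorLiteral* p = head; p != nullptr; p = p->next) {
    if (p->words[0] == w[0] && p->words[1] == w[1] &&
        p->words[2] == w[2] && p->words[3] == w[3]) {
      return p;
    }
  }
  return nullptr;
}

// Append one 32-bit word to the code buffer, least significant byte first (PushWord analog).
void PushWordLE(std::vector<uint8_t>& buf, uint32_t data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}

int main() {
  uint32_t value[4] = {0x11223344, 0, 0, 0};
  VectorLiteral lit{{0x11223344, 0, 0, 0}, nullptr};
  std::printf("duplicate found: %s\n", Scan(&lit, value) ? "yes" : "no");

  std::vector<uint8_t> buffer;
  for (uint32_t w : value) {
    PushWordLE(buffer, w);  // 16 bytes per 128-bit literal
  }
  std::printf("first bytes: %02x %02x %02x %02x\n",
              buffer[0], buffer[1], buffer[2], buffer[3]);  // 44 33 22 11
  return 0;
}
```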
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index fb85318409..e9592a6ec4 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -866,6 +866,9 @@ void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir) {
     case kMirOpFusedCmpgDouble:
       AnalyzeFPInstruction(opcode, bb, mir);
       break;
+    case kMirOpConstVector:
+      store_method_addr_ = true;
+      break;
     default:
       // Ignore the rest.
       break;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index c8c2542ffb..adfed0c4f3 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -425,6 +425,8 @@ enum X86OpCode {
   kX86Fild64M,                  // push 64-bit integer on x87 stack
   kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
   kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
+  Binary0fOpCode(kX86Mova128),  // move 128 bits aligned
+  kX86Mova128MR, kX86Mova128AR, // store 128 bit aligned from xmm1 to m128
   Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
   kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
   Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
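As a closing illustration (simplified, not the Mir2Lir API): the decision GenConst128 makes above. An all-zero 128-bit constant is materialized with xorps; anything else is loaded from the 16-byte-aligned literal pool appended after the method body.

```cpp
#include <cstdint>
#include <cstdio>

// Simplified mirror of the choice GenConst128 makes: an all-zero 128-bit constant
// can be materialized with "xorps xmm, xmm"; anything else is loaded from the
// 16-byte-aligned literal pool appended after the method body.
enum class Const128Strategy { kXorps, kLiteralPoolLoad };

Const128Strategy ChooseStrategy(const uint32_t (&words)[4]) {
  bool all_zero = words[0] == 0 && words[1] == 0 && words[2] == 0 && words[3] == 0;
  return all_zero ? Const128Strategy::kXorps : Const128Strategy::kLiteralPoolLoad;
}

int main() {
  uint32_t zeros[4] = {0, 0, 0, 0};
  uint32_t ones[4] = {1, 1, 1, 1};
  std::printf("zeros -> %s\n",
              ChooseStrategy(zeros) == Const128Strategy::kXorps ? "xorps" : "literal pool");
  std::printf("ones  -> %s\n",
              ChooseStrategy(ones) == Const128Strategy::kXorps ? "xorps" : "literal pool");
  return 0;
}
```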