diff options
Diffstat (limited to 'lib/Target/R600/R600Instructions.td')
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 524 |
1 files changed, 387 insertions, 137 deletions
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 64bab18fa6..8242df9440 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -70,6 +70,11 @@ class InstFlag<string PM = "printOperand", int Default = 0> let PrintMethod = PM; } +// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers +def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> { + let PrintMethod = "printSel"; +} + def LITERAL : InstFlag<"printLiteral">; def WRITE : InstFlag <"printWrite", 1>; @@ -86,9 +91,16 @@ def UP : InstFlag <"printUpdatePred">; // default to 0. def LAST : InstFlag<"printLast", 1>; +def FRAMEri : Operand<iPTR> { + let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index); +} + def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; +def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>; +def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>; +def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; class R600ALU_Word0 { field bits<32> Word0; @@ -173,6 +185,55 @@ class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{ let Word1{17-13} = alu_inst; } +class VTX_WORD0 { + field bits<32> Word0; + bits<7> SRC_GPR; + bits<5> VC_INST; + bits<2> FETCH_TYPE; + bits<1> FETCH_WHOLE_QUAD; + bits<8> BUFFER_ID; + bits<1> SRC_REL; + bits<2> SRC_SEL_X; + bits<6> MEGA_FETCH_COUNT; + + let Word0{4-0} = VC_INST; + let Word0{6-5} = FETCH_TYPE; + let Word0{7} = FETCH_WHOLE_QUAD; + let Word0{15-8} = BUFFER_ID; + let Word0{22-16} = SRC_GPR; + let Word0{23} = SRC_REL; + let Word0{25-24} = SRC_SEL_X; + let Word0{31-26} = MEGA_FETCH_COUNT; +} + +class VTX_WORD1_GPR { + field bits<32> Word1; + bits<7> DST_GPR; + bits<1> DST_REL; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + bits<1> USE_CONST_FIELDS; + bits<6> DATA_FORMAT; + bits<2> NUM_FORMAT_ALL; + bits<1> FORMAT_COMP_ALL; + bits<1> SRF_MODE_ALL; + + let Word1{6-0} = DST_GPR; + let Word1{7} = DST_REL; + let Word1{8} = 0; // Reserved + let Word1{11-9} = DST_SEL_X; + let Word1{14-12} = DST_SEL_Y; + let Word1{17-15} = DST_SEL_Z; + let Word1{20-18} = DST_SEL_W; + let Word1{21} = USE_CONST_FIELDS; + let Word1{27-22} = DATA_FORMAT; + let Word1{29-28} = NUM_FORMAT_ALL; + let Word1{30} = FORMAT_COMP_ALL; + let Word1{31} = SRF_MODE_ALL; +} + /* XXX: R600 subtarget uses a slightly different encoding than the other subtargets. We currently handle this in R600MCCodeEmitter, but we may @@ -214,11 +275,11 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, InstR600 <0, (outs R600_Reg32:$dst), (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " + "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -254,13 +315,13 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, (outs R600_Reg32:$dst), (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " - "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " + "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " + "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -291,14 +352,14 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, InstR600 <0, (outs R600_Reg32:$dst), (ins REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, - R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, + R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $dst$dst_rel, " - "$src0_neg$src0$src0_rel, " - "$src1_neg$src1$src1_rel, " - "$src2_neg$src2$src2_rel, " + "$src0_neg$src0$src0_sel$src0_rel, " + "$src1_neg$src1$src1_sel$src1_rel, " + "$src2_neg$src2$src2_sel$src2_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -342,6 +403,27 @@ def TEX_SHADOW : PatLeaf< }] >; +def TEX_RECT : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 5; + }] +>; + +def TEX_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 9 || TType == 10 || TType == 15 || TType == 16; + }] +>; + +def TEX_SHADOW_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 11 || TType == 12 || TType == 17; + }] +>; + class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, dag ins, string asm, list<dag> pattern> : InstR600ISA <outs, ins, asm, pattern> { @@ -414,32 +496,35 @@ def isR600toCayman : Predicate< "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; //===----------------------------------------------------------------------===// -// Interpolation Instructions +// R600 SDNodes //===----------------------------------------------------------------------===// -def INTERP: SDNode<"AMDGPUISD::INTERP", - SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]> - >; +def INTERP_PAIR_XY : AMDGPUShaderInst < + (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1), + (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), + "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1", + []>; -def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", - SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> - >; +def INTERP_PAIR_ZW : AMDGPUShaderInst < + (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1), + (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), + "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1", + []>; -let usesCustomInserter = 1 in { -def input_perspective : AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins i32imm:$src0, i32imm:$src1), - "input_perspective $src0 $src1 : dst", - [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>; -} // End usesCustomInserter = 1 - -def input_constant : AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins i32imm:$src), - "input_perspective $src : dst", - [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>; +def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, + [SDNPMayLoad] +>; +//===----------------------------------------------------------------------===// +// Interpolation Instructions +//===----------------------------------------------------------------------===// +def INTERP_VEC_LOAD : AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins i32imm:$src0), + "INTERP_LOAD $src0 : $dst", + []>; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; @@ -455,7 +540,7 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; // Export Instructions //===----------------------------------------------------------------------===// -def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>; +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, [SDNPHasChain, SDNPSideEffect]>; @@ -507,53 +592,59 @@ class ExportBufWord1 { multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), 0, 61, 0, 7, 7, 7, cf_inst, 0) >; def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), 0, 61, 7, 0, 7, 7, cf_inst, 0) >; - def : Pat<(int_R600_store_pixel_dummy), + def : Pat<(int_R600_store_dummy (i32 imm:$type)), (ExportInst - (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0) + (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0) >; - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, - 0, 1, 2, 3, cf_inst, 0) + def : Pat<(int_R600_store_dummy 1), + (ExportInst + (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) + >; + + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$base, + imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) >; + } multiclass SteamOutputExportPattern<Instruction ExportInst, bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { // Stream0 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf0inst, 0)>; // Stream1 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf1inst, 0)>; // Stream2 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf2inst, 0)>; // Stream3 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf3inst, 0)>; } -let isTerminator = 1, usesCustomInserter = 1 in { +let usesCustomInserter = 1 in { class ExportSwzInst : InstR600ISA<( outs), @@ -567,7 +658,7 @@ class ExportSwzInst : InstR600ISA<( let Inst{63-32} = Word1; } -} // End isTerminator = 1, usesCustomInserter = 1 +} // End usesCustomInserter = 1 class ExportBufInst : InstR600ISA<( outs), @@ -580,7 +671,7 @@ class ExportBufInst : InstR600ISA<( let Inst{63-32} = Word1; } -let Predicates = [isR600toCayman] in { +let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// // Common Instructions R600, R700, Evergreen, Cayman @@ -624,6 +715,34 @@ def SNE : R600_2OP < COND_NE))] >; +def SETE_DX10 : R600_2OP < + 0xC, "SETE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_EQ))] +>; + +def SETGT_DX10 : R600_2OP < + 0xD, "SETGT_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_GT))] +>; + +def SETGE_DX10 : R600_2OP < + 0xE, "SETGE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_GE))] +>; + +def SETNE_DX10 : R600_2OP < + 0xF, "SETNE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_NE))] +>; + def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; @@ -684,7 +803,7 @@ def SETE_INT : R600_2OP < >; def SETGT_INT : R600_2OP < - 0x3B, "SGT_INT", + 0x3B, "SETGT_INT", [(set (i32 R600_Reg32:$dst), (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] >; @@ -830,8 +949,13 @@ class MUL_LIT_Common <bits<5> inst> : R600_3OP < class MULADD_Common <bits<5> inst> : R600_3OP < inst, "MULADD", + [] +>; + +class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < + inst, "MULADD_IEEE", [(set (f32 R600_Reg32:$dst), - (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] + (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))] >; class CNDE_Common <bits<5> inst> : R600_3OP < @@ -988,6 +1112,7 @@ let Predicates = [isR600] in { def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; def MULADD_r600 : MULADD_Common<0x10>; + def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>; def CNDE_r600 : CNDE_Common<0x18>; def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; @@ -1070,7 +1195,7 @@ let Predicates = [isR700] in { //===----------------------------------------------------------------------===// let Predicates = [isEG] in { - + def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; defm DIV_eg : DIV_Common<RECIP_IEEE_eg>; @@ -1127,6 +1252,7 @@ let Predicates = [isEGorCayman] in { >; def MULADD_eg : MULADD_Common<0x14>; + def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>; def ASHR_eg : ASHR_Common<0x15>; def LSHR_eg : LSHR_Common<0x16>; def LSHL_eg : LSHL_Common<0x17>; @@ -1138,6 +1264,10 @@ let Predicates = [isEGorCayman] in { defm DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; +let hasSideEffects = 1 in { + def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>; +} + def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { @@ -1228,37 +1358,30 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < >; class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> - : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern> { - - // Operands - bits<7> DST_GPR; - bits<7> SRC_GPR; + : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>, + VTX_WORD1_GPR, VTX_WORD0 { // Static fields - bits<5> VC_INST = 0; - bits<2> FETCH_TYPE = 2; - bits<1> FETCH_WHOLE_QUAD = 0; - bits<8> BUFFER_ID = buffer_id; - bits<1> SRC_REL = 0; + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = buffer_id; + let SRC_REL = 0; // XXX: We can infer this field based on the SRC_GPR. This would allow us // to store vertex addresses in any channel, not just X. - bits<2> SRC_SEL_X = 0; - bits<6> MEGA_FETCH_COUNT; - bits<1> DST_REL = 0; - bits<3> DST_SEL_X; - bits<3> DST_SEL_Y; - bits<3> DST_SEL_Z; - bits<3> DST_SEL_W; + let SRC_SEL_X = 0; + let DST_REL = 0; // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, // however, based on my testing if USE_CONST_FIELDS is set, then all // these fields need to be set to 0. - bits<1> USE_CONST_FIELDS = 0; - bits<6> DATA_FORMAT; - bits<2> NUM_FORMAT_ALL = 1; - bits<1> FORMAT_COMP_ALL = 0; - bits<1> SRF_MODE_ALL = 0; + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 1; + let FORMAT_COMP_ALL = 0; + let SRF_MODE_ALL = 0; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; // LLVM can only encode 64-bit instructions, so these fields are manually // encoded in R600CodeEmitter // @@ -1269,29 +1392,7 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> // bits<1> ALT_CONST = 0; // bits<2> BUFFER_INDEX_MODE = 0; - // VTX_WORD0 - let Inst{4-0} = VC_INST; - let Inst{6-5} = FETCH_TYPE; - let Inst{7} = FETCH_WHOLE_QUAD; - let Inst{15-8} = BUFFER_ID; - let Inst{22-16} = SRC_GPR; - let Inst{23} = SRC_REL; - let Inst{25-24} = SRC_SEL_X; - let Inst{31-26} = MEGA_FETCH_COUNT; - - // VTX_WORD1_GPR - let Inst{38-32} = DST_GPR; - let Inst{39} = DST_REL; - let Inst{40} = 0; // Reserved - let Inst{43-41} = DST_SEL_X; - let Inst{46-44} = DST_SEL_Y; - let Inst{49-47} = DST_SEL_Z; - let Inst{52-50} = DST_SEL_W; - let Inst{53} = USE_CONST_FIELDS; - let Inst{59-54} = DATA_FORMAT; - let Inst{61-60} = NUM_FORMAT_ALL; - let Inst{62} = FORMAT_COMP_ALL; - let Inst{63} = SRF_MODE_ALL; + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding // is done in R600CodeEmitter @@ -1346,7 +1447,7 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> // This is not really necessary, but there were some GPU hangs that appeared // to be caused by ALU instructions in the next instruction group that wrote - // to the $ptr registers of the VTX_READ. + // to the $ptr registers of the VTX_READ. // e.g. // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24 // %T2_X<def> = MOV %ZERO @@ -1387,6 +1488,10 @@ def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] >; +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, + [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))] +>; + //===----------------------------------------------------------------------===// // VTX Read from global memory space //===----------------------------------------------------------------------===// @@ -1417,9 +1522,15 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, } +//===----------------------------------------------------------------------===// +// Regist loads and stores - for indirect addressing +//===----------------------------------------------------------------------===// + +defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; + let Predicates = [isCayman] in { -let isVector = 1 in { +let isVector = 1 in { def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; @@ -1476,6 +1587,7 @@ def PRED_X : InstR600 < (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), "", [], NullALU> { let FlagOperandIdx = 3; + let isTerminator = 1; } let isTerminator = 1, isBranch = 1, isBarrier = 1 in { @@ -1502,19 +1614,6 @@ def MASK_WRITE : AMDGPUShaderInst < } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 -def R600_LOAD_CONST : AMDGPUShaderInst < - (outs R600_Reg32:$dst), - (ins i32imm:$src0), - "R600_LOAD_CONST $dst, $src0", - [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] ->; - -def RESERVE_REG : AMDGPUShaderInst < - (outs), - (ins i32imm:$src), - "RESERVE_REG $src", - [(int_AMDGPU_reserve_reg imm:$src)] ->; def TXD: AMDGPUShaderInst < (outs R600_Reg128:$dst), @@ -1540,11 +1639,138 @@ def FNEG_R600 : FNEG<R600_Reg32>; //===---------------------------------------------------------------------===// // Return instruction //===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { +let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, + usesCustomInserter = 1 in { def RETURN : ILFormat<(outs), (ins variable_ops), "RETURN", [(IL_retflag)]>; } + +//===----------------------------------------------------------------------===// +// Constant Buffer Addressing Support +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +def CONST_COPY : Instruction { + let OutOperandList = (outs R600_Reg32:$dst); + let InOperandList = (ins i32imm:$src); + let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; + let AsmString = "CONST_COPY"; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let Itinerary = NullALU; +} +} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" + +def TEX_VTX_CONSTBUF : + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", + [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, + VTX_WORD1_GPR, VTX_WORD0 { + + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = 0; + let SRC_REL = 0; + let SRC_SEL_X = 0; + let DST_REL = 0; + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 2; + let FORMAT_COMP_ALL = 1; + let SRF_MODE_ALL = 1; + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 35; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + +// LLVM can only encode 64-bit instructions, so these fields are manually +// encoded in R600CodeEmitter +// +// bits<16> OFFSET; +// bits<2> ENDIAN_SWAP = 0; +// bits<1> CONST_BUF_NO_STRIDE = 0; +// bits<1> MEGA_FETCH = 0; +// bits<1> ALT_CONST = 0; +// bits<2> BUFFER_INDEX_MODE = 0; + + + +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding +// is done in R600CodeEmitter +// +// Inst{79-64} = OFFSET; +// Inst{81-80} = ENDIAN_SWAP; +// Inst{82} = CONST_BUF_NO_STRIDE; +// Inst{83} = MEGA_FETCH; +// Inst{84} = ALT_CONST; +// Inst{86-85} = BUFFER_INDEX_MODE; +// Inst{95-86} = 0; Reserved + +// VTX_WORD3 (Padding) +// +// Inst{127-96} = 0; +} + +def TEX_VTX_TEXBUF: + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", + [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, +VTX_WORD1_GPR, VTX_WORD0 { + +let VC_INST = 0; +let FETCH_TYPE = 2; +let FETCH_WHOLE_QUAD = 0; +let SRC_REL = 0; +let SRC_SEL_X = 0; +let DST_REL = 0; +let USE_CONST_FIELDS = 1; +let NUM_FORMAT_ALL = 0; +let FORMAT_COMP_ALL = 0; +let SRF_MODE_ALL = 1; +let MEGA_FETCH_COUNT = 16; +let DST_SEL_X = 0; +let DST_SEL_Y = 1; +let DST_SEL_Z = 2; +let DST_SEL_W = 3; +let DATA_FORMAT = 0; + +let Inst{31-0} = Word0; +let Inst{63-32} = Word1; + +// LLVM can only encode 64-bit instructions, so these fields are manually +// encoded in R600CodeEmitter +// +// bits<16> OFFSET; +// bits<2> ENDIAN_SWAP = 0; +// bits<1> CONST_BUF_NO_STRIDE = 0; +// bits<1> MEGA_FETCH = 0; +// bits<1> ALT_CONST = 0; +// bits<2> BUFFER_INDEX_MODE = 0; + + + +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding +// is done in R600CodeEmitter +// +// Inst{79-64} = OFFSET; +// Inst{81-80} = ENDIAN_SWAP; +// Inst{82} = CONST_BUF_NO_STRIDE; +// Inst{83} = MEGA_FETCH; +// Inst{84} = ALT_CONST; +// Inst{86-85} = BUFFER_INDEX_MODE; +// Inst{95-86} = 0; Reserved + +// VTX_WORD3 (Padding) +// +// Inst{127-96} = 0; +} + + + //===--------------------------------------------------------------------===// // Instructions support //===--------------------------------------------------------------------===// @@ -1641,7 +1867,19 @@ def : Pat < // SGE Reverse args def : Pat < (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), - (SGE R600_Reg32:$src1, R600_Reg32:$src0) + (SGE R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGT_DX10 reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT), + (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGE_DX10 reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE), + (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0) >; // SETGT_INT reverse args @@ -1682,31 +1920,43 @@ def : Pat < (SETE R600_Reg32:$src0, R600_Reg32:$src1) >; +//SETE_DX10 - 'true if ordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO), + (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) +>; + //SNE - 'true if unordered' def : Pat < (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), (SNE R600_Reg32:$src0, R600_Reg32:$src1) >; -def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>; -def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>; -def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>; -def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>; +//SETNE_DX10 - 'true if ordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO), + (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) +>; + +def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>; +def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>; +def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>; +def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>; -def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; -def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; -def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; -def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; +def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>; +def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>; +def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>; +def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>; def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; |