aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/R600/R600Instructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/R600Instructions.td')
-rw-r--r--lib/Target/R600/R600Instructions.td524
1 files changed, 387 insertions, 137 deletions
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 64bab18fa6..8242df9440 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -70,6 +70,11 @@ class InstFlag<string PM = "printOperand", int Default = 0>
let PrintMethod = PM;
}
+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
+ let PrintMethod = "printSel";
+}
+
def LITERAL : InstFlag<"printLiteral">;
def WRITE : InstFlag <"printWrite", 1>;
@@ -86,9 +91,16 @@ def UP : InstFlag <"printUpdatePred">;
// default to 0.
def LAST : InstFlag<"printLast", 1>;
+def FRAMEri : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
class R600ALU_Word0 {
field bits<32> Word0;
@@ -173,6 +185,55 @@ class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
let Word1{17-13} = alu_inst;
}
+class VTX_WORD0 {
+ field bits<32> Word0;
+ bits<7> SRC_GPR;
+ bits<5> VC_INST;
+ bits<2> FETCH_TYPE;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> BUFFER_ID;
+ bits<1> SRC_REL;
+ bits<2> SRC_SEL_X;
+ bits<6> MEGA_FETCH_COUNT;
+
+ let Word0{4-0} = VC_INST;
+ let Word0{6-5} = FETCH_TYPE;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = BUFFER_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{25-24} = SRC_SEL_X;
+ let Word0{31-26} = MEGA_FETCH_COUNT;
+}
+
+class VTX_WORD1_GPR {
+ field bits<32> Word1;
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<1> USE_CONST_FIELDS;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL;
+ bits<1> FORMAT_COMP_ALL;
+ bits<1> SRF_MODE_ALL;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{8} = 0; // Reserved
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{21} = USE_CONST_FIELDS;
+ let Word1{27-22} = DATA_FORMAT;
+ let Word1{29-28} = NUM_FORMAT_ALL;
+ let Word1{30} = FORMAT_COMP_ALL;
+ let Word1{31} = SRF_MODE_ALL;
+}
+
/*
XXX: R600 subtarget uses a slightly different encoding than the other
subtargets. We currently handle this in R600MCCodeEmitter, but we may
@@ -214,11 +275,11 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
InstR600 <0,
(outs R600_Reg32:$dst),
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName,
"$clamp $dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -254,13 +315,13 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
(outs R600_Reg32:$dst),
(ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName,
"$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
- "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -291,14 +352,14 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
InstR600 <0,
(outs R600_Reg32:$dst),
(ins REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
- R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName, "$clamp $dst$dst_rel, "
- "$src0_neg$src0$src0_rel, "
- "$src1_neg$src1$src1_rel, "
- "$src2_neg$src2$src2_rel, "
+ "$src0_neg$src0$src0_sel$src0_rel, "
+ "$src1_neg$src1$src1_sel$src1_rel, "
+ "$src2_neg$src2$src2_sel$src2_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -342,6 +403,27 @@ def TEX_SHADOW : PatLeaf<
}]
>;
+def TEX_RECT : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 5;
+ }]
+>;
+
+def TEX_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 9 || TType == 10 || TType == 15 || TType == 16;
+ }]
+>;
+
+def TEX_SHADOW_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 11 || TType == 12 || TType == 17;
+ }]
+>;
+
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern> {
@@ -414,32 +496,35 @@ def isR600toCayman : Predicate<
"Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
//===----------------------------------------------------------------------===//
-// Interpolation Instructions
+// R600 SDNodes
//===----------------------------------------------------------------------===//
-def INTERP: SDNode<"AMDGPUISD::INTERP",
- SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
- >;
+def INTERP_PAIR_XY : AMDGPUShaderInst <
+ (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1", // both outputs need the '$' sigil to print as operands
+ []>; // empty pattern list: no DAG pattern is attached to this def
-def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
- >;
+def INTERP_PAIR_ZW : AMDGPUShaderInst <
+ (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1", // both outputs need the '$' sigil to print as operands
+ []>; // empty pattern list: no DAG pattern is attached to this def
-let usesCustomInserter = 1 in {
-def input_perspective : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src0, i32imm:$src1),
- "input_perspective $src0 $src1 : dst",
- [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
-} // End usesCustomInserter = 1
-
-def input_constant : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src),
- "input_perspective $src : dst",
- [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
+ [SDNPMayLoad]
+>;
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+def INTERP_VEC_LOAD : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src0),
+ "INTERP_LOAD $src0 : $dst",
+ []>;
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
@@ -455,7 +540,7 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
// Export Instructions
//===----------------------------------------------------------------------===//
-def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>;
+def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
[SDNPHasChain, SDNPSideEffect]>;
@@ -507,53 +592,59 @@ class ExportBufWord1 {
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 0, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 7, 0, 7, 7, cf_inst, 0)
>;
- def : Pat<(int_R600_store_pixel_dummy),
+ def : Pat<(int_R600_store_dummy (i32 imm:$type)),
(ExportInst
- (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0)
+ (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
>;
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
- 0, 1, 2, 3, cf_inst, 0)
+ def : Pat<(int_R600_store_dummy 1),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
+ imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
>;
+
}
multiclass SteamOutputExportPattern<Instruction ExportInst,
bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf0inst, 0)>;
// Stream1
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf1inst, 0)>;
// Stream2
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf2inst, 0)>;
// Stream3
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf3inst, 0)>;
}
-let isTerminator = 1, usesCustomInserter = 1 in {
+let usesCustomInserter = 1 in {
class ExportSwzInst : InstR600ISA<(
outs),
@@ -567,7 +658,7 @@ class ExportSwzInst : InstR600ISA<(
let Inst{63-32} = Word1;
}
-} // End isTerminator = 1, usesCustomInserter = 1
+} // End usesCustomInserter = 1
class ExportBufInst : InstR600ISA<(
outs),
@@ -580,7 +671,7 @@ class ExportBufInst : InstR600ISA<(
let Inst{63-32} = Word1;
}
-let Predicates = [isR600toCayman] in {
+let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
// Common Instructions R600, R700, Evergreen, Cayman
@@ -624,6 +715,34 @@ def SNE : R600_2OP <
COND_NE))]
>;
+def SETE_DX10 : R600_2OP <
+ 0xC, "SETE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_EQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+ 0xD, "SETGT_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+ 0xE, "SETGE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GE))]
+>;
+
+def SETNE_DX10 : R600_2OP <
+ 0xF, "SETNE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_NE))]
+>;
+
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
@@ -684,7 +803,7 @@ def SETE_INT : R600_2OP <
>;
def SETGT_INT : R600_2OP <
- 0x3B, "SGT_INT",
+ 0x3B, "SETGT_INT",
[(set (i32 R600_Reg32:$dst),
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
>;
@@ -830,8 +949,13 @@ class MUL_LIT_Common <bits<5> inst> : R600_3OP <
class MULADD_Common <bits<5> inst> : R600_3OP <
inst, "MULADD",
+ []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD_IEEE",
[(set (f32 R600_Reg32:$dst),
- (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
+ (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
>;
class CNDE_Common <bits<5> inst> : R600_3OP <
@@ -988,6 +1112,7 @@ let Predicates = [isR600] in {
def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
def MULADD_r600 : MULADD_Common<0x10>;
+ def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
def CNDE_r600 : CNDE_Common<0x18>;
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
@@ -1070,7 +1195,7 @@ let Predicates = [isR700] in {
//===----------------------------------------------------------------------===//
let Predicates = [isEG] in {
-
+
def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
@@ -1127,6 +1252,7 @@ let Predicates = [isEGorCayman] in {
>;
def MULADD_eg : MULADD_Common<0x14>;
+ def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
@@ -1138,6 +1264,10 @@ let Predicates = [isEGorCayman] in {
defm DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
+let hasSideEffects = 1 in {
+ def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
@@ -1228,37 +1358,30 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
>;
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern> {
-
- // Operands
- bits<7> DST_GPR;
- bits<7> SRC_GPR;
+ : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
+ VTX_WORD1_GPR, VTX_WORD0 {
// Static fields
- bits<5> VC_INST = 0;
- bits<2> FETCH_TYPE = 2;
- bits<1> FETCH_WHOLE_QUAD = 0;
- bits<8> BUFFER_ID = buffer_id;
- bits<1> SRC_REL = 0;
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = buffer_id;
+ let SRC_REL = 0;
// XXX: We can infer this field based on the SRC_GPR. This would allow us
// to store vertex addresses in any channel, not just X.
- bits<2> SRC_SEL_X = 0;
- bits<6> MEGA_FETCH_COUNT;
- bits<1> DST_REL = 0;
- bits<3> DST_SEL_X;
- bits<3> DST_SEL_Y;
- bits<3> DST_SEL_Z;
- bits<3> DST_SEL_W;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
// The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
// FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
// however, based on my testing if USE_CONST_FIELDS is set, then all
// these fields need to be set to 0.
- bits<1> USE_CONST_FIELDS = 0;
- bits<6> DATA_FORMAT;
- bits<2> NUM_FORMAT_ALL = 1;
- bits<1> FORMAT_COMP_ALL = 0;
- bits<1> SRF_MODE_ALL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 1;
+ let FORMAT_COMP_ALL = 0;
+ let SRF_MODE_ALL = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
@@ -1269,29 +1392,7 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
// bits<1> ALT_CONST = 0;
// bits<2> BUFFER_INDEX_MODE = 0;
- // VTX_WORD0
- let Inst{4-0} = VC_INST;
- let Inst{6-5} = FETCH_TYPE;
- let Inst{7} = FETCH_WHOLE_QUAD;
- let Inst{15-8} = BUFFER_ID;
- let Inst{22-16} = SRC_GPR;
- let Inst{23} = SRC_REL;
- let Inst{25-24} = SRC_SEL_X;
- let Inst{31-26} = MEGA_FETCH_COUNT;
-
- // VTX_WORD1_GPR
- let Inst{38-32} = DST_GPR;
- let Inst{39} = DST_REL;
- let Inst{40} = 0; // Reserved
- let Inst{43-41} = DST_SEL_X;
- let Inst{46-44} = DST_SEL_Y;
- let Inst{49-47} = DST_SEL_Z;
- let Inst{52-50} = DST_SEL_W;
- let Inst{53} = USE_CONST_FIELDS;
- let Inst{59-54} = DATA_FORMAT;
- let Inst{61-60} = NUM_FORMAT_ALL;
- let Inst{62} = FORMAT_COMP_ALL;
- let Inst{63} = SRF_MODE_ALL;
+
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter
@@ -1346,7 +1447,7 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
// This is not really necessary, but there were some GPU hangs that appeared
// to be caused by ALU instructions in the next instruction group that wrote
- // to the $ptr registers of the VTX_READ.
+ // to the $ptr registers of the VTX_READ.
// e.g.
// %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
// %T2_X<def> = MOV %ZERO
@@ -1387,6 +1488,10 @@ def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
[(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
>;
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//
@@ -1417,9 +1522,15 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
}
+//===----------------------------------------------------------------------===//
+// Register loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
let Predicates = [isCayman] in {
-let isVector = 1 in {
+let isVector = 1 in {
def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
@@ -1476,6 +1587,7 @@ def PRED_X : InstR600 <
(ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
"", [], NullALU> {
let FlagOperandIdx = 3;
+ let isTerminator = 1;
}
let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
@@ -1502,19 +1614,6 @@ def MASK_WRITE : AMDGPUShaderInst <
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
-def R600_LOAD_CONST : AMDGPUShaderInst <
- (outs R600_Reg32:$dst),
- (ins i32imm:$src0),
- "R600_LOAD_CONST $dst, $src0",
- [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
->;
-
-def RESERVE_REG : AMDGPUShaderInst <
- (outs),
- (ins i32imm:$src),
- "RESERVE_REG $src",
- [(int_AMDGPU_reserve_reg imm:$src)]
->;
def TXD: AMDGPUShaderInst <
(outs R600_Reg128:$dst),
@@ -1540,11 +1639,138 @@ def FNEG_R600 : FNEG<R600_Reg32>;
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
def RETURN : ILFormat<(outs), (ins variable_ops),
"RETURN", [(IL_retflag)]>;
}
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction {
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let AsmString = "CONST_COPY";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF :
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+def TEX_VTX_TEXBUF:
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
+ [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+VTX_WORD1_GPR, VTX_WORD0 {
+
+let VC_INST = 0;
+let FETCH_TYPE = 2;
+let FETCH_WHOLE_QUAD = 0;
+let SRC_REL = 0;
+let SRC_SEL_X = 0;
+let DST_REL = 0;
+let USE_CONST_FIELDS = 1;
+let NUM_FORMAT_ALL = 0;
+let FORMAT_COMP_ALL = 0;
+let SRF_MODE_ALL = 1;
+let MEGA_FETCH_COUNT = 16;
+let DST_SEL_X = 0;
+let DST_SEL_Y = 1;
+let DST_SEL_Z = 2;
+let DST_SEL_W = 3;
+let DATA_FORMAT = 0;
+
+let Inst{31-0} = Word0;
+let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
+
//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
@@ -1641,7 +1867,19 @@ def : Pat <
// SGE Reverse args
def : Pat <
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
- (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
+ (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
+ (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
>;
// SETGT_INT reverse args
@@ -1682,31 +1920,43 @@ def : Pat <
(SETE R600_Reg32:$src0, R600_Reg32:$src1)
>;
+//SETE_DX10 - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
+ (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
//SNE - 'true if unordered'
def : Pat <
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
(SNE R600_Reg32:$src0, R600_Reg32:$src1)
>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
+//SETNE_DX10 - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
+ (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;