diff options
author | Evan Cheng <evan.cheng@apple.com> | 2010-10-09 04:07:58 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2010-10-09 04:07:58 +0000 |
commit | 10dc63feeb7847f867a6f35179312f4079981ad3 (patch) | |
tree | 7bf5a8373e146f6df2f5ab014ef6d3f87732b727 /lib | |
parent | e90ea139f47752eb122af756a5714ef0b3756298 (diff) | |
download | external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.tar.gz external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.tar.bz2 external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.zip |
Add VLD4 scheduling itineraries.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116143 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 24 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA8.td | 25 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 32 |
4 files changed, 64 insertions, 20 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 4340a7d3c7..d2ff222e29 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -548,7 +548,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, "vld4", Dt, + nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; @@ -556,16 +556,16 @@ def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">; def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">; def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">; -def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4>; -def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4>; -def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4>; +def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; +def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; +def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; // ...with double-spaced registers: def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">; def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">; -def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4>; -def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4>; +def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>; +def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>; // ...with address register writeback: class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -573,7 +573,7 @@ class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), - IIC_VLD4, "vld4", Dt, + IIC_VLD4ln, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", []>; @@ -582,15 +582,15 @@ def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">; def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">; def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">; -def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>; -def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>; -def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>; +def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; +def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; +def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">; def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; -def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>; -def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>; +def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; +def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 73c677e0d4..7ccac7ee86 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -145,6 +145,9 @@ def IIC_VLD3ln : InstrItinClass; def IIC_VLD3u : InstrItinClass; def IIC_VLD3lnu : InstrItinClass; def IIC_VLD4 : InstrItinClass; +def IIC_VLD4ln : InstrItinClass; +def IIC_VLD4u : InstrItinClass; +def IIC_VLD4lnu : InstrItinClass; def IIC_VST : InstrItinClass; def IIC_VUNAD : InstrItinClass; def IIC_VUNAQ : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 6c4cf8f122..fc6ad34005 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -390,7 +390,7 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD1 InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NLSPipe], 1>, InstrStage<1, [A8_LSPipe]>]>, // VLD1x2 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -496,8 +496,27 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD4 InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_NLSPipe]>, - InstrStage<1, [A8_LSPipe]>], [2, 2, 2, 2, 1]>, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 4, 1]>, + // + // VLD4ln + InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NLSPipe], 1>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, + // + // VLD4u + InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 4, 2, 1]>, + // + // VLD4lnu + InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NLSPipe], 1>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, // // VST // FIXME: We don't model this instruction properly diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 2d2bc370f5..06edaa9446 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -854,14 +854,36 @@ def CortexA9Itineraries : ProcessorItineraries< [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>, // // VLD4 - // FIXME: We don't model this instruction properly InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>, - // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_NPipe]>], - [2, 2, 2, 2, 1]>, + InstrStage<4, [A9_NPipe]>], + [4, 4, 5, 5, 1]>, + // + // VLD4ln + InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<11, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<5, [A9_NPipe]>], + [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>, + // + // VLD4u + InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<4, [A9_NPipe]>], + [4, 4, 5, 5, 2, 1]>, + // + // VLD4lnu + InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<11, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<5, [A9_NPipe]>], + [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>, // // VST // FIXME: We don't model this instruction properly |