Add VLD4 scheduling itineraries.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116143 91177308-0d34-0410-b5e6-96231b3b80d8
author: Evan Cheng <evan.cheng@apple.com> 2010-10-09 04:07:58 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2010-10-09 04:07:58 +0000
commit: 10dc63feeb7847f867a6f35179312f4079981ad3 (patch)
tree: 7bf5a8373e146f6df2f5ab014ef6d3f87732b727 /lib
parent: e90ea139f47752eb122af756a5714ef0b3756298 (diff)
download: external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.tar.gz
external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.tar.bz2
external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.zip
4 files changed, 64 insertions, 20 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 4340a7d3c7..d2ff222e29 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -548,7 +548,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<1, 0b10, op11_8, op7_4,
           (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
           (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-          nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
+          nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
           "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
           "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
 
@@ -556,16 +556,16 @@ def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8">;
 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
 
-def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4>;
-def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4>;
-def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4>;
+def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
 
 // ...with double-spaced registers:
 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
 
-def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4>;
-def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4>;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
 
 // ...with address register writeback:
 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -573,7 +573,7 @@ class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
           (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
           (ins addrmode6:$addr, am6offset:$offset,
            DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
-          IIC_VLD4, "vld4", Dt,
+          IIC_VLD4ln, "vld4", Dt,
 "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
 "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
           []>;
@@ -582,15 +582,15 @@ def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
 
-def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4>;
-def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
-def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
+def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
 
 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
 
-def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>;
-def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
 
 //   VLD1DUP  : Vector Load (single element to all lanes)
 //   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 73c677e0d4..7ccac7ee86 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -145,6 +145,9 @@ def IIC_VLD3ln     : InstrItinClass;
 def IIC_VLD3u      : InstrItinClass;
 def IIC_VLD3lnu    : InstrItinClass;
 def IIC_VLD4       : InstrItinClass;
+def IIC_VLD4ln     : InstrItinClass;
+def IIC_VLD4u      : InstrItinClass;
+def IIC_VLD4lnu    : InstrItinClass;
 def IIC_VST        : InstrItinClass;
 def IIC_VUNAD      : InstrItinClass;
 def IIC_VUNAQ      : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 6c4cf8f122..fc6ad34005 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -390,7 +390,7 @@ def CortexA8Itineraries : ProcessorItineraries<
   //
   // VLD1
   InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_NLSPipe]>,
+                               InstrStage<1, [A8_NLSPipe], 1>,
                                InstrStage<1, [A8_LSPipe]>]>,
   // VLD1x2
   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -496,8 +496,27 @@ def CortexA8Itineraries : ProcessorItineraries<
   //
   // VLD4
   InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_NLSPipe]>,
-                               InstrStage<1, [A8_LSPipe]>], [2, 2, 2, 2, 1]>,
+                               InstrStage<4, [A8_NLSPipe], 1>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [3, 3, 4, 4, 1]>,
+  //
+  // VLD4ln
+  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<5, [A8_NLSPipe], 1>,
+                               InstrStage<5, [A8_LSPipe]>],
+                              [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VLD4u
+  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<4, [A8_NLSPipe], 1>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [3, 3, 4, 4, 2, 1]>,
+  //
+  // VLD4lnu
+  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<5, [A8_NLSPipe], 1>,
+                               InstrStage<5, [A8_LSPipe]>],
+                              [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
   //
   // VST
   // FIXME: We don't model this instruction properly
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 2d2bc370f5..06edaa9446 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -854,14 +854,36 @@ def CortexA9Itineraries : ProcessorItineraries<
                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
   //
   // VLD4
-  // FIXME: We don't model this instruction properly
   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
-                               InstrStage<1, [A9_NPipe]>],
-                              [2, 2, 2, 2, 1]>,
+                               InstrStage<4, [A9_NPipe]>],
+                              [4, 4, 5, 5, 1]>,
+  //
+  // VLD4ln
+  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<5, [A9_NPipe]>],
+                              [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VLD4u
+  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<4, [A9_NPipe]>],
+                              [4, 4, 5, 5, 2, 1]>,
+  //
+  // VLD4lnu
+  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<5, [A9_NPipe]>],
+                              [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
   //
   // VST
   // FIXME: We don't model this instruction properly
author	Evan Cheng <evan.cheng@apple.com>	2010-10-09 04:07:58 +0000
committer	Evan Cheng <evan.cheng@apple.com>	2010-10-09 04:07:58 +0000
commit	10dc63feeb7847f867a6f35179312f4079981ad3 (patch)
tree	7bf5a8373e146f6df2f5ab014ef6d3f87732b727 /lib
parent	e90ea139f47752eb122af756a5714ef0b3756298 (diff)
download	external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.tar.gz external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.tar.bz2 external_llvm-10dc63feeb7847f867a6f35179312f4079981ad3.zip