Diffstat (limited to 'gcc-4.8/gcc/config/rs6000/vsx.md')
-rw-r--r--  gcc-4.8/gcc/config/rs6000/vsx.md  1206
1 file changed, 873 insertions, 333 deletions
diff --git a/gcc-4.8/gcc/config/rs6000/vsx.md b/gcc-4.8/gcc/config/rs6000/vsx.md
index 3fafd9b27..5f5e4a3b8 100644
--- a/gcc-4.8/gcc/config/rs6000/vsx.md
+++ b/gcc-4.8/gcc/config/rs6000/vsx.md
@@ -34,11 +34,20 @@
(define_mode_iterator VSX_F [V4SF V2DF])
;; Iterator for logical types supported by VSX
-(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])
;; Iterator for memory move. Handle TImode specially to allow
;; it to use gprs as well as vsx registers.
-(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
+
+(define_mode_iterator VSX_M2 [V16QI
+ V8HI
+ V4SI
+ V2DI
+ V4SF
+ V2DF
+ V1TI
+ (TI "TARGET_VSX_TIMODE")])
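Note on the iterator above: the (TI "TARGET_VSX_TIMODE") entry is a conditional element, so a pattern using VSX_M2 gets its TImode instance only under that flag, the string being ANDed into the instance's insn condition. An illustrative sketch of the effect on the fusion peepholes at the end of this patch:

;; The TImode instance behaves as if its condition were written out as
;;   "TARGET_VSX && TARGET_P8_FUSION && TARGET_VSX_TIMODE"
;; while V2DF and the other unconditional modes keep just
;;   "TARGET_VSX && TARGET_P8_FUSION"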
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
@@ -48,7 +57,8 @@
(V2DF "vd2")
(V2DI "vd2")
(DF "d")
- (TI "vw4")])
+ (V1TI "vd2")
+ (TI "vd2")])
;; Map into the appropriate suffix based on the type
(define_mode_attr VSs [(V16QI "sp")
@@ -59,7 +69,8 @@
(V2DI "dp")
(DF "dp")
(SF "sp")
- (TI "sp")])
+ (V1TI "dp")
+ (TI "dp")])
;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
@@ -70,7 +81,8 @@
(V2DF "wd")
(DF "ws")
(SF "d")
- (TI "wd")])
+ (V1TI "v")
+ (TI "wt")])
;; Map the register class used for float<->int conversions
(define_mode_attr VSr2 [(V2DF "wd")
@@ -115,7 +127,7 @@
(V4SF "v")
(V2DI "v")
(V2DF "v")
- (TI "v")
+ (V1TI "v")
(DF "s")])
;; Appropriate type for add ops (and other simple FP ops)
@@ -173,7 +185,8 @@
(V2DF "vecdouble")])
;; Map the scalar mode for a vector type
-(define_mode_attr VS_scalar [(V2DF "DF")
+(define_mode_attr VS_scalar [(V1TI "TI")
+ (V2DF "DF")
(V2DI "DI")
(V4SF "SF")
(V4SI "SI")
@@ -184,7 +197,8 @@
(define_mode_attr VS_double [(V4SI "V8SI")
(V4SF "V8SF")
(V2DI "V4DI")
- (V2DF "V4DF")])
+ (V2DF "V4DF")
+ (V1TI "V2TI")])
;; Constants for creating unspecs
(define_c_enum "unspec"
@@ -192,6 +206,8 @@
UNSPEC_VSX_CVDPSXWS
UNSPEC_VSX_CVDPUXWS
UNSPEC_VSX_CVSPDP
+ UNSPEC_VSX_CVSPDPN
+ UNSPEC_VSX_CVDPSPN
UNSPEC_VSX_CVSXWDP
UNSPEC_VSX_CVUXWDP
UNSPEC_VSX_CVSXDSP
@@ -204,77 +220,394 @@
UNSPEC_VSX_ROUND_I
UNSPEC_VSX_ROUND_IC
UNSPEC_VSX_SLDWI
+ UNSPEC_VSX_XXSPLTW
])
;; VSX moves
-(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[1], <MODE>mode))"
+
+;; The patterns for LE permuted loads and stores come before the general
+;; VSX moves so they match first.
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (match_operand:VSX_D 1 "memory_operand" "Z"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ [(set (match_dup 2)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 2)
+ (parallel [(const_int 1) (const_int 0)])))]
+ "
{
- switch (which_alternative)
- {
- case 0:
- case 3:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stx<VSm>x %x1,%y0";
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0];
+}
+ "
+ [(set_attr "type" "vecload")
+ (set_attr "length" "8")])
- case 1:
- case 4:
- gcc_assert (MEM_P (operands[1])
- && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
- return "lx<VSm>x %x0,%y1";
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
+ (match_operand:VSX_W 1 "memory_operand" "Z"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ [(set (match_dup 2)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0];
+}
+ "
+ [(set_attr "type" "vecload")
+ (set_attr "length" "8")])
- case 2:
- case 5:
- return "xxlor %x0,%x1,%x1";
+(define_insn_and_split "*vsx_le_perm_load_v8hi"
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+ (match_operand:V8HI 1 "memory_operand" "Z"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ [(set (match_dup 2)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (set (match_dup 0)
+ (vec_select:V8HI
+ (match_dup 2)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0];
+}
+ "
+ [(set_attr "type" "vecload")
+ (set_attr "length" "8")])
- case 6:
- case 7:
- case 8:
- case 11:
- return "#";
+(define_insn_and_split "*vsx_le_perm_load_v16qi"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (match_operand:V16QI 1 "memory_operand" "Z"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ [(set (match_dup 2)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))
+ (set (match_dup 0)
+ (vec_select:V16QI
+ (match_dup 2)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0];
+}
+ "
+ [(set_attr "type" "vecload")
+ (set_attr "length" "8")])
- case 9:
- case 10:
- return "xxlxor %x0,%x0,%x0";
+(define_insn "*vsx_le_perm_store_<mode>"
+ [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
+ (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "12")])
+
+(define_split
+ [(set (match_operand:VSX_D 0 "memory_operand" "")
+ (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ [(set (match_dup 2)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 2)
+ (parallel [(const_int 1) (const_int 0)])))]
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+ : operands[1];
+})
- case 12:
- return output_vec_const_move (operands);
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+ [(set (match_operand:VSX_D 0 "memory_operand" "")
+ (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ [(set (match_dup 1)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 1)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))]
+ "")
- case 13:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stvx %1,%y0";
+(define_insn "*vsx_le_perm_store_<mode>"
+ [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
+ (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "12")])
+
+(define_split
+ [(set (match_operand:VSX_W 0 "memory_operand" "")
+ (match_operand:VSX_W 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ [(set (match_dup 2)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+ : operands[1];
+})
- case 14:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "lvx %0,%y1";
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+ [(set (match_operand:VSX_W 0 "memory_operand" "")
+ (match_operand:VSX_W 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ [(set (match_dup 1)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (set (match_dup 1)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "")
- default:
- gcc_unreachable ();
- }
+(define_insn "*vsx_le_perm_store_v8hi"
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
+ (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "12")])
+
+(define_split
+ [(set (match_operand:V8HI 0 "memory_operand" "")
+ (match_operand:V8HI 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ [(set (match_dup 2)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (set (match_dup 0)
+ (vec_select:V8HI
+ (match_dup 2)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+ : operands[1];
+})
+
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+ [(set (match_operand:V8HI 0 "memory_operand" "")
+ (match_operand:V8HI 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ [(set (match_dup 1)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (set (match_dup 0)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (set (match_dup 1)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "")
+
+(define_insn "*vsx_le_perm_store_v16qi"
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "12")])
+
+(define_split
+ [(set (match_operand:V16QI 0 "memory_operand" "")
+ (match_operand:V16QI 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ [(set (match_dup 2)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))
+ (set (match_dup 0)
+ (vec_select:V16QI
+ (match_dup 2)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+ : operands[1];
+})
+
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+ [(set (match_operand:V16QI 0 "memory_operand" "")
+ (match_operand:V16QI 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ [(set (match_dup 1)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))
+ (set (match_dup 0)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))
+ (set (match_dup 1)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "")
+
+
+(define_insn "*vsx_mov<mode>"
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
-
-;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for
-;; unions. However for plain data movement, slightly favor the vector loads
-(define_insn "*vsx_movti"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,?Y,?r,?r,wa,v,v,wZ")
- (match_operand:TI 1 "input_operand" "wa,Z,wa,r,Y,r,j,W,wZ,v"))]
- "VECTOR_MEM_VSX_P (TImode)
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
+ (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
+
+;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
+;; use of TImode is for unions. However for plain data movement, slightly
+;; favor the vector loads
+(define_insn "*vsx_movti_64bit"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
+ && (register_operand (operands[0], TImode)
+ || register_operand (operands[1], TImode))"
+{
+ return rs6000_output_move_128bit (operands);
+}
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
+ (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
+
+(define_insn "*vsx_movti_32bit"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
+ (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))]
+ "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
&& (register_operand (operands[0], TImode)
|| register_operand (operands[1], TImode))"
{
@@ -290,27 +623,45 @@
return "xxlor %x0,%x1,%x1";
case 3:
+ return "xxlxor %x0,%x0,%x0";
+
case 4:
+ return output_vec_const_move (operands);
+
case 5:
- return "#";
+ return "stvx %1,%y0";
case 6:
- return "xxlxor %x0,%x0,%x0";
+ return "lvx %0,%y1";
case 7:
- return output_vec_const_move (operands);
+ if (TARGET_STRING)
+ return \"stswi %1,%P0,16\";
case 8:
- return "stvx %1,%y0";
+ return \"#\";
case 9:
- return "lvx %0,%y1";
+ /* If the address is not used in the output, we can use lsi. Otherwise,
+ fall through to generating four loads. */
+ if (TARGET_STRING
+ && ! reg_overlap_mentioned_p (operands[0], operands[1]))
+ return \"lswi %0,%P1,16\";
+ /* ... fall through ... */
+ case 10:
+ case 11:
+ case 12:
+ return \"#\";
default:
gcc_unreachable ();
}
}
- [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *")
+ (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
+ (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
+ (const_string "always")
+ (const_string "conditional")))])
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
@@ -320,46 +671,48 @@
"")
(define_expand "vsx_store_<mode>"
- [(set (match_operand:VEC_M 0 "memory_operand" "")
- (match_operand:VEC_M 1 "vsx_register_operand" ""))]
+ [(set (match_operand:VSX_M 0 "memory_operand" "")
+ (match_operand:VSX_M 1 "vsx_register_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
"")
-;; VSX scalar and vector floating point arithmetic instructions
+;; VSX vector floating point arithmetic instructions. The VSX scalar
+;; instructions are now combined with the insn for the traditional floating
+;; point unit.
(define_insn "*vsx_add<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>add<VSs> %x0,%x1,%x2"
+ "xvadd<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_sub<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>sub<VSs> %x0,%x1,%x2"
+ "xvsub<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_mul<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>mul<VSs> %x0,%x1,%x2"
- [(set_attr "type" "<VStype_mul>")
+ "xvmul<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_mul>")])
(define_insn "*vsx_div<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>div<VSs> %x0,%x1,%x2"
+ "xvdiv<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_div>")
(set_attr "fp_type" "<VSfptype_div>")])
@@ -402,94 +755,72 @@
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_fre<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_FRES))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>re<VSs> %x0,%x1"
+ "xvre<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_neg<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>neg<VSs> %x0,%x1"
+ "xvneg<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_abs<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>abs<VSs> %x0,%x1"
+ "xvabs<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_nabs<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (neg:VSX_B
- (abs:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (neg:VSX_F
+ (abs:VSX_F
+ (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>nabs<VSs> %x0,%x1"
+ "xvnabs<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_smax<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>max<VSs> %x0,%x1,%x2"
+ "xvmax<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_smin<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>min<VSs> %x0,%x1,%x2"
+ "xvmin<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Special VSX version of smin/smax for single precision floating point. Since
-;; both numbers are rounded to single precision, we can just use the DP version
-;; of the instruction.
-
-(define_insn "*vsx_smaxsf3"
- [(set (match_operand:SF 0 "vsx_register_operand" "=f")
- (smax:SF (match_operand:SF 1 "vsx_register_operand" "f")
- (match_operand:SF 2 "vsx_register_operand" "f")))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "xsmaxdp %x0,%x1,%x2"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_addsub_d")])
-
-(define_insn "*vsx_sminsf3"
- [(set (match_operand:SF 0 "vsx_register_operand" "=f")
- (smin:SF (match_operand:SF 1 "vsx_register_operand" "f")
- (match_operand:SF 2 "vsx_register_operand" "f")))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "xsmindp %x0,%x1,%x2"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_addsub_d")])
-
(define_insn "*vsx_sqrt<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>sqrt<VSs> %x0,%x1"
+ "xvsqrt<VSs> %x0,%x1"
[(set_attr "type" "<VStype_sqrt>")
(set_attr "fp_type" "<VSfptype_sqrt>")])
(define_insn "*vsx_rsqrte<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_RSQRT))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>rsqrte<VSs> %x0,%x1"
+ "xvrsqrte<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
@@ -528,26 +859,10 @@
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Fused vector multiply/add instructions Support the classical DF versions of
-;; fma, which allows the target to be a separate register from the 3 inputs.
-;; Under VSX, the target must be either the addend or the first multiply.
-;; Where we can, also do the same for the Altivec V4SF fmas.
-
-(define_insn "*vsx_fmadf4"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
- (fma:DF
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "@
- xsmaddadp %x0,%x1,%x2
- xsmaddmdp %x0,%x1,%x3
- xsmaddadp %x0,%x1,%x2
- xsmaddmdp %x0,%x1,%x3
- fmadd %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
+;; Fused vector multiply/add instructions. Support the classical Altivec
+;; versions of fma, which allows the target to be a separate register from the
+;; 3 inputs. Under VSX, the target must be either the addend or the first
+;; multiply.
(define_insn "*vsx_fmav4sf4"
[(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v")
@@ -578,23 +893,6 @@
xvmaddmdp %x0,%x1,%x3"
[(set_attr "type" "vecdouble")])
-(define_insn "*vsx_fmsdf4"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
- (fma:DF
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
- (neg:DF
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "@
- xsmsubadp %x0,%x1,%x2
- xsmsubmdp %x0,%x1,%x3
- xsmsubadp %x0,%x1,%x2
- xsmsubmdp %x0,%x1,%x3
- fmsub %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
(define_insn "*vsx_fms<mode>4"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
(fma:VSX_F
@@ -604,29 +902,12 @@
(match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
- x<VSv>msuba<VSs> %x0,%x1,%x2
- x<VSv>msubm<VSs> %x0,%x1,%x3
- x<VSv>msuba<VSs> %x0,%x1,%x2
- x<VSv>msubm<VSs> %x0,%x1,%x3"
+ xvmsuba<VSs> %x0,%x1,%x2
+ xvmsubm<VSs> %x0,%x1,%x3
+ xvmsuba<VSs> %x0,%x1,%x2
+ xvmsubm<VSs> %x0,%x1,%x3"
[(set_attr "type" "<VStype_mul>")])
-(define_insn "*vsx_nfmadf4"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
- (neg:DF
- (fma:DF
- (match_operand:DF 1 "vsx_register_operand" "ws,ws,wa,wa,d")
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "@
- xsnmaddadp %x0,%x1,%x2
- xsnmaddmdp %x0,%x1,%x3
- xsnmaddadp %x0,%x1,%x2
- xsnmaddmdp %x0,%x1,%x3
- fnmadd %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
(define_insn "*vsx_nfma<mode>4"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
(neg:VSX_F
@@ -636,31 +917,13 @@
(match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+ xvnmadda<VSs> %x0,%x1,%x2
+ xvnmaddm<VSs> %x0,%x1,%x3
+ xvnmadda<VSs> %x0,%x1,%x2
+ xvnmaddm<VSs> %x0,%x1,%x3"
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_insn "*vsx_nfmsdf4"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
- (neg:DF
- (fma:DF
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
- (neg:DF
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "@
- xsnmsubadp %x0,%x1,%x2
- xsnmsubmdp %x0,%x1,%x3
- xsnmsubadp %x0,%x1,%x2
- xsnmsubmdp %x0,%x1,%x3
- fnmsub %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
(define_insn "*vsx_nfmsv4sf4"
[(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
(neg:V4SF
@@ -722,16 +985,6 @@
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Floating point scalar compare
-(define_insn "*vsx_cmpdf_internal1"
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y")
- (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa")
- (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_VSX_P (DFmode)"
- "xscmpudp %0,%x1,%x2"
- [(set_attr "type" "fpcompare")])
-
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
@@ -798,13 +1051,13 @@
;; Copy sign
(define_insn "vsx_copysign<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B
- [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F
+ [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_COPYSIGN))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>cpsgn<VSs> %x0,%x2,%x1"
+ "xvcpsgn<VSs> %x0,%x2,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
@@ -865,10 +1118,10 @@
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_btrunc<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>r<VSs>iz %x0,%x1"
+ "xvr<VSs>iz %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
@@ -882,20 +1135,20 @@
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_floor<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_FRIM))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>r<VSs>im %x0,%x1"
+ "xvr<VSs>im %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_ceil<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_FRIP))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>r<VSs>ip %x0,%x1"
+ "xvr<VSs>ip %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
@@ -942,6 +1195,40 @@
"xscvspdp %x0,%x1"
[(set_attr "type" "fp")])
+;; ISA 2.07 xscvdpspn/xscvspdpn, which do not raise an error on signalling NaNs
+(define_insn "vsx_xscvdpspn"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvspdpn"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvdpspn_scalar"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; Used by direct move to move a SFmode value from GPR to VSX register
+(define_insn "vsx_xscvspdpn_directmove"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1027,73 +1314,21 @@
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Logical and permute operations
-(define_insn "*vsx_and<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (and:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxland %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_ior<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_xor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (xor:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlxor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_one_cmpl<mode>2"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlnor %x0,%x1,%x1"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_nor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (ior:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlnor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_andc<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (and:VSX_L
- (not:VSX_L
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlandc %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-
;; Permute operations
;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
- (unspec:VSX_D
- [(match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
- (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")]
- UNSPEC_VSX_CONCAT))]
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa")
+ (vec_concat:VSX_D
+ (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxpermdi %x0,%x1,%x2,0"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "xxpermdi %x0,%x1,%x2,0";
+ else
+ return "xxpermdi %x0,%x2,%x1,0";
+}
[(set_attr "type" "vecperm")])
;; Special purpose concat using xxpermdi to glue two single precision values
@@ -1106,9 +1341,176 @@
(match_operand:SF 2 "vsx_register_operand" "f,f")]
UNSPEC_VSX_CONCAT))]
"VECTOR_MEM_VSX_P (V2DFmode)"
- "xxpermdi %x0,%x1,%x2,0"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "xxpermdi %x0,%x1,%x2,0";
+ else
+ return "xxpermdi %x0,%x2,%x1,0";
+}
+ [(set_attr "type" "vecperm")])
+
+;; xxpermdi for little endian loads and stores. We need several of
+;; these since the form of the PARALLEL differs by mode.
+(define_insn "*vsx_xxpermdi2_le_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_D
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxpermdi %x0,%x1,%x1,2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_xxpermdi4_le_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxpermdi %x0,%x1,%x1,2"
[(set_attr "type" "vecperm")])
+(define_insn "*vsx_xxpermdi8_le_V8HI"
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+ "xxpermdi %x0,%x1,%x1,2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_xxpermdi16_le_V16QI"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+ "xxpermdi %x0,%x1,%x1,2"
+ [(set_attr "type" "vecperm")])
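All four LE xxpermdi patterns emit the same instruction; only the PARALLEL differs, expressing the swap in each mode's element numbering. Decoding the immediate:

;; DM=2 (0b10): result.dw0 = A.dw1, result.dw1 = B.dw0;
;; with A = B = %x1 this is exactly a doubleword swap.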
+
+;; lxvd2x for little endian loads. We need several of
+;; these since the form of the PARALLEL differs by mode.
+(define_insn "*vsx_lxvd2x2_le_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_D
+ (match_operand:VSX_D 1 "memory_operand" "Z")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "lxvd2x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "*vsx_lxvd2x4_le_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "memory_operand" "Z")
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "lxvd2x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "*vsx_lxvd2x8_le_V8HI"
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "memory_operand" "Z")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+ "lxvd2x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "*vsx_lxvd2x16_le_V16QI"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "memory_operand" "Z")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+ "lxvd2x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+;; stxvd2x for little endian stores. We need several of
+;; these since the form of the PARALLEL differs by mode.
+(define_insn "*vsx_stxvd2x2_le_<mode>"
+ [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
+ (vec_select:VSX_D
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "stxvd2x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "*vsx_stxvd2x4_le_<mode>"
+ [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "stxvd2x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "*vsx_stxvd2x8_le_V8HI"
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+ "stxvd2x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "*vsx_stxvd2x16_le_V16QI"
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+ "stxvd2x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+;; Convert a TImode value into V1TImode
+(define_expand "vsx_set_v1ti"
+ [(match_operand:V1TI 0 "nonimmediate_operand" "")
+ (match_operand:V1TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "input_operand" "")
+ (match_operand:QI 3 "u5bit_cint_operand" "")]
+ "VECTOR_MEM_VSX_P (V1TImode)"
+{
+ if (operands[3] != const0_rtx)
+ gcc_unreachable ();
+
+ emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
+ DONE;
+})
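Since V1TImode has exactly one element, vec_set at index 0 is just a whole-register replacement, so the expander checks the index and emits a lowpart move. Illustrative C using GCC's vector extension (hypothetical names):

    typedef __int128 v1ti_t __attribute__ ((vector_size (16)));  /* V1TImode */
    __int128 t = 42;
    v1ti_t v = (v1ti_t) { t };  /* setting element 0 replaces the whole vector */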
+
;; Set the element of a V2DI/V2DF mode
(define_insn "vsx_set_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
@@ -1118,9 +1520,10 @@
UNSPEC_VSX_SET))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- if (INTVAL (operands[3]) == 0)
+ int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
+ if (INTVAL (operands[3]) == idx_first)
return \"xxpermdi %x0,%x2,%x1,1\";
- else if (INTVAL (operands[3]) == 1)
+ else if (INTVAL (operands[3]) == 1 - idx_first)
return \"xxpermdi %x0,%x1,%x2,0\";
else
gcc_unreachable ();
@@ -1135,8 +1538,12 @@
[(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
+ int fldDM;
gcc_assert (UINTVAL (operands[2]) <= 1);
- operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
+ fldDM = INTVAL (operands[2]) << 1;
+ if (!BYTES_BIG_ENDIAN)
+ fldDM = 3 - fldDM;
+ operands[3] = GEN_INT (fldDM);
return \"xxpermdi %x0,%x1,%x1,%3\";
}
[(set_attr "type" "vecperm")])
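A worked instance of the DM computation above, following the code exactly:

    /* ele -> immediate passed as %3 in "xxpermdi %x0,%x1,%x1,%3" */
    /* BE: ele 0 -> 0 << 1 = 0;  ele 1 -> 1 << 1 = 2 */
    /* LE: ele 0 -> 3 - 0 = 3;   ele 1 -> 3 - 2 = 1 */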
@@ -1149,7 +1556,26 @@
(parallel [(const_int 0)])))]
"VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
"lxsd%U1x %x0,%y1"
- [(set_attr "type" "fpload")
+ [(set (attr "type")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (const_string "fpload")))
+ (set_attr "length" "4")])
+
+;; Optimize extracting element 1 from memory for little endian
+(define_insn "*vsx_extract_<mode>_one_le"
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
+ (parallel [(const_int 1)])))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && !WORDS_BIG_ENDIAN"
+ "lxsd%U1x %x0,%y1"
+ [(set (attr "type")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (const_string "fpload")))
(set_attr "length" "4")])
;; Extract a SF element from V4SF
@@ -1172,7 +1598,7 @@
rtx op2 = operands[2];
rtx op3 = operands[3];
rtx tmp;
- HOST_WIDE_INT ele = INTVAL (op2);
+ HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
if (ele == 0)
tmp = op1;
@@ -1213,11 +1639,22 @@
if (<MODE>mode != V2DImode)
{
target = gen_lowpart (V2DImode, target);
- op0 = gen_lowpart (V2DImode, target);
- op1 = gen_lowpart (V2DImode, target);
+ op0 = gen_lowpart (V2DImode, op0);
+ op1 = gen_lowpart (V2DImode, op1);
}
}
+ /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
+ transformation we don't want; it is necessary for
+ rs6000_expand_vec_perm_const_1 but not for this use. So we
+ prepare for that by reversing the transformation here. */
+ if (BYTES_BIG_ENDIAN)
emit_insn (gen (target, op0, op1, perm0, perm1));
+ else
+ {
+ rtx p0 = GEN_INT (3 - INTVAL (perm1));
+ rtx p1 = GEN_INT (3 - INTVAL (perm0));
+ emit_insn (gen (target, op1, op0, p0, p1));
+ }
DONE;
})
@@ -1231,9 +1668,32 @@
(match_operand 4 "const_2_to_3_operand" "")])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- int mask = (INTVAL (operands[3]) << 1) | (INTVAL (operands[4]) - 2);
+ int op3, op4, mask;
+
+ /* For little endian, swap operands and invert/swap selectors
+ to get the correct xxpermdi. The operand swap sets up the
+ inputs as a little endian array. The selectors are swapped
+ because they are defined to use big endian ordering. The
+ selectors are inverted to get the correct doublewords for
+ little endian ordering. */
+ if (BYTES_BIG_ENDIAN)
+ {
+ op3 = INTVAL (operands[3]);
+ op4 = INTVAL (operands[4]);
+ }
+ else
+ {
+ op3 = 3 - INTVAL (operands[4]);
+ op4 = 3 - INTVAL (operands[3]);
+ }
+
+ mask = (op3 << 1) | (op4 - 2);
operands[3] = GEN_INT (mask);
+
+ if (BYTES_BIG_ENDIAN)
return "xxpermdi %x0,%x1,%x2,%3";
+ else
+ return "xxpermdi %x0,%x2,%x1,%3";
}
[(set_attr "type" "vecperm")])
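One worked instance, following the code above: with operands[3] = 0 and operands[4] = 3 (dw0 of operand 1, dw1 of operand 2, in BE numbering):

    /* BE: mask = (0 << 1) | (3 - 2) = 1          -> "xxpermdi %x0,%x1,%x2,1" */
    /* LE: op3 = 3 - 3 = 0, op4 = 3 - 0 = 3,
           mask = (0 << 1) | (3 - 2) = 1          -> "xxpermdi %x0,%x2,%x1,1" */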
@@ -1252,24 +1712,56 @@
;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
- (vec_select:VSX_D
- (vec_concat:<VS_double>
- (match_operand:VSX_D 1 "vsx_register_operand" "")
- (match_operand:VSX_D 2 "vsx_register_operand" ""))
- (parallel [(const_int 1) (const_int 3)])))]
+ [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
+ (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
+ (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "")
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
(define_expand "vsx_mergeh_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
- (vec_select:VSX_D
- (vec_concat:<VS_double>
- (match_operand:VSX_D 1 "vsx_register_operand" "")
- (match_operand:VSX_D 2 "vsx_register_operand" ""))
- (parallel [(const_int 0) (const_int 2)])))]
+ [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
+ (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
+ (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "")
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
;; V2DF/V2DI splat
(define_insn "vsx_splat_<mode>"
@@ -1295,6 +1787,20 @@
(parallel
[(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ if (!BYTES_BIG_ENDIAN)
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
+
+ return "xxspltw %x0,%x1,%2";
+}
+ [(set_attr "type" "vecperm")])
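For example, splatting word element 1: LE word element i is the hardware's word 3 - i, so the index is remapped before printing. The _direct variant below performs no such remapping, so its callers must already pass the hardware word number.

;; operands[2] = 1:
;;   BE: "xxspltw %x0,%x1,1"
;;   LE: remapped to 3 - 1 = 2, "xxspltw %x0,%x1,2"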
+
+(define_insn "vsx_xxspltw_<mode>_direct"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
+ (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
+ UNSPEC_VSX_XXSPLTW))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
"xxspltw %x0,%x1,%2"
[(set_attr "type" "vecperm")])
@@ -1308,7 +1814,12 @@
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxmrghw %x0,%x1,%x2"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "xxmrghw %x0,%x1,%x2";
+ else
+ return "xxmrglw %x0,%x2,%x1";
+}
[(set_attr "type" "vecperm")])
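Under LE, a big-endian "merge high" is realized as a "merge low" with the operands swapped, because the hardware's high words are LE's low-numbered elements. With hypothetical inputs a and b:

;; BE: xxmrghw a,b  -> elements { a0, b0, a1, b1 }
;; LE: the same element result requires xxmrglw b,a

The vsx_xxmrglw pattern below applies the mirror-image substitution.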
(define_insn "vsx_xxmrglw_<mode>"
@@ -1320,7 +1831,12 @@
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxmrglw %x0,%x1,%x2"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "xxmrglw %x0,%x1,%x2";
+ else
+ return "xxmrghw %x0,%x2,%x1";
+}
[(set_attr "type" "vecperm")])
;; Shift left double by word immediate
@@ -1483,3 +1999,27 @@
}"
[(set_attr "length" "20")
(set_attr "type" "veccomplex")])
+
+
+;; Power8 Vector fusion. The fused ops must be physically adjacent.
+(define_peephole
+ [(set (match_operand:P 0 "base_reg_operand" "")
+ (match_operand:P 1 "short_cint_operand" ""))
+ (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
+ (mem:VSX_M2 (plus:P (match_dup 0)
+ (match_operand:P 3 "int_reg_operand" ""))))]
+ "TARGET_VSX && TARGET_P8_FUSION"
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
+ [(set_attr "length" "8")
+ (set_attr "type" "vecload")])
+
+(define_peephole
+ [(set (match_operand:P 0 "base_reg_operand" "")
+ (match_operand:P 1 "short_cint_operand" ""))
+ (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
+ (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
+ (match_dup 0))))]
+ "TARGET_VSX && TARGET_P8_FUSION"
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
+ [(set_attr "length" "8")
+ (set_attr "type" "vecload")])