author     Andrew Hsieh <andrewhsieh@google.com>                      2014-06-20 13:50:27 +0000
committer  Gerrit Code Review <noreply-gerritcodereview@google.com>  2014-06-20 02:04:52 +0000
commit     3c447213baf828ebd01c45ad459552b3c9922c92 (patch)
tree       198ed6386925c6ff5c24faf686b836aa74082836 /gcc-4.8/gcc/config/rs6000/vsx.md
parent     e1be674a8e345aaa025ad9f1d38bab4272301e1d (diff)
parent     f190d6284359da8ae8694b2d2e14b01602a959ed (diff)
Merge "Merge GCC 4.8.3"
Diffstat (limited to 'gcc-4.8/gcc/config/rs6000/vsx.md')
-rw-r--r--  gcc-4.8/gcc/config/rs6000/vsx.md | 1206
1 file changed, 873 insertions(+), 333 deletions(-)
diff --git a/gcc-4.8/gcc/config/rs6000/vsx.md b/gcc-4.8/gcc/config/rs6000/vsx.md index 3fafd9b27..5f5e4a3b8 100644 --- a/gcc-4.8/gcc/config/rs6000/vsx.md +++ b/gcc-4.8/gcc/config/rs6000/vsx.md @@ -34,11 +34,20 @@ (define_mode_iterator VSX_F [V4SF V2DF]) ;; Iterator for logical types supported by VSX -(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI]) +(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI]) ;; Iterator for memory move. Handle TImode specially to allow ;; it to use gprs as well as vsx registers. -(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI]) + +(define_mode_iterator VSX_M2 [V16QI + V8HI + V4SI + V2DI + V4SF + V2DF + V1TI + (TI "TARGET_VSX_TIMODE")]) ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") @@ -48,7 +57,8 @@ (V2DF "vd2") (V2DI "vd2") (DF "d") - (TI "vw4")]) + (V1TI "vd2") + (TI "vd2")]) ;; Map into the appropriate suffix based on the type (define_mode_attr VSs [(V16QI "sp") @@ -59,7 +69,8 @@ (V2DI "dp") (DF "dp") (SF "sp") - (TI "sp")]) + (V1TI "dp") + (TI "dp")]) ;; Map the register class used (define_mode_attr VSr [(V16QI "v") @@ -70,7 +81,8 @@ (V2DF "wd") (DF "ws") (SF "d") - (TI "wd")]) + (V1TI "v") + (TI "wt")]) ;; Map the register class used for float<->int conversions (define_mode_attr VSr2 [(V2DF "wd") @@ -115,7 +127,7 @@ (V4SF "v") (V2DI "v") (V2DF "v") - (TI "v") + (V1TI "v") (DF "s")]) ;; Appropriate type for add ops (and other simple FP ops) @@ -173,7 +185,8 @@ (V2DF "vecdouble")]) ;; Map the scalar mode for a vector type -(define_mode_attr VS_scalar [(V2DF "DF") +(define_mode_attr VS_scalar [(V1TI "TI") + (V2DF "DF") (V2DI "DI") (V4SF "SF") (V4SI "SI") @@ -184,7 +197,8 @@ (define_mode_attr VS_double [(V4SI "V8SI") (V4SF "V8SF") (V2DI "V4DI") - (V2DF "V4DF")]) + (V2DF "V4DF") + (V1TI "V2TI")]) ;; Constants for creating unspecs (define_c_enum "unspec" @@ -192,6 +206,8 @@ UNSPEC_VSX_CVDPSXWS UNSPEC_VSX_CVDPUXWS UNSPEC_VSX_CVSPDP + UNSPEC_VSX_CVSPDPN + UNSPEC_VSX_CVDPSPN UNSPEC_VSX_CVSXWDP UNSPEC_VSX_CVUXWDP UNSPEC_VSX_CVSXDSP @@ -204,77 +220,394 @@ UNSPEC_VSX_ROUND_I UNSPEC_VSX_ROUND_IC UNSPEC_VSX_SLDWI + UNSPEC_VSX_XXSPLTW ]) ;; VSX moves -(define_insn "*vsx_mov<mode>" - [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v") - (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (register_operand (operands[0], <MODE>mode) - || register_operand (operands[1], <MODE>mode))" + +;; The patterns for LE permuted loads and stores come before the general +;; VSX moves so they match first. +(define_insn_and_split "*vsx_le_perm_load_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (match_operand:VSX_D 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX" + [(set (match_dup 2) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 2) + (parallel [(const_int 1) (const_int 0)])))] + " { - switch (which_alternative) - { - case 0: - case 3: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stx<VSm>x %x1,%y0"; + operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) - case 1: - case 4: - gcc_assert (MEM_P (operands[1]) - && GET_CODE (XEXP (operands[1], 0)) != PRE_INC - && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY); - return "lx<VSm>x %x0,%y1"; +(define_insn_and_split "*vsx_le_perm_load_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") + (match_operand:VSX_W 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX" + [(set (match_dup 2) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 2) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + " +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) - case 2: - case 5: - return "xxlor %x0,%x1,%x1"; +(define_insn_and_split "*vsx_le_perm_load_v8hi" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (match_operand:V8HI 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX" + [(set (match_dup 2) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))) + (set (match_dup 0) + (vec_select:V8HI + (match_dup 2) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + " +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) - case 6: - case 7: - case 8: - case 11: - return "#"; +(define_insn_and_split "*vsx_le_perm_load_v16qi" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (match_operand:V16QI 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX" + [(set (match_dup 2) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))) + (set (match_dup 0) + (vec_select:V16QI + (match_dup 2) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + " +{ + operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) - case 9: - case 10: - return "xxlxor %x0,%x0,%x0"; +(define_insn "*vsx_le_perm_store_<mode>" + [(set (match_operand:VSX_D 0 "memory_operand" "=Z") + (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:VSX_D 0 "memory_operand" "") + (match_operand:VSX_D 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" + [(set (match_dup 2) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 2) + (parallel [(const_int 1) (const_int 0)])))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; +}) - case 12: - return output_vec_const_move (operands); +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. +(define_split + [(set (match_operand:VSX_D 0 "memory_operand" "") + (match_operand:VSX_D 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" + [(set (match_dup 1) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 1) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)])))] + "") - case 13: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stvx %1,%y0"; +(define_insn "*vsx_le_perm_store_<mode>" + [(set (match_operand:VSX_W 0 "memory_operand" "=Z") + (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:VSX_W 0 "memory_operand" "") + (match_operand:VSX_W 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" + [(set (match_dup 2) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 2) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; +}) - case 14: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "lvx %0,%y1"; +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. 
+(define_split + [(set (match_operand:VSX_W 0 "memory_operand" "") + (match_operand:VSX_W 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" + [(set (match_dup 1) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (set (match_dup 1) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + "") - default: - gcc_unreachable (); - } +(define_insn "*vsx_le_perm_store_v8hi" + [(set (match_operand:V8HI 0 "memory_operand" "=Z") + (match_operand:V8HI 1 "vsx_register_operand" "+wa"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:V8HI 0 "memory_operand" "") + (match_operand:V8HI 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" + [(set (match_dup 2) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))) + (set (match_dup 0) + (vec_select:V8HI + (match_dup 2) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; +}) + +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. +(define_split + [(set (match_operand:V8HI 0 "memory_operand" "") + (match_operand:V8HI 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" + [(set (match_dup 1) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))) + (set (match_dup 0) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))) + (set (match_dup 1) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "") + +(define_insn "*vsx_le_perm_store_v16qi" + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "vsx_register_operand" "+wa"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:V16QI 0 "memory_operand" "") + (match_operand:V16QI 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" + [(set (match_dup 2) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))) + (set (match_dup 0) + (vec_select:V16QI + (match_dup 2) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] +{ + operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; +}) + +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. +(define_split + [(set (match_operand:V16QI 0 "memory_operand" "") + (match_operand:V16QI 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" + [(set (match_dup 1) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))) + (set (match_dup 0) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))) + (set (match_dup 1) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "") + + +(define_insn "*vsx_mov<mode>" + [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v") + (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] + "VECTOR_MEM_VSX_P (<MODE>mode) + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" +{ + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")]) - -;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for -;; unions. However for plain data movement, slightly favor the vector loads -(define_insn "*vsx_movti" - [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,?Y,?r,?r,wa,v,v,wZ") - (match_operand:TI 1 "input_operand" "wa,Z,wa,r,Y,r,j,W,wZ,v"))] - "VECTOR_MEM_VSX_P (TImode) + [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") + (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")]) + +;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal +;; use of TImode is for unions. However for plain data movement, slightly +;; favor the vector loads +(define_insn "*vsx_movti_64bit" + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r") + (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) + && (register_operand (operands[0], TImode) + || register_operand (operands[1], TImode))" +{ + return rs6000_output_move_128bit (operands); +} + [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*") + (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")]) + +(define_insn "*vsx_movti_32bit" + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r") + (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))] + "! 
TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) && (register_operand (operands[0], TImode) || register_operand (operands[1], TImode))" { @@ -290,27 +623,45 @@ return "xxlor %x0,%x1,%x1"; case 3: + return "xxlxor %x0,%x0,%x0"; + case 4: + return output_vec_const_move (operands); + case 5: - return "#"; + return "stvx %1,%y0"; case 6: - return "xxlxor %x0,%x0,%x0"; + return "lvx %0,%y1"; case 7: - return output_vec_const_move (operands); + if (TARGET_STRING) + return \"stswi %1,%P0,16\"; case 8: - return "stvx %1,%y0"; + return \"#\"; case 9: - return "lvx %0,%y1"; + /* If the address is not used in the output, we can use lsi. Otherwise, + fall through to generating four loads. */ + if (TARGET_STRING + && ! reg_overlap_mentioned_p (operands[0], operands[1])) + return \"lswi %0,%P1,16\"; + /* ... fall through ... */ + case 10: + case 11: + case 12: + return \"#\"; default: gcc_unreachable (); } } - [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *") + (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16") + (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING") + (const_string "always") + (const_string "conditional")))]) ;; Explicit load/store expanders for the builtin functions (define_expand "vsx_load_<mode>" @@ -320,46 +671,48 @@ "") (define_expand "vsx_store_<mode>" - [(set (match_operand:VEC_M 0 "memory_operand" "") - (match_operand:VEC_M 1 "vsx_register_operand" ""))] + [(set (match_operand:VSX_M 0 "memory_operand" "") + (match_operand:VSX_M 1 "vsx_register_operand" ""))] "VECTOR_MEM_VSX_P (<MODE>mode)" "") -;; VSX scalar and vector floating point arithmetic instructions +;; VSX vector floating point arithmetic instructions. The VSX scalar +;; instructions are now combined with the insn for the traditional floating +;; point unit. 
(define_insn "*vsx_add<mode>3" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>add<VSs> %x0,%x1,%x2" + "xvadd<VSs> %x0,%x1,%x2" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "*vsx_sub<mode>3" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>sub<VSs> %x0,%x1,%x2" + "xvsub<VSs> %x0,%x1,%x2" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "*vsx_mul<mode>3" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>mul<VSs> %x0,%x1,%x2" - [(set_attr "type" "<VStype_mul>") + "xvmul<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_mul>")]) (define_insn "*vsx_div<mode>3" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>div<VSs> %x0,%x1,%x2" + "xvdiv<VSs> %x0,%x1,%x2" [(set_attr "type" "<VStype_div>") (set_attr "fp_type" "<VSfptype_div>")]) @@ -402,94 +755,72 @@ (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "vsx_fre<mode>2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")] UNSPEC_FRES))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>re<VSs> %x0,%x1" + "xvre<VSs> %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "*vsx_neg<mode>2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>neg<VSs> %x0,%x1" + "xvneg<VSs> %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "*vsx_abs<mode>2" - [(set 
(match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>abs<VSs> %x0,%x1" + "xvabs<VSs> %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "vsx_nabs<mode>2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (neg:VSX_B - (abs:VSX_B - (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (neg:VSX_F + (abs:VSX_F + (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa"))))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>nabs<VSs> %x0,%x1" + "xvnabs<VSs> %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "vsx_smax<mode>3" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>max<VSs> %x0,%x1,%x2" + "xvmax<VSs> %x0,%x1,%x2" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "*vsx_smin<mode>3" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>min<VSs> %x0,%x1,%x2" + "xvmin<VSs> %x0,%x1,%x2" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) -;; Special VSX version of smin/smax for single precision floating point. Since -;; both numbers are rounded to single precision, we can just use the DP version -;; of the instruction. 
- -(define_insn "*vsx_smaxsf3" - [(set (match_operand:SF 0 "vsx_register_operand" "=f") - (smax:SF (match_operand:SF 1 "vsx_register_operand" "f") - (match_operand:SF 2 "vsx_register_operand" "f")))] - "VECTOR_UNIT_VSX_P (DFmode)" - "xsmaxdp %x0,%x1,%x2" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_addsub_d")]) - -(define_insn "*vsx_sminsf3" - [(set (match_operand:SF 0 "vsx_register_operand" "=f") - (smin:SF (match_operand:SF 1 "vsx_register_operand" "f") - (match_operand:SF 2 "vsx_register_operand" "f")))] - "VECTOR_UNIT_VSX_P (DFmode)" - "xsmindp %x0,%x1,%x2" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_addsub_d")]) - (define_insn "*vsx_sqrt<mode>2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>sqrt<VSs> %x0,%x1" + "xvsqrt<VSs> %x0,%x1" [(set_attr "type" "<VStype_sqrt>") (set_attr "fp_type" "<VSfptype_sqrt>")]) (define_insn "*vsx_rsqrte<mode>2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")] UNSPEC_RSQRT))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>rsqrte<VSs> %x0,%x1" + "xvrsqrte<VSs> %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) @@ -528,26 +859,10 @@ [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) -;; Fused vector multiply/add instructions Support the classical DF versions of -;; fma, which allows the target to be a separate register from the 3 inputs. -;; Under VSX, the target must be either the addend or the first multiply. -;; Where we can, also do the same for the Altivec V4SF fmas. - -(define_insn "*vsx_fmadf4" - [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d") - (fma:DF - (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d") - (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d") - (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))] - "VECTOR_UNIT_VSX_P (DFmode)" - "@ - xsmaddadp %x0,%x1,%x2 - xsmaddmdp %x0,%x1,%x3 - xsmaddadp %x0,%x1,%x2 - xsmaddmdp %x0,%x1,%x3 - fmadd %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_d")]) +;; Fused vector multiply/add instructions. Support the classical Altivec +;; versions of fma, which allows the target to be a separate register from the +;; 3 inputs. Under VSX, the target must be either the addend or the first +;; multiply. 
(define_insn "*vsx_fmav4sf4" [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v") @@ -578,23 +893,6 @@ xvmaddmdp %x0,%x1,%x3" [(set_attr "type" "vecdouble")]) -(define_insn "*vsx_fmsdf4" - [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d") - (fma:DF - (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d") - (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d") - (neg:DF - (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))] - "VECTOR_UNIT_VSX_P (DFmode)" - "@ - xsmsubadp %x0,%x1,%x2 - xsmsubmdp %x0,%x1,%x3 - xsmsubadp %x0,%x1,%x2 - xsmsubmdp %x0,%x1,%x3 - fmsub %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_d")]) - (define_insn "*vsx_fms<mode>4" [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") (fma:VSX_F @@ -604,29 +902,12 @@ (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))] "VECTOR_UNIT_VSX_P (<MODE>mode)" "@ - x<VSv>msuba<VSs> %x0,%x1,%x2 - x<VSv>msubm<VSs> %x0,%x1,%x3 - x<VSv>msuba<VSs> %x0,%x1,%x2 - x<VSv>msubm<VSs> %x0,%x1,%x3" + xvmsuba<VSs> %x0,%x1,%x2 + xvmsubm<VSs> %x0,%x1,%x3 + xvmsuba<VSs> %x0,%x1,%x2 + xvmsubm<VSs> %x0,%x1,%x3" [(set_attr "type" "<VStype_mul>")]) -(define_insn "*vsx_nfmadf4" - [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d") - (neg:DF - (fma:DF - (match_operand:DF 1 "vsx_register_operand" "ws,ws,wa,wa,d") - (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d") - (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))] - "VECTOR_UNIT_VSX_P (DFmode)" - "@ - xsnmaddadp %x0,%x1,%x2 - xsnmaddmdp %x0,%x1,%x3 - xsnmaddadp %x0,%x1,%x2 - xsnmaddmdp %x0,%x1,%x3 - fnmadd %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_d")]) - (define_insn "*vsx_nfma<mode>4" [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") (neg:VSX_F @@ -636,31 +917,13 @@ (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))] "VECTOR_UNIT_VSX_P (<MODE>mode)" "@ - x<VSv>nmadda<VSs> %x0,%x1,%x2 - x<VSv>nmaddm<VSs> %x0,%x1,%x3 - x<VSv>nmadda<VSs> %x0,%x1,%x2 - x<VSv>nmaddm<VSs> %x0,%x1,%x3" + xvnmadda<VSs> %x0,%x1,%x2 + xvnmaddm<VSs> %x0,%x1,%x3 + xvnmadda<VSs> %x0,%x1,%x2 + xvnmaddm<VSs> %x0,%x1,%x3" [(set_attr "type" "<VStype_mul>") (set_attr "fp_type" "<VSfptype_mul>")]) -(define_insn "*vsx_nfmsdf4" - [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d") - (neg:DF - (fma:DF - (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d") - (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d") - (neg:DF - (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))))] - "VECTOR_UNIT_VSX_P (DFmode)" - "@ - xsnmsubadp %x0,%x1,%x2 - xsnmsubmdp %x0,%x1,%x3 - xsnmsubadp %x0,%x1,%x2 - xsnmsubmdp %x0,%x1,%x3 - fnmsub %0,%1,%2,%3" - [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_d")]) - (define_insn "*vsx_nfmsv4sf4" [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") (neg:V4SF @@ -722,16 +985,6 @@ [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) -;; Floating point scalar compare -(define_insn "*vsx_cmpdf_internal1" - [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y") - (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa") - (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT - && VECTOR_UNIT_VSX_P (DFmode)" - "xscmpudp %0,%x1,%x2" - [(set_attr "type" "fpcompare")]) - ;; Compare vectors producing a vector result and a predicate, setting CR6 to ;; indicate a 
combined status (define_insn "*vsx_eq_<mode>_p" @@ -798,13 +1051,13 @@ ;; Copy sign (define_insn "vsx_copysign<mode>3" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (unspec:VSX_B - [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (unspec:VSX_F + [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")] UNSPEC_COPYSIGN))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>cpsgn<VSs> %x0,%x2,%x1" + "xvcpsgn<VSs> %x0,%x2,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) @@ -865,10 +1118,10 @@ (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "vsx_btrunc<mode>2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>r<VSs>iz %x0,%x1" + "xvr<VSs>iz %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) @@ -882,20 +1135,20 @@ (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "vsx_floor<mode>2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")] UNSPEC_FRIM))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>r<VSs>im %x0,%x1" + "xvr<VSs>im %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "vsx_ceil<mode>2" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") - (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") + (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")] UNSPEC_FRIP))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>r<VSs>ip %x0,%x1" + "xvr<VSs>ip %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) @@ -942,6 +1195,40 @@ "xscvspdp %x0,%x1" [(set_attr "type" "fp")]) +;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs +(define_insn "vsx_xscvdpspn" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa") + (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvspdpn" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa") + (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvdpspn_scalar" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +;; Used by direct move to move a SFmode value from GPR to VSX register +(define_insn "vsx_xscvspdpn_directmove" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn 
%x0,%x1" + [(set_attr "type" "fp")]) + ;; Convert from 64-bit to 32-bit types ;; Note, favor the Altivec registers since the usual use of these instructions ;; is in vector converts and we need to use the Altivec vperm instruction. @@ -1027,73 +1314,21 @@ (set_attr "fp_type" "<VSfptype_simple>")]) -;; Logical and permute operations -(define_insn "*vsx_and<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (and:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxland %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*vsx_ior<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxlor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*vsx_xor<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (xor:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxlxor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*vsx_one_cmpl<mode>2" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (not:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxlnor %x0,%x1,%x1" - [(set_attr "type" "vecsimple")]) - -(define_insn "*vsx_nor<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (not:VSX_L - (ior:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))] - "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxlnor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*vsx_andc<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (and:VSX_L - (not:VSX_L - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")) - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxlandc %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) - - ;; Permute operations ;; Build a V2DF/V2DI vector from two scalars (define_insn "vsx_concat_<mode>" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa") - (unspec:VSX_D - [(match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa") - (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")] - UNSPEC_VSX_CONCAT))] + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa") + (vec_concat:VSX_D + (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa") + (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))] "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxpermdi %x0,%x1,%x2,0" +{ + if (BYTES_BIG_ENDIAN) + return "xxpermdi %x0,%x1,%x2,0"; + else + return "xxpermdi %x0,%x2,%x1,0"; +} [(set_attr "type" "vecperm")]) ;; Special purpose concat using xxpermdi to glue two single precision values @@ -1106,9 +1341,176 @@ (match_operand:SF 2 "vsx_register_operand" "f,f")] UNSPEC_VSX_CONCAT))] "VECTOR_MEM_VSX_P (V2DFmode)" - "xxpermdi %x0,%x1,%x2,0" +{ + if (BYTES_BIG_ENDIAN) + return "xxpermdi %x0,%x1,%x2,0"; + else + return "xxpermdi %x0,%x2,%x1,0"; +} + [(set_attr "type" "vecperm")]) + +;; xxpermdi for little endian loads and stores. We need several of +;; these since the form of the PARALLEL differs by mode. 
+(define_insn "*vsx_xxpermdi2_le_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_select:VSX_D + (match_operand:VSX_D 1 "vsx_register_operand" "wa") + (parallel [(const_int 1) (const_int 0)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + +(define_insn "*vsx_xxpermdi4_le_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") + (vec_select:VSX_W + (match_operand:VSX_W 1 "vsx_register_operand" "wa") + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxpermdi %x0,%x1,%x1,2" [(set_attr "type" "vecperm")]) +(define_insn "*vsx_xxpermdi8_le_V8HI" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (vec_select:V8HI + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)" + "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + +(define_insn "*vsx_xxpermdi16_le_V16QI" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_select:V16QI + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)" + "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + +;; lxvd2x for little endian loads. We need several of +;; these since the form of the PARALLEL differs by mode. +(define_insn "*vsx_lxvd2x2_le_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_select:VSX_D + (match_operand:VSX_D 1 "memory_operand" "Z") + (parallel [(const_int 1) (const_int 0)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "*vsx_lxvd2x4_le_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") + (vec_select:VSX_W + (match_operand:VSX_W 1 "memory_operand" "Z") + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "*vsx_lxvd2x8_le_V8HI" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (vec_select:V8HI + (match_operand:V8HI 1 "memory_operand" "Z") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "*vsx_lxvd2x16_le_V16QI" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_select:V16QI + (match_operand:V16QI 1 "memory_operand" "Z") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +;; stxvd2x for little endian stores. We need several of +;; these since the form of the PARALLEL differs by mode. 
+(define_insn "*vsx_stxvd2x2_le_<mode>" + [(set (match_operand:VSX_D 0 "memory_operand" "=Z") + (vec_select:VSX_D + (match_operand:VSX_D 1 "vsx_register_operand" "wa") + (parallel [(const_int 1) (const_int 0)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "*vsx_stxvd2x4_le_<mode>" + [(set (match_operand:VSX_W 0 "memory_operand" "=Z") + (vec_select:VSX_W + (match_operand:VSX_W 1 "vsx_register_operand" "wa") + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "*vsx_stxvd2x8_le_V8HI" + [(set (match_operand:V8HI 0 "memory_operand" "=Z") + (vec_select:V8HI + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "*vsx_stxvd2x16_le_V16QI" + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (vec_select:V16QI + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +;; Convert a TImode value into V1TImode +(define_expand "vsx_set_v1ti" + [(match_operand:V1TI 0 "nonimmediate_operand" "") + (match_operand:V1TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "input_operand" "") + (match_operand:QI 3 "u5bit_cint_operand" "")] + "VECTOR_MEM_VSX_P (V1TImode)" +{ + if (operands[3] != const0_rtx) + gcc_unreachable (); + + emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1])); + DONE; +}) + ;; Set the element of a V2DI/VD2F mode (define_insn "vsx_set_<mode>" [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa") @@ -1118,9 +1520,10 @@ UNSPEC_VSX_SET))] "VECTOR_MEM_VSX_P (<MODE>mode)" { - if (INTVAL (operands[3]) == 0) + int idx_first = BYTES_BIG_ENDIAN ? 
0 : 1; + if (INTVAL (operands[3]) == idx_first) return \"xxpermdi %x0,%x2,%x1,1\"; - else if (INTVAL (operands[3]) == 1) + else if (INTVAL (operands[3]) == 1 - idx_first) return \"xxpermdi %x0,%x1,%x2,0\"; else gcc_unreachable (); @@ -1135,8 +1538,12 @@ [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))] "VECTOR_MEM_VSX_P (<MODE>mode)" { + int fldDM; gcc_assert (UINTVAL (operands[2]) <= 1); - operands[3] = GEN_INT (INTVAL (operands[2]) << 1); + fldDM = INTVAL (operands[2]) << 1; + if (!BYTES_BIG_ENDIAN) + fldDM = 3 - fldDM; + operands[3] = GEN_INT (fldDM); return \"xxpermdi %x0,%x1,%x1,%3\"; } [(set_attr "type" "vecperm")]) @@ -1149,7 +1556,26 @@ (parallel [(const_int 0)])))] "VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN" "lxsd%U1x %x0,%y1" - [(set_attr "type" "fpload") + [(set (attr "type") + (if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "fpload_ux") + (const_string "fpload"))) + (set_attr "length" "4")]) + +;; Optimize extracting element 1 from memory for little endian +(define_insn "*vsx_extract_<mode>_one_le" + [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa") + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z") + (parallel [(const_int 1)])))] + "VECTOR_MEM_VSX_P (<MODE>mode) && !WORDS_BIG_ENDIAN" + "lxsd%U1x %x0,%y1" + [(set (attr "type") + (if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "fpload_ux") + (const_string "fpload"))) (set_attr "length" "4")]) ;; Extract a SF element from V4SF @@ -1172,7 +1598,7 @@ rtx op2 = operands[2]; rtx op3 = operands[3]; rtx tmp; - HOST_WIDE_INT ele = INTVAL (op2); + HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2); if (ele == 0) tmp = op1; @@ -1213,11 +1639,22 @@ if (<MODE>mode != V2DImode) { target = gen_lowpart (V2DImode, target); - op0 = gen_lowpart (V2DImode, target); - op1 = gen_lowpart (V2DImode, target); + op0 = gen_lowpart (V2DImode, op0); + op1 = gen_lowpart (V2DImode, op1); } } + /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a + transformation we don't want; it is necessary for + rs6000_expand_vec_perm_const_1 but not for this use. So we + prepare for that by reversing the transformation here. */ + if (BYTES_BIG_ENDIAN) emit_insn (gen (target, op0, op1, perm0, perm1)); + else + { + rtx p0 = GEN_INT (3 - INTVAL (perm1)); + rtx p1 = GEN_INT (3 - INTVAL (perm0)); + emit_insn (gen (target, op1, op0, p0, p1)); + } DONE; }) @@ -1231,9 +1668,32 @@ (match_operand 4 "const_2_to_3_operand" "")])))] "VECTOR_MEM_VSX_P (<MODE>mode)" { - int mask = (INTVAL (operands[3]) << 1) | (INTVAL (operands[4]) - 2); + int op3, op4, mask; + + /* For little endian, swap operands and invert/swap selectors + to get the correct xxpermdi. The operand swap sets up the + inputs as a little endian array. The selectors are swapped + because they are defined to use big endian ordering. The + selectors are inverted to get the correct doublewords for + little endian ordering. 
*/ + if (BYTES_BIG_ENDIAN) + { + op3 = INTVAL (operands[3]); + op4 = INTVAL (operands[4]); + } + else + { + op3 = 3 - INTVAL (operands[4]); + op4 = 3 - INTVAL (operands[3]); + } + + mask = (op3 << 1) | (op4 - 2); operands[3] = GEN_INT (mask); + + if (BYTES_BIG_ENDIAN) return "xxpermdi %x0,%x1,%x2,%3"; + else + return "xxpermdi %x0,%x2,%x1,%3"; } [(set_attr "type" "vecperm")]) @@ -1252,24 +1712,56 @@ ;; Expanders for builtins (define_expand "vsx_mergel_<mode>" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "") - (vec_select:VSX_D - (vec_concat:<VS_double> - (match_operand:VSX_D 1 "vsx_register_operand" "") - (match_operand:VSX_D 2 "vsx_register_operand" "")) - (parallel [(const_int 1) (const_int 3)])))] + [(use (match_operand:VSX_D 0 "vsx_register_operand" "")) + (use (match_operand:VSX_D 1 "vsx_register_operand" "")) + (use (match_operand:VSX_D 2 "vsx_register_operand" ""))] "VECTOR_MEM_VSX_P (<MODE>mode)" - "") +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) (define_expand "vsx_mergeh_<mode>" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "") - (vec_select:VSX_D - (vec_concat:<VS_double> - (match_operand:VSX_D 1 "vsx_register_operand" "") - (match_operand:VSX_D 2 "vsx_register_operand" "")) - (parallel [(const_int 0) (const_int 2)])))] + [(use (match_operand:VSX_D 0 "vsx_register_operand" "")) + (use (match_operand:VSX_D 1 "vsx_register_operand" "")) + (use (match_operand:VSX_D 2 "vsx_register_operand" ""))] "VECTOR_MEM_VSX_P (<MODE>mode)" - "") +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. 
*/ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) ;; V2DF/V2DI splat (define_insn "vsx_splat_<mode>" @@ -1295,6 +1787,20 @@ (parallel [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))] "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + return "xxspltw %x0,%x1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "vsx_xxspltw_<mode>_direct" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa") + (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,wa") + (match_operand:QI 2 "u5bit_cint_operand" "i,i")] + UNSPEC_VSX_XXSPLTW))] + "VECTOR_MEM_VSX_P (<MODE>mode)" "xxspltw %x0,%x1,%2" [(set_attr "type" "vecperm")]) @@ -1308,7 +1814,12 @@ (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxmrghw %x0,%x1,%x2" +{ + if (BYTES_BIG_ENDIAN) + return "xxmrghw %x0,%x1,%x2"; + else + return "xxmrglw %x0,%x2,%x1"; +} [(set_attr "type" "vecperm")]) (define_insn "vsx_xxmrglw_<mode>" @@ -1320,7 +1831,12 @@ (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] "VECTOR_MEM_VSX_P (<MODE>mode)" - "xxmrglw %x0,%x1,%x2" +{ + if (BYTES_BIG_ENDIAN) + return "xxmrglw %x0,%x1,%x2"; + else + return "xxmrghw %x0,%x2,%x1"; +} [(set_attr "type" "vecperm")]) ;; Shift left double by word immediate @@ -1483,3 +1999,27 @@ }" [(set_attr "length" "20") (set_attr "type" "veccomplex")]) + + +;; Power8 Vector fusion. The fused ops must be physically adjacent. +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M2 2 "vsx_register_operand" "") + (mem:VSX_M2 (plus:P (match_dup 0) + (match_operand:P 3 "int_reg_operand" ""))))] + "TARGET_VSX && TARGET_P8_FUSION" + "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M2 2 "vsx_register_operand" "") + (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "") + (match_dup 0))))] + "TARGET_VSX && TARGET_P8_FUSION" + "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) |
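The little-endian permuted load/store patterns above (*vsx_le_perm_load_<mode> and friends) split each vector access into an lxvd2x or stxvd2x plus an xxpermdi doubleword swap, because those instructions transfer doublewords in big-endian order. A minimal C sketch of the access shape involved; the function names are illustrative, and the stated lowering is what these patterns are expected to produce with a powerpc64le GCC at -O2 -mvsx:

typedef double v2df __attribute__ ((vector_size (16)));

v2df
load_v2df (const v2df *p)
{
  /* Expected on little endian: lxvd2x, then an xxpermdi that swaps
     the two doublewords back into element order.  */
  return *p;
}

void
store_v2df (v2df *p, v2df x)
{
  /* Expected on little endian: xxpermdi to pre-swap, then stxvd2x.  */
  *p = x;
}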
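The rewritten vsx_xxpermdi_<mode> pattern folds the little-endian fixups into the 2-bit DM immediate: the two inputs are swapped, and each selector is replaced by 3 minus the other selector. A self-contained sketch of that arithmetic, using a hypothetical helper name xxpermdi_dm; the asserts work through the big- and little-endian encodings of the same logical selection:

#include <assert.h>

/* Selector operands use big-endian numbering: sel3 picks from the
   first input (0 or 1), sel4 from the second (2 or 3).  For little
   endian the inputs are swapped and the selectors inverted before
   being packed into the DM field of xxpermdi.  */
static int
xxpermdi_dm (int sel3, int sel4, int big_endian)
{
  int op3 = big_endian ? sel3 : 3 - sel4;
  int op4 = big_endian ? sel4 : 3 - sel3;
  return (op3 << 1) | (op4 - 2);
}

int
main (void)
{
  assert (xxpermdi_dm (0, 2, 1) == 0);  /* BE: high halves, DM = 0 */
  assert (xxpermdi_dm (0, 2, 0) == 3);  /* LE: same selection, DM = 3 */
  return 0;
}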
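Several of the word-granular patterns (vsx_xxspltw_<mode>, the V4SF extract expander) renumber the element as 3 - n on little endian, since RTL element numbers here follow big-endian order. The remapping as a hypothetical standalone helper:

/* RTL word numbers are big-endian, so little endian mirrors the index
   across the four 32-bit words of the register: word 0 becomes word 3.  */
int
vsx_word_index (int n, int big_endian)
{
  return big_endian ? n : 3 - n;
}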
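The define_peephole entries added at the end keep a li that materializes a small constant physically adjacent to the x-form vector load consuming it, so Power8 can fuse the pair. A sketch of source that tends to produce that shape, assuming -mcpu=power8 and the illustrative function name below: VSX loads have no displacement form, so a constant offset from a base pointer is loaded with li and fed to an indexed lxvd2x.

typedef double v2df __attribute__ ((vector_size (16)));

v2df
load_at_offset (const char *base)   /* base assumed 16-byte aligned */
{
  /* Plausible lowering (not guaranteed):
         li      r9,48
         lxvd2x  vs34,r3,r9
     which matches the "li %0,%1 ... lx<VSX_M2:VSm>x %x2,%0,%3"
     fusion template.  */
  return *(const v2df *) (base + 48);
}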