diff options
Diffstat (limited to 'gcc-4.8/gcc/config/rs6000/altivec.md')
-rw-r--r-- | gcc-4.8/gcc/config/rs6000/altivec.md | 1928 |
1 files changed, 1457 insertions, 471 deletions
diff --git a/gcc-4.8/gcc/config/rs6000/altivec.md b/gcc-4.8/gcc/config/rs6000/altivec.md index 1b0b5c3fb..f13b5ceb3 100644 --- a/gcc-4.8/gcc/config/rs6000/altivec.md +++ b/gcc-4.8/gcc/config/rs6000/altivec.md @@ -41,15 +41,12 @@ UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH - UNSPEC_VPKUHUM - UNSPEC_VPKUWUM UNSPEC_VPKPX - UNSPEC_VPKSHSS - UNSPEC_VPKSWSS - UNSPEC_VPKUHUS - UNSPEC_VPKSHUS - UNSPEC_VPKUWUS - UNSPEC_VPKSWUS + UNSPEC_VPACK_SIGN_SIGN_SAT + UNSPEC_VPACK_SIGN_UNS_SAT + UNSPEC_VPACK_UNS_UNS_SAT + UNSPEC_VPACK_UNS_UNS_MOD + UNSPEC_VPACK_UNS_UNS_MOD_DIRECT UNSPEC_VSLV4SI UNSPEC_VSLO UNSPEC_VSR @@ -71,12 +68,12 @@ UNSPEC_VLOGEFP UNSPEC_VEXPTEFP UNSPEC_VLSDOI - UNSPEC_VUPKHSB + UNSPEC_VUNPACK_HI_SIGN + UNSPEC_VUNPACK_LO_SIGN + UNSPEC_VUNPACK_HI_SIGN_DIRECT + UNSPEC_VUNPACK_LO_SIGN_DIRECT UNSPEC_VUPKHPX - UNSPEC_VUPKHSH - UNSPEC_VUPKLSB UNSPEC_VUPKLPX - UNSPEC_VUPKLSH UNSPEC_DST UNSPEC_DSTT UNSPEC_DSTST @@ -134,6 +131,21 @@ UNSPEC_VUPKLS_V4SF UNSPEC_VUPKHU_V4SF UNSPEC_VUPKLU_V4SF + UNSPEC_VGBBD + UNSPEC_VMRGH_DIRECT + UNSPEC_VMRGL_DIRECT + UNSPEC_VSPLT_DIRECT + UNSPEC_VSUMSWS_DIRECT + UNSPEC_VADDCUQ + UNSPEC_VADDEUQM + UNSPEC_VADDECUQ + UNSPEC_VSUBCUQ + UNSPEC_VSUBEUQM + UNSPEC_VSUBECUQ + UNSPEC_VBPERMQ + UNSPEC_BCDADD + UNSPEC_BCDSUB + UNSPEC_BCD_OVERFLOW ]) (define_c_enum "unspecv" @@ -146,6 +158,8 @@ ;; Vec int modes (define_mode_iterator VI [V4SI V8HI V16QI]) +;; Like VI, but add ISA 2.07 integer vector ops +(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) ;; Short vec in modes (define_mode_iterator VIshort [V8HI V16QI]) ;; Vec float modes @@ -154,13 +168,24 @@ (define_mode_iterator V [V4SI V8HI V16QI V4SF]) ;; Vec modes for move/logical/permute ops, include vector types for move not ;; otherwise handled by altivec (v2df, v2di, ti) -(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI TI]) +(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI V1TI TI]) ;; Like VM, except don't do TImode -(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI]) - -(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI V1TI]) + +(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") + (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") + (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)") + (V1TI "VECTOR_UNIT_ALTIVEC_P (V1TImode)")]) + +;; Vector pack/unpack +(define_mode_iterator VP [V2DI V4SI V8HI]) +(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) ;; Vector move instructions. (define_insn "*altivec_mov<mode>" @@ -378,10 +403,10 @@ ;; add (define_insn "add<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (plus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (plus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vaddu<VI_char>m %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -398,7 +423,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VADDCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vaddcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -408,7 +433,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "<VI_unit>" "vaddu<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -418,16 +443,16 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vadds<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) ;; sub (define_insn "sub<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (minus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (minus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsubu<VI_char>m %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -444,7 +469,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUBCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vsubcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -454,7 +479,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vsubu<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -464,7 +489,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vsubs<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -483,7 +508,7 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VAVGS))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vavgs<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -492,31 +517,31 @@ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")] UNSPEC_VCMPBFP))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vcmpbfp %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_eq<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (eq:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpequ<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gt<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gt:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpgts<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gtu<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpgtu<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -642,7 +667,7 @@ convert_move (small_swap, swap, 0); low_product = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_umult_odd_v8hi (low_product, one, two)); + emit_insn (gen_altivec_vmulouh (low_product, one, two)); high_product = gen_reg_rtx (V4SImode); emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero)); @@ -666,13 +691,22 @@ rtx high = gen_reg_rtx (V4SImode); rtx low = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_smult_even_v8hi (even, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v8hi (odd, operands[1], operands[2])); - - emit_insn (gen_altivec_vmrghw (high, even, odd)); - emit_insn (gen_altivec_vmrglw (low, even, odd)); - - emit_insn (gen_altivec_vpkuwum (operands[0], high, low)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (high, even, odd)); + emit_insn (gen_altivec_vmrglw_direct (low, even, odd)); + emit_insn (gen_altivec_vpkuwum_direct (operands[0], high, low)); + } + else + { + emit_insn (gen_altivec_vmulosh (even, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (odd, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (high, odd, even)); + emit_insn (gen_altivec_vmrglw_direct (low, odd, even)); + emit_insn (gen_altivec_vpkuwum_direct (operands[0], low, high)); + } DONE; }") @@ -744,18 +778,18 @@ ;; max (define_insn "umax<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmaxu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smax<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmaxs<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -768,18 +802,18 @@ [(set_attr "type" "veccmp")]) (define_insn "umin<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vminu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smin<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmins<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -823,7 +857,39 @@ "vmladduhm %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) -(define_insn "altivec_vmrghb" +(define_expand "altivec_vmrghb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25), + GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27), + GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29), + GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17), + GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19), + GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21), + GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghb_internal" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_select:V16QI (vec_concat:V32QI @@ -838,10 +904,52 @@ (const_int 6) (const_int 22) (const_int 7) (const_int 23)])))] "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghb %0,%1,%2"; + else + return "vmrglb %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] + "TARGET_ALTIVEC" "vmrghb %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrghh" +(define_expand "altivec_vmrghh" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13), + GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9), + GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghh_internal" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_select:V8HI (vec_concat:V16HI @@ -852,10 +960,50 @@ (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))] "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghh %0,%1,%2"; + else + return "vmrglh %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghh_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] + "TARGET_ALTIVEC" "vmrghh %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrghw" +(define_expand "altivec_vmrghw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghw_internal" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_select:V4SI (vec_concat:V8SI @@ -864,6 +1012,20 @@ (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghw %0,%1,%2"; + else + return "vmrglw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] + "TARGET_ALTIVEC" "vmrghw %0,%1,%2" [(set_attr "type" "vecperm")]) @@ -876,10 +1038,47 @@ (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] "VECTOR_MEM_ALTIVEC_P (V4SFmode)" - "vmrghw %0,%1,%2" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghw %0,%1,%2"; + else + return "vmrglw %0,%2,%1"; +} [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrglb" +(define_expand "altivec_vmrglb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17), + GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19), + GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21), + GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25), + GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27), + GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29), + GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglb_internal" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_select:V16QI (vec_concat:V32QI @@ -894,10 +1093,52 @@ (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglb %0,%1,%2"; + else + return "vmrghb %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] + "TARGET_ALTIVEC" "vmrglb %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrglh" +(define_expand "altivec_vmrglh" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9), + GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13), + GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglh_internal" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_select:V8HI (vec_concat:V16HI @@ -908,10 +1149,50 @@ (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))] "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglh %0,%1,%2"; + else + return "vmrghh %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglh_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] + "TARGET_ALTIVEC" "vmrglh %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vmrglw" +(define_expand "altivec_vmrglw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglw_internal" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_select:V4SI (vec_concat:V8SI @@ -920,6 +1201,20 @@ (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglw %0,%1,%2"; + else + return "vmrghw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] + "TARGET_ALTIVEC" "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) @@ -932,10 +1227,154 @@ (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] "VECTOR_MEM_ALTIVEC_P (V4SFmode)" - "vmrglw %0,%1,%2" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglw %0,%1,%2"; + else + return "vmrghw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +;; Power8 vector merge even/odd +(define_insn "p8_vmrgew" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_P8_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + return "vmrgew %0,%1,%2"; + else + return "vmrgow %0,%2,%1"; +} [(set_attr "type" "vecperm")]) -(define_insn "vec_widen_umult_even_v16qi" +(define_insn "p8_vmrgow" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "TARGET_P8_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + return "vmrgow %0,%1,%2"; + else + return "vmrgew %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_expand "vec_widen_umult_even_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_even_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_even_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_even_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_odd_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_odd_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_odd_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_odd_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "altivec_vmuleub" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") (match_operand:V16QI 2 "register_operand" "v")] @@ -944,43 +1383,25 @@ "vmuleub %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "vec_widen_smult_even_v16qi" +(define_insn "altivec_vmuloub" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") (match_operand:V16QI 2 "register_operand" "v")] - UNSPEC_VMULESB))] - "TARGET_ALTIVEC" - "vmulesb %0,%1,%2" - [(set_attr "type" "veccomplex")]) - -(define_insn "vec_widen_umult_even_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VMULEUH))] - "TARGET_ALTIVEC" - "vmuleuh %0,%1,%2" - [(set_attr "type" "veccomplex")]) - -(define_insn "vec_widen_smult_even_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VMULESH))] + UNSPEC_VMULOUB))] "TARGET_ALTIVEC" - "vmulesh %0,%1,%2" + "vmuloub %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "vec_widen_umult_odd_v16qi" +(define_insn "altivec_vmulesb" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") (match_operand:V16QI 2 "register_operand" "v")] - UNSPEC_VMULOUB))] + UNSPEC_VMULESB))] "TARGET_ALTIVEC" - "vmuloub %0,%1,%2" + "vmulesb %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "vec_widen_smult_odd_v16qi" +(define_insn "altivec_vmulosb" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") (match_operand:V16QI 2 "register_operand" "v")] @@ -989,7 +1410,16 @@ "vmulosb %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "vec_widen_umult_odd_v8hi" +(define_insn "altivec_vmuleuh" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULEUH))] + "TARGET_ALTIVEC" + "vmuleuh %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulouh" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") (match_operand:V8HI 2 "register_operand" "v")] @@ -998,158 +1428,121 @@ "vmulouh %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "vec_widen_smult_odd_v8hi" +(define_insn "altivec_vmulesh" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VMULOSH))] + UNSPEC_VMULESH))] "TARGET_ALTIVEC" - "vmulosh %0,%1,%2" + "vmulesh %0,%1,%2" [(set_attr "type" "veccomplex")]) - -;; logical ops. Have the logical ops follow the memory ops in -;; terms of whether to prefer VSX or Altivec - -(define_insn "*altivec_and<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (and:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vand %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_ior<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_xor<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (xor:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vxor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_one_cmpl<mode>2" - [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (match_operand:VM 1 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vnor %0,%1,%1" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_nor<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v"))))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vnor %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "*altivec_andc<mode>3" - [(set (match_operand:VM 0 "register_operand" "=v") - (and:VM (not:VM (match_operand:VM 2 "register_operand" "v")) - (match_operand:VM 1 "register_operand" "v")))] - "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" - "vandc %0,%1,%2" - [(set_attr "type" "vecsimple")]) - -(define_insn "altivec_vpkuhum" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") +(define_insn "altivec_vmulosh" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] + UNSPEC_VMULOSH))] "TARGET_ALTIVEC" - "vpkuhum %0,%1,%2" - [(set_attr "type" "vecperm")]) + "vmulosh %0,%1,%2" + [(set_attr "type" "veccomplex")]) -(define_insn "altivec_vpkuwum" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - "vpkuwum %0,%1,%2" - [(set_attr "type" "vecperm")]) +;; Vector pack/unpack (define_insn "altivec_vpkpx" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VPKPX))] "TARGET_ALTIVEC" - "vpkpx %0,%1,%2" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpkpx %0,%1,%2\"; + else + return \"vpkpx %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshss" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshss %0,%1,%2" +(define_insn "altivec_vpks<VI_char>ss" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_SIGN_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpks<VI_char>ss %0,%1,%2\"; + else + return \"vpks<VI_char>ss %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkswss" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswss %0,%1,%2" +(define_insn "altivec_vpks<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_UNS_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpks<VI_char>us %0,%1,%2\"; + else + return \"vpks<VI_char>us %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkuhus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuhus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpku<VI_char>us %0,%1,%2\"; + else + return \"vpku<VI_char>us %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>um" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpku<VI_char>um %0,%1,%2\"; + else + return \"vpku<VI_char>um %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkuwus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuwus %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkswus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>um_direct" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD_DIRECT))] + "<VI_unit>" + "* + { + if (BYTES_BIG_ENDIAN) + return \"vpku<VI_char>um %0,%1,%2\"; + else + return \"vpku<VI_char>um %0,%2,%1\"; + }" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vrl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (rotate:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1172,26 +1565,26 @@ [(set_attr "type" "vecperm")]) (define_insn "*altivec_vsl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashift:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsr<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsr<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsra<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsra<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1233,64 +1626,242 @@ "vsum4s<VI_char>s %0,%1,%2" [(set_attr "type" "veccomplex")]) +;; FIXME: For the following two patterns, the scratch should only be +;; allocated for !VECTOR_ELT_ORDER_BIG, and the instructions should +;; be emitted separately. (define_insn "altivec_vsum2sws" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUM2SWS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR)) + (clobber (match_scratch:V4SI 3 "=v"))] "TARGET_ALTIVEC" - "vsum2sws %0,%1,%2" - [(set_attr "type" "veccomplex")]) +{ + if (VECTOR_ELT_ORDER_BIG) + return "vsum2sws %0,%1,%2"; + else + return "vsldoi %3,%2,%2,12\n\tvsum2sws %3,%1,%3\n\tvsldoi %0,%3,%3,4"; +} + [(set_attr "type" "veccomplex") + (set (attr "length") + (if_then_else + (match_test "VECTOR_ELT_ORDER_BIG") + (const_string "4") + (const_string "12")))]) (define_insn "altivec_vsumsws" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUMSWS)) + (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR)) + (clobber (match_scratch:V4SI 3 "=v"))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vsumsws %0,%1,%2"; + else + return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvsldoi %0,%3,%3,12"; +} + [(set_attr "type" "veccomplex") + (set (attr "length") + (if_then_else + (match_test "(VECTOR_ELT_ORDER_BIG)") + (const_string "4") + (const_string "12")))]) + +(define_insn "altivec_vsumsws_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUMSWS_DIRECT)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vsumsws %0,%1,%2" [(set_attr "type" "veccomplex")]) -(define_insn "altivec_vspltb" +(define_expand "altivec_vspltb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (15 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (QImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V16QImode, x); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltb_internal" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_duplicate:V16QI (vec_select:QI (match_operand:V16QI 1 "register_operand" "v") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "")]))))] "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. */ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (15 - INTVAL (operands[2])); + + return "vspltb %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" "vspltb %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vsplth" +(define_expand "altivec_vsplth" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (7 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (HImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V8HImode, x); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vsplth_internal" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_duplicate:V8HI (vec_select:HI (match_operand:V8HI 1 "register_operand" "v") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "")]))))] "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. */ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (7 - INTVAL (operands[2])); + + return "vsplth %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vsplth_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" "vsplth %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vspltw" +(define_expand "altivec_vspltw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (SImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V4SImode, x); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltw_internal" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_duplicate:V4SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "v") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))] "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. */ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + return "vspltw %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" "vspltw %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vspltsf" +(define_expand "altivec_vspltsf" + [(use (match_operand:V4SF 0 "register_operand" "")) + (use (match_operand:V4SF 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (SFmode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V4SFmode, x); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltsf_internal" [(set (match_operand:V4SF 0 "register_operand" "=v") (vec_duplicate:V4SF (vec_select:SF (match_operand:V4SF 1 "register_operand" "v") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" - "vspltw %0,%1,%2" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. */ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + return "vspltw %0,%1,%2"; +} [(set_attr "type" "vecperm")]) (define_insn "altivec_vspltis<VI_char>" @@ -1308,7 +1879,22 @@ "vrfiz %0,%1" [(set_attr "type" "vecfloat")]) -(define_insn "altivec_vperm_<mode>" +(define_expand "altivec_vperm_<mode>" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] + UNSPEC_VPERM))] + "TARGET_ALTIVEC" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) + +(define_insn "*altivec_vperm_<mode>_internal" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") @@ -1318,7 +1904,22 @@ "vperm %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vperm_<mode>_uns" +(define_expand "altivec_vperm_<mode>_uns" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] + UNSPEC_VPERM_UNS))] + "TARGET_ALTIVEC" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) + +(define_insn "*altivec_vperm_<mode>_uns_internal" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") @@ -1335,7 +1936,12 @@ (match_operand:V16QI 3 "register_operand" "")] UNSPEC_VPERM))] "TARGET_ALTIVEC" - "") +{ + if (!BYTES_BIG_ENDIAN) { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) (define_expand "vec_perm_constv16qi" [(match_operand:V16QI 0 "register_operand" "") @@ -1476,89 +2082,109 @@ "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - "vupkhsb %0,%1" +(define_insn "altivec_vupkhs<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "<VI_unit>" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkhs<VU_char> %0,%1"; + else + return "vupkls<VU_char> %0,%1"; +} [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhpx" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHPX))] - "TARGET_ALTIVEC" - "vupkhpx %0,%1" +(define_insn "*altivec_vupkhs<VU_char>_direct" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN_DIRECT))] + "<VI_unit>" + "vupkhs<VU_char> %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - "vupkhsh %0,%1" +(define_insn "altivec_vupkls<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "<VI_unit>" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkls<VU_char> %0,%1"; + else + return "vupkhs<VU_char> %0,%1"; +} [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupklsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - "vupklsb %0,%1" +(define_insn "*altivec_vupkls<VU_char>_direct" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN_DIRECT))] + "<VI_unit>" + "vupkls<VU_char> %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupklpx" +(define_insn "altivec_vupkhpx" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLPX))] + UNSPEC_VUPKHPX))] "TARGET_ALTIVEC" - "vupklpx %0,%1" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkhpx %0,%1"; + else + return "vupklpx %0,%1"; +} [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupklsh" +(define_insn "altivec_vupklpx" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] + UNSPEC_VUPKLPX))] "TARGET_ALTIVEC" - "vupklsh %0,%1" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupklpx %0,%1"; + else + return "vupkhpx %0,%1"; +} [(set_attr "type" "vecperm")]) ;; Compare vectors producing a vector result and a predicate, setting CR6 to ;; indicate a combined status (define_insn "*altivec_vcmpequ<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (eq:VI (match_dup 1) + (set (match_operand:VI2 0 "register_operand" "=v") + (eq:VI2 (match_dup 1) (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "<VI_unit>" "vcmpequ<VI_char>. %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgts<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gt:VI (match_dup 1) + (set (match_operand:VI2 0 "register_operand" "=v") + (gt:VI2 (match_dup 1) (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "<VI_unit>" "vcmpgts<VI_char>. %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgtu<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gtu:VI (match_dup 1) + (set (match_operand:VI2 0 "register_operand" "=v") + (gtu:VI2 (match_dup 1) (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "<VI_unit>" "vcmpgtu<VI_char>. %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -1710,7 +2336,21 @@ ;; Parallel some of the LVE* and STV*'s with unspecs because some have ;; identical rtl but different instructions-- and gcc gets confused. -(define_insn "altivec_lve<VI_char>x" +(define_expand "altivec_lve<VI_char>x" + [(parallel + [(set (match_operand:VI 0 "register_operand" "=v") + (match_operand:VI 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_LVE)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVE); + DONE; + } +}) + +(define_insn "*altivec_lve<VI_char>x_internal" [(parallel [(set (match_operand:VI 0 "register_operand" "=v") (match_operand:VI 1 "memory_operand" "Z")) @@ -1728,16 +2368,44 @@ "lvewx %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvxl" +(define_expand "altivec_lvxl_<mode>" [(parallel - [(set (match_operand:V4SI 0 "register_operand" "=v") - (match_operand:V4SI 1 "memory_operand" "Z")) + [(set (match_operand:VM2 0 "register_operand" "=v") + (match_operand:VM2 1 "memory_operand" "Z")) (unspec [(const_int 0)] UNSPEC_SET_VSCR)])] "TARGET_ALTIVEC" - "lvxl %0,%y1" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_SET_VSCR); + DONE; + } +}) + +(define_insn "*altivec_lvxl_<mode>_internal" + [(parallel + [(set (match_operand:VM2 0 "register_operand" "=v") + (match_operand:VM2 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_SET_VSCR)])] + "TARGET_ALTIVEC" + "lvx %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvx_<mode>" +(define_expand "altivec_lvx_<mode>" + [(parallel + [(set (match_operand:VM2 0 "register_operand" "=v") + (match_operand:VM2 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_LVX)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVX); + DONE; + } +}) + +(define_insn "*altivec_lvx_<mode>_internal" [(parallel [(set (match_operand:VM2 0 "register_operand" "=v") (match_operand:VM2 1 "memory_operand" "Z")) @@ -1746,7 +2414,21 @@ "lvx %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_stvx_<mode>" +(define_expand "altivec_stvx_<mode>" + [(parallel + [(set (match_operand:VM2 0 "memory_operand" "=Z") + (match_operand:VM2 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVX)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVX); + DONE; + } +}) + +(define_insn "*altivec_stvx_<mode>_internal" [(parallel [(set (match_operand:VM2 0 "memory_operand" "=Z") (match_operand:VM2 1 "register_operand" "v")) @@ -1755,16 +2437,42 @@ "stvx %1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "altivec_stvxl" +(define_expand "altivec_stvxl_<mode>" [(parallel - [(set (match_operand:V4SI 0 "memory_operand" "=Z") - (match_operand:V4SI 1 "register_operand" "v")) + [(set (match_operand:VM2 0 "memory_operand" "=Z") + (match_operand:VM2 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVXL)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVXL); + DONE; + } +}) + +(define_insn "*altivec_stvxl_<mode>_internal" + [(parallel + [(set (match_operand:VM2 0 "memory_operand" "=Z") + (match_operand:VM2 1 "register_operand" "v")) (unspec [(const_int 0)] UNSPEC_STVXL)])] "TARGET_ALTIVEC" "stvxl %1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "altivec_stve<VI_char>x" +(define_expand "altivec_stve<VI_char>x" + [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z") + (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_stvex_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVE); + DONE; + } +}) + +(define_insn "*altivec_stve<VI_char>x_internal" [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z") (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))] "TARGET_ALTIVEC" @@ -1779,20 +2487,28 @@ [(set_attr "type" "vecstore")]) ;; Generate -;; vspltis? SCRATCH0,0 +;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 ;; vsubu?m SCRATCH2,SCRATCH1,%1 ;; vmaxs? %0,%1,SCRATCH2" (define_expand "abs<mode>2" - [(set (match_dup 2) (vec_duplicate:VI (const_int 0))) - (set (match_dup 3) - (minus:VI (match_dup 2) - (match_operand:VI 1 "register_operand" "v"))) - (set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_dup 1) (match_dup 3)))] - "TARGET_ALTIVEC" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_dup 1) (match_dup 4)))] + "<VI_unit>" { - operands[2] = gen_reg_rtx (GET_MODE (operands[0])); - operands[3] = gen_reg_rtx (GET_MODE (operands[0])); + int i, n_elt = GET_MODE_NUNITS (<MODE>mode); + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. */ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v); + operands[4] = gen_reg_rtx (<MODE>mode); }) ;; Generate @@ -1844,7 +2560,7 @@ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero)); - emit_insn (gen_altivec_vsumsws (dest, vtmp1, vzero)); + emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero)); DONE; }) @@ -1860,7 +2576,7 @@ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero)); - emit_insn (gen_altivec_vsumsws (dest, vtmp1, vzero)); + emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero)); DONE; }) @@ -1950,49 +2666,19 @@ DONE; }") -(define_expand "vec_unpacks_hi_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsb (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_hi_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsh (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_lo_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsb (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_hi_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN_DIRECT))] + "<VI_unit>" + "") -(define_expand "vec_unpacks_lo_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsh (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_lo_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN_DIRECT))] + "<VI_unit>" + "") (define_insn "vperm_v8hiv4si" [(set (match_operand:V4SI 0 "register_operand" "=v") @@ -2025,25 +2711,26 @@ rtx vzero = gen_reg_rtx (V8HImode); rtx mask = gen_reg_rtx (V16QImode); rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; emit_insn (gen_altivec_vspltish (vzero, const0_rtx)); - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 4); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 6); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7); + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 0 : 16); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 6); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 2 : 16); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 4); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 4 : 16); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 2); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 6 : 16); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 0); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16); emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask)); @@ -2060,25 +2747,26 @@ rtx vzero = gen_reg_rtx (V4SImode); rtx mask = gen_reg_rtx (V16QImode); rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 4); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 6); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7); + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 6); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 0 : 17); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 4); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 2 : 17); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 2); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 4 : 17); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 0); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 6 : 17); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16); emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask)); @@ -2095,25 +2783,26 @@ rtx vzero = gen_reg_rtx (V8HImode); rtx mask = gen_reg_rtx (V16QImode); rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; emit_insn (gen_altivec_vspltish (vzero, const0_rtx)); - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 12); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 14); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15); + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 8 : 16); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 14); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 10 : 16); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 12); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 12 : 16); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 10); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 14 : 16); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 8); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16); emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask)); @@ -2130,25 +2819,26 @@ rtx vzero = gen_reg_rtx (V4SImode); rtx mask = gen_reg_rtx (V16QImode); rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 12); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 14); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15); + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 14); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 8 : 17); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 12); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 10 : 17); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 10); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 12 : 17); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 8); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16); emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask)); @@ -2166,9 +2856,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); + } DONE; }") @@ -2183,9 +2882,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); + } DONE; }") @@ -2200,9 +2908,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); + } DONE; }") @@ -2217,9 +2934,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); + } DONE; }") @@ -2234,9 +2960,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve)); + } DONE; }") @@ -2251,9 +2986,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve)); + } DONE; }") @@ -2268,9 +3012,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve)); + } DONE; }") @@ -2285,35 +3038,28 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); - DONE; -}") - -(define_expand "vec_pack_trunc_v8hi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2])); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve)); + } DONE; }") -(define_expand "vec_pack_trunc_v4si" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2])); - DONE; -}") +(define_expand "vec_pack_trunc_<mode>" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "") (define_expand "altivec_negv4sf2" [(use (match_operand:V4SF 0 "register_operand" "")) @@ -2460,3 +3206,243 @@ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); DONE; }") + + +;; Power8 vector instructions encoded as Altivec instructions + +;; Vector count leading zeros +(define_insn "*p8v_clz<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vclz<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector population count +(define_insn "*p8v_popcount<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vpopcnt<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector Gather Bits by Bytes by Doubleword +(define_insn "p8v_vgbbd" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VGBBD))] + "TARGET_P8_VECTOR" + "vgbbd %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + + +;; 128-bit binary integer arithmetic +;; We have a special container type (V1TImode) to allow operations using the +;; ISA 2.07 128-bit binary support to target the VMX/altivec registers without +;; having to worry about the register allocator deciding GPRs are better. + +(define_insn "altivec_vadduqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (plus:V1TI (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")))] + "TARGET_VADDUQM" + "vadduqm %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddcuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")] + UNSPEC_VADDCUQ))] + "TARGET_VADDUQM" + "vaddcuq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (minus:V1TI (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")))] + "TARGET_VADDUQM" + "vsubuqm %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubcuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")] + UNSPEC_VSUBCUQ))] + "TARGET_VADDUQM" + "vsubcuq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddeuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VADDEUQM))] + "TARGET_VADDUQM" + "vaddeuqm %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddecuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VADDECUQ))] + "TARGET_VADDUQM" + "vaddecuq %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubeuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VSUBEUQM))] + "TARGET_VADDUQM" + "vsubeuqm %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubecuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VSUBECUQ))] + "TARGET_VADDUQM" + "vsubecuq %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; We use V2DI as the output type to simplify converting the permute +;; bits into an integer +(define_insn "altivec_vbpermq" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMQ))] + "TARGET_P8_VECTOR" + "vbpermq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Decimal Integer operations +(define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) + +(define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add") + (UNSPEC_BCDSUB "sub")]) + +(define_code_iterator BCD_TEST [eq lt gt unordered]) + +(define_insn "bcd<bcd_add_sub>" + [(set (match_operand:V1TI 0 "register_operand" "") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "") + (match_operand:V1TI 2 "register_operand" "") + (match_operand:QI 3 "const_0_to_1_operand" "")] + UNSPEC_BCD_ADD_SUB)) + (clobber (reg:CCFP 74))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Use a floating point type (V2DFmode) for the compare to set CR6 so that we +;; can use the unordered test for BCD nans and add/subtracts that overflow. An +;; UNORDERED test on an integer type (like V1TImode) is not defined. The type +;; probably should be one that can go in the VMX (Altivec) registers, so we +;; can't use DDmode or DFmode. +(define_insn "*bcd<bcd_add_sub>_test" + [(set (reg:CCFP 74) + (compare:CCFP + (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:QI 3 "const_0_to_1_operand" "i")] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" "j"))) + (clobber (match_scratch:V1TI 0 "=v"))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "*bcd<bcd_add_sub>_test2" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:QI 3 "const_0_to_1_operand" "i")] + UNSPEC_BCD_ADD_SUB)) + (set (reg:CCFP 74) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" "j")))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_expand "bcd<bcd_add_sub>_<code>" + [(parallel [(set (reg:CCFP 74) + (compare:CCFP + (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "") + (match_operand:V1TI 2 "register_operand" "") + (match_operand:QI 3 "const_0_to_1_operand" "")] + UNSPEC_BCD_ADD_SUB) + (match_dup 4))) + (clobber (match_scratch:V1TI 5 ""))]) + (set (match_operand:SI 0 "register_operand" "") + (BCD_TEST:SI (reg:CCFP 74) + (const_int 0)))] + "TARGET_P8_VECTOR" +{ + operands[4] = CONST0_RTX (V2DFmode); +}) + +;; Peephole2 pattern to combine a bcdadd/bcdsub that calculates the value and +;; the bcdadd/bcdsub that tests the value. The combiner won't work since +;; CR6 is a hard coded register. Unfortunately, all of the Altivec predicate +;; support is hard coded to use the fixed register CR6 instead of creating +;; a register class for CR6. + +(define_peephole2 + [(parallel [(set (match_operand:V1TI 0 "register_operand" "") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "") + (match_operand:V1TI 2 "register_operand" "") + (match_operand:QI 3 "const_0_to_1_operand" "")] + UNSPEC_BCD_ADD_SUB)) + (clobber (reg:CCFP 74))]) + (parallel [(set (reg:CCFP 74) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" ""))) + (clobber (match_operand:V1TI 5 "register_operand" ""))])] + "TARGET_P8_VECTOR" + [(parallel [(set (match_dup 0) + (unspec:V1TI [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB)) + (set (reg:CCFP 74) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_dup 4)))])]) |