From 38a8aecfb882072900434499696b5c32a2274515 Mon Sep 17 00:00:00 2001
From: Rong Xu
Date: Mon, 21 Jul 2014 16:47:22 -0700
Subject: [4.9] Switch gcc-4.9 to use google/gcc-4_9 branch.

This source drop uses svn revision r212828 of the google/gcc-4.9 branch.
We also cherry-picked r213062, r213063 and r213064 to fix Windows build
issues.

All gcc-4.9 patches before July 3rd are ported to google/gcc-4.9. The
following prior commits have not been merged to the google branch yet
(they are included in this commit):
e7af147f979e657fe2df00808e5b4319b0e088c6,
baf87df3cb2683649ba7e9872362a7e721117c23, and
c231900e5dcc14d8296bd9f62b45997a49d4d5e7.

Change-Id: I4bea3ea470387ff751c2be4cb0d4a12059b9299b
---
 gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h |  59 +++
 gcc-4.9/gcc/config/aarch64/aarch64-linux.h         |  35 +-
 gcc-4.9/gcc/config/aarch64/aarch64-modes.def       |   1 +
 gcc-4.9/gcc/config/aarch64/aarch64-simd.md         | 461 ++++++++++++++++++---
 gcc-4.9/gcc/config/aarch64/aarch64.c               |  20 +-
 gcc-4.9/gcc/config/aarch64/aarch64.md              |  28 +-
 gcc-4.9/gcc/config/aarch64/arm_neon.h              |  50 +--
 gcc-4.9/gcc/config/aarch64/iterators.md            |  16 +-
 8 files changed, 522 insertions(+), 148 deletions(-)
 create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h

diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
new file mode 100644
index 000000000..91d235ff1
--- /dev/null
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
@@ -0,0 +1,59 @@
+/* Machine description for AArch64 architecture.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.
*/ + +#ifndef GCC_AARCH64_LINUX_ANDROID_H +#define GCC_AARCH64_LINUX_ANDROID_H + + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + ANDROID_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +#undef LINK_SPEC +#define LINK_SPEC \ + LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ + LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) + +#undef CC1_SPEC +#define CC1_SPEC \ + LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ + GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic")) + +#define CC1PLUS_SPEC \ + LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) + +#undef LIB_SPEC +#define LIB_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) + +#endif /* GCC_AARCH64_LINUX_ANDROID_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h index f32d19f16..f8a97c899 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h @@ -21,8 +21,10 @@ #ifndef GCC_AARCH64_LINUX_H #define GCC_AARCH64_LINUX_H -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" -#define BIONIC_DYNAMIC_LINKER "/system/bin/linker64" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" #define CPP_SPEC "%{pthread:-D_REENTRANT}" @@ -36,38 +38,13 @@ %{mbig-endian:-EB} %{mlittle-endian:-EL} \ -maarch64linux%{mbig-endian:b}" +#define LINK_SPEC LINUX_TARGET_LINK_SPEC + #define TARGET_OS_CPP_BUILTINS() \ do \ { \ GNU_USER_TARGET_OS_CPP_BUILTINS(); \ - ANDROID_TARGET_OS_CPP_BUILTINS(); \ } \ while (0) -#undef LINK_SPEC -#define LINK_SPEC \ - LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ - LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) - -#undef CC1_SPEC -#define CC1_SPEC \ - LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ - GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic")) - -#define CC1PLUS_SPEC \ - LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) - -#undef LIB_SPEC -#define LIB_SPEC \ - LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) - -#undef STARTFILE_SPEC -#define STARTFILE_SPEC \ - LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) - -#undef ENDFILE_SPEC -#define ENDFILE_SPEC \ - LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) - #endif /* GCC_AARCH64_LINUX_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def index 1d2cc7679..f9c436948 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def +++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def @@ -31,6 +31,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ VECTOR_MODES (FLOAT, 8); /* V2SF. */ VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ +VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */ /* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. 
*/ INT_MODE (OI, 32); diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 73aee2c3d..1f827b57d 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -934,14 +934,22 @@ [(set_attr "type" "neon_minmax")] ) -;; Move into low-half clearing high half to 0. +;; vec_concat gives a new vector with the low elements from operand 1, and +;; the high elements from operand 2. That is to say, given op1 = { a, b } +;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. +;; What that means, is that the RTL descriptions of the below patterns +;; need to change depending on endianness. -(define_insn "move_lo_quad_" +;; Move to the low architectural bits of the register. +;; On little-endian this is { operand, zeroes } +;; On big-endian this is { zeroes, operand } + +(define_insn "move_lo_quad_internal_" [(set (match_operand:VQ 0 "register_operand" "=w,w,w") (vec_concat:VQ (match_operand: 1 "register_operand" "w,r,r") (vec_duplicate: (const_int 0))))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ dup\\t%d0, %1.d[0] fmov\\t%d0, %1 @@ -952,7 +960,39 @@ (set_attr "length" "4")] ) -;; Move into high-half. +(define_insn "move_lo_quad_internal_be_" + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") + (vec_concat:VQ + (vec_duplicate: (const_int 0)) + (match_operand: 1 "register_operand" "w,r,r")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "type" "neon_dup,f_mcr,neon_dup") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +) + +(define_expand "move_lo_quad_" + [(match_operand:VQ 0 "register_operand") + (match_operand:VQ 1 "register_operand")] + "TARGET_SIMD" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_move_lo_quad_internal_be_ (operands[0], operands[1])); + else + emit_insn (gen_move_lo_quad_internal_ (operands[0], operands[1])); + DONE; +} +) + +;; Move operand1 to the high architectural bits of the register, keeping +;; the low architectural bits of operand2. 
+;; For little-endian this is { operand2, operand1 } +;; For big-endian this is { operand1, operand2 } (define_insn "aarch64_simd_move_hi_quad_" [(set (match_operand:VQ 0 "register_operand" "+w,w") @@ -961,12 +1001,25 @@ (match_dup 0) (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) (match_operand: 1 "register_operand" "w,r")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ ins\\t%0.d[1], %1.d[0] ins\\t%0.d[1], %1" - [(set_attr "type" "neon_ins") - (set_attr "length" "4")] + [(set_attr "type" "neon_ins")] +) + +(define_insn "aarch64_simd_move_hi_quad_be_" + [(set (match_operand:VQ 0 "register_operand" "+w,w") + (vec_concat:VQ + (match_operand: 1 "register_operand" "w,r") + (vec_select: + (match_dup 0) + (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "type" "neon_ins")] ) (define_expand "move_hi_quad_" @@ -974,9 +1027,13 @@ (match_operand: 1 "register_operand" "")] "TARGET_SIMD" { - rtx p = aarch64_simd_vect_par_cnst_half (mode, false); - emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], - operands[1], p)); + rtx p = aarch64_simd_vect_par_cnst_half (mode, BYTES_BIG_ENDIAN); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_simd_move_hi_quad_be_ (operands[0], + operands[1], p)); + else + emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], + operands[1], p)); DONE; }) @@ -2321,12 +2378,44 @@ (vec_concat: (match_operand:VDIC 1 "register_operand" "w") (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "mov\\t%0.8b, %1.8b" [(set_attr "type" "neon_move")] ) -(define_insn_and_split "aarch64_combine" +(define_insn "*aarch64_combinez_be" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz") + (match_operand:VDIC 1 "register_operand" "w")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "mov\\t%0.8b, %1.8b" + [(set_attr "type" "neon_move")] +) + +(define_expand "aarch64_combine" + [(match_operand: 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] + "TARGET_SIMD" +{ + rtx op1, op2; + if (BYTES_BIG_ENDIAN) + { + op1 = operands[2]; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = operands[2]; + } + emit_insn (gen_aarch64_combine_internal (operands[0], op1, op2)); + DONE; +} +) + +(define_insn_and_split "aarch64_combine_internal" [(set (match_operand: 0 "register_operand" "=&w") (vec_concat: (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] @@ -2335,16 +2424,19 @@ "&& reload_completed" [(const_int 0)] { - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + if (BYTES_BIG_ENDIAN) + aarch64_split_simd_combine (operands[0], operands[2], operands[1]); + else + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); DONE; } [(set_attr "type" "multiple")] ) (define_expand "aarch64_simd_combine" - [(set (match_operand: 0 "register_operand" "=&w") - (vec_concat: (match_operand:VDC 1 "register_operand" "w") - (match_operand:VDC 2 "register_operand" "w")))] + [(match_operand: 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] "TARGET_SIMD" { emit_insn (gen_move_lo_quad_ (operands[0], operands[1])); @@ -2633,7 +2725,41 @@ ;; sqdmulh_lane -(define_insn "aarch64_sqdmulh_lane" +(define_expand "aarch64_sqdmulh_lane" + [(match_operand:VDQHS 0 "register_operand" "") + 
(match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_lane_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2649,7 +2775,41 @@ [(set_attr "type" "neon_sat_mul__scalar")] ) -(define_insn "aarch64_sqdmulh_laneq" +(define_expand "aarch64_sqdmulh_laneq" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_laneq_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_laneq" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_laneq_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_laneq_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2659,24 +2819,56 @@ VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return \"sqdmulh\\t%0., %1., %2.[%3]\";" [(set_attr "type" "neon_sat_mul__scalar")] ) -(define_insn "aarch64_sqdmulh_lane" +(define_expand "aarch64_sqdmulh_lane" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT 
(ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_lane_internal" [(set (match_operand:SD_HSI 0 "register_operand" "=w") (unspec:SD_HSI [(match_operand:SD_HSI 1 "register_operand" "w") (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return \"sqdmulh\\t%0, %1, %2.[%3]\";" [(set_attr "type" "neon_sat_mul__scalar")] ) @@ -2712,7 +2904,31 @@ (sign_extend: (vec_duplicate:VD_HSI (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) )) (const_int 1))))] @@ -2735,7 +2951,30 @@ (match_operand:SD_HSI 2 "register_operand" "w")) (sign_extend: (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) ) (const_int 1))))] @@ -2752,11 +2991,12 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2767,12 +3007,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn 
(gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2782,11 +3023,12 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2797,12 +3039,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2890,7 +3133,33 @@ (sign_extend: (vec_duplicate: (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll2\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll2_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) )))) (const_int 1))))] @@ -2907,12 +3176,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2923,13 +3193,14 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" 
"i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal2_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -2939,12 +3210,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2955,13 +3227,14 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl2_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -3041,7 +3314,28 @@ (sign_extend: (vec_duplicate:VD_HSI (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3061,7 +3355,27 @@ (match_operand:SD_HSI 1 "register_operand" "w")) (sign_extend: (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")])) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")])) )) (const_int 1)))] @@ -3076,11 
+3390,12 @@ (define_expand "aarch64_sqdmull_lane" [(match_operand: 0 "register_operand" "=w") (match_operand:VSD_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull_lane_internal (operands[0], operands[1], operands[2], operands[3])); DONE; @@ -3089,12 +3404,13 @@ (define_expand "aarch64_sqdmull_laneq" [(match_operand: 0 "register_operand" "=w") (match_operand:VD_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmull_lane_internal + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull_laneq_internal (operands[0], operands[1], operands[2], operands[3])); DONE; }) @@ -3143,7 +3459,7 @@ (define_expand "aarch64_sqdmull2" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "w")] + (match_operand:VQ_HSI 2 "register_operand" "w")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); @@ -3165,7 +3481,30 @@ (sign_extend: (vec_duplicate: (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull2\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull2_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3180,12 +3519,13 @@ (define_expand "aarch64_sqdmull2_lane" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], operands[2], operands[3], p)); @@ -3195,13 +3535,14 @@ (define_expand "aarch64_sqdmull2_laneq" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - 
emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull2_laneq_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 7b6c2b38e..bf35031ec 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -1405,6 +1405,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); int ncrn, nvrn, nregs; bool allocate_ncrn, allocate_nvrn; + HOST_WIDE_INT size; /* We need to do this once per argument. */ if (pcum->aapcs_arg_processed) @@ -1412,6 +1413,11 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, pcum->aapcs_arg_processed = true; + /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ + size + = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode), + UNITS_PER_WORD); + allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, mode, @@ -1462,9 +1468,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, } ncrn = pcum->aapcs_ncrn; - nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - + nregs = size / UNITS_PER_WORD; /* C6 - C9. though the sign and zero extension semantics are handled elsewhere. This is the case where the argument fits @@ -1513,13 +1517,12 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, pcum->aapcs_nextncrn = NUM_ARG_REGS; /* The argument is passed on stack; record the needed number of words for - this argument (we can re-use NREGS) and align the total size if - necessary. */ + this argument and align the total size if necessary. 
*/ on_stack: - pcum->aapcs_stack_words = nregs; + pcum->aapcs_stack_words = size / UNITS_PER_WORD; if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size, - 16 / UNITS_PER_WORD) + 1; + 16 / UNITS_PER_WORD); return; } @@ -6304,7 +6307,8 @@ aarch64_vector_mode_supported_p (enum machine_mode mode) || mode == V16QImode || mode == V2DImode || mode == V2SImode || mode == V4HImode || mode == V8QImode || mode == V2SFmode - || mode == V4SFmode || mode == V2DFmode)) + || mode == V4SFmode || mode == V2DFmode + || mode == V1DFmode)) return true; return false; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index c86a29d8e..df81045e9 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -2823,17 +2823,18 @@ ;; Arithmetic right shift using SISD or Integer instruction (define_insn "*aarch64_ashr_sisd_or_int_3" - [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r") (ashiftrt:GPI - (match_operand:GPI 1 "register_operand" "w,w,r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,rUs")))] + (match_operand:GPI 1 "register_operand" "w,w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,0,rUs")))] "" "@ sshr\t%0, %1, %2 # + # asr\t%0, %1, %2" - [(set_attr "simd" "yes,yes,no") - (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] + [(set_attr "simd" "yes,yes,yes,no") + (set_attr "type" "neon_shift_imm,neon_shift_reg,neon_shift_reg,shift_reg")] ) (define_split @@ -2842,11 +2843,13 @@ (match_operand:DI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))] - "" + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_SSHL))] +{ + operands[3] = gen_lowpart (QImode, operands[0]); +} ) (define_split @@ -2855,11 +2858,13 @@ (match_operand:SI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] - "" + (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_SSHL_2S))] +{ + operands[3] = gen_lowpart (QImode, operands[0]); +} ) (define_insn "*aarch64_sisd_ushl" @@ -3608,6 +3613,7 @@ (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) + (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:DI 1 "=r"))] "TARGET_TLS_DESC" "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h index b03d11422..c01669b2c 100644 --- a/gcc-4.9/gcc/config/aarch64/arm_neon.h +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -21008,7 +21008,7 @@ vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, +vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, int const __d) { return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); @@ -21030,8 +21030,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) 
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21059,7 +21058,7 @@ vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, +vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, int const __d) { return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); @@ -21081,8 +21080,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21104,7 +21102,7 @@ vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } @@ -21116,7 +21114,7 @@ vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); } @@ -21136,7 +21134,7 @@ vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, +vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, int const __d) { return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); @@ -21158,8 +21156,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21187,7 +21184,7 @@ vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, +vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, int const __d) { return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); @@ -21209,8 +21206,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline 
int64x2_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21232,7 +21228,7 @@ vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); } @@ -21244,7 +21240,7 @@ vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); } @@ -21282,7 +21278,7 @@ vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } @@ -21294,7 +21290,7 @@ vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } @@ -21314,7 +21310,7 @@ vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c) +vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c) { return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); } @@ -21334,8 +21330,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) { - int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); + return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21363,7 +21358,7 @@ vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c) +vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c) { return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); } @@ -21383,8 +21378,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) { - int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); + return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ 
-21406,7 +21400,7 @@ vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
 {
   return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
 }
@@ -21418,7 +21412,7 @@ vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
 {
   return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
 }
@@ -21594,7 +21588,7 @@ vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
 }
 
 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
 {
   return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
 }
@@ -21606,7 +21600,7 @@ vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
 {
   return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
 }
diff --git a/gcc-4.9/gcc/config/aarch64/iterators.md b/gcc-4.9/gcc/config/aarch64/iterators.md
index f1339b8cc..e76e3ef10 100644
--- a/gcc-4.9/gcc/config/aarch64/iterators.md
+++ b/gcc-4.9/gcc/config/aarch64/iterators.md
@@ -396,14 +396,15 @@
 			 (SI "SI") (HI "HI")
 			 (QI "QI")])
 
-;; Define container mode for lane selection.
-(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI")
+;; 64-bit container modes for the inner or scalar source mode.
+(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI")
+			 (V4HI "V4HI") (V8HI "V4HI")
 			 (V2SI "V2SI") (V4SI "V2SI")
 			 (DI "DI") (V2DI "DI")
 			 (V2SF "V2SF") (V4SF "V2SF")
 			 (V2DF "DF")])
 
-;; Define container mode for lane selection.
+;; 128-bit container modes for the inner or scalar source mode.
 (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
 			 (V4HI "V8HI") (V8HI "V8HI")
 			 (V2SI "V4SI") (V4SI "V4SI")
@@ -412,15 +413,6 @@
 			 (V2DF "V2DF") (SI "V4SI")
 			 (HI "V8HI") (QI "V16QI")])
 
-;; Define container mode for lane selection.
-(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI")
-			(V4HI "V8HI") (V8HI "V8HI")
-			(V2SI "V4SI") (V4SI "V4SI")
-			(DI "V2DI") (V2DI "V2DI")
-			(V2SF "V4SF") (V4SF "V4SF")
-			(V2DF "V2DF") (SI "V4SI")
-			(HI "V8HI") (QI "V16QI")])
-
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI")
 			 (V4HI "V2HI") (V8HI "V4HI")
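
For context, the user-visible effect of the arm_neon.h hunks above is that the non-"high" lane intrinsics now take the 64-bit (d-register) vector as the lane operand, with the lane index bounds-checked against that narrower vector. A minimal caller sketch under the patched headers (the function name and values are illustrative, not part of the patch):

```c
#include <arm_neon.h>

/* With the patched arm_neon.h, the lane operand of vqdmlal_lane_s16 is
   an int16x4_t (d-register) rather than the previous int16x8_t, so the
   valid lane indices are 0..3.  */
int32x4_t
widening_mla_lane (int32x4_t acc, int16x4_t a, int16x4_t b)
{
  /* acc + saturating-doubling widening multiply of a by lane 1 of b.  */
  return vqdmlal_lane_s16 (acc, a, b, 1);
}
```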