From 38a8aecfb882072900434499696b5c32a2274515 Mon Sep 17 00:00:00 2001
From: Rong Xu
Date: Mon, 21 Jul 2014 16:47:22 -0700
Subject: [4.9] Switch gcc-4.9 to use google/gcc-4_9 branch.

This source drop uses svn revision r212828 of the google/gcc-4.9 branch.
We also cherry-picked r213062, r213063, and r213064 to fix Windows build
issues.

All gcc-4.9 patches before July 3rd have been ported to google/gcc-4.9.
The following prior commits have not yet been merged to the google branch
(they are included in this commit):
e7af147f979e657fe2df00808e5b4319b0e088c6,
baf87df3cb2683649ba7e9872362a7e721117c23, and
c231900e5dcc14d8296bd9f62b45997a49d4d5e7.

Change-Id: I4bea3ea470387ff751c2be4cb0d4a12059b9299b
---
 gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h |  59 ++
 gcc-4.9/gcc/config/aarch64/aarch64-linux.h         |  35 +-
 gcc-4.9/gcc/config/aarch64/aarch64-modes.def       |   1 +
 gcc-4.9/gcc/config/aarch64/aarch64-simd.md         | 461 ++++++++++++++--
 gcc-4.9/gcc/config/aarch64/aarch64.c               |  20 +-
 gcc-4.9/gcc/config/aarch64/aarch64.md              |  28 +-
 gcc-4.9/gcc/config/aarch64/arm_neon.h              |  50 +-
 gcc-4.9/gcc/config/aarch64/iterators.md            |  16 +-
 gcc-4.9/gcc/config/alpha/alpha.c                   |   6 +
 gcc-4.9/gcc/config/arm/aout.h                      |   9 +-
 gcc-4.9/gcc/config/arm/arm-cores.def               |   9 +-
 gcc-4.9/gcc/config/arm/arm-opts.h                  |   9 +-
 gcc-4.9/gcc/config/arm/arm.c                       |  13 +-
 gcc-4.9/gcc/config/arm/arm.h                       |   9 +-
 gcc-4.9/gcc/config/arm/arm.md                      |  13 +-
 gcc-4.9/gcc/config/arm/arm_neon.h                  | 608 +++++++++++++++++----
 gcc-4.9/gcc/config/arm/bpabi.h                     |   9 +-
 gcc-4.9/gcc/config/arm/elf.h                       |   9 +-
 gcc-4.9/gcc/config/arm/linux-eabi.h                |   5 +-
 gcc-4.9/gcc/config/arm/linux-elf.h                 |  11 +-
 gcc-4.9/gcc/config/arm/linux-gas.h                 |   9 +-
 gcc-4.9/gcc/config/arm/linux-grte.h                |  27 +
 gcc-4.9/gcc/config/arm/neon-docgen.ml              | 424 --------------
 gcc-4.9/gcc/config/arm/neon-gen.ml                 | 520 ------------------
 gcc-4.9/gcc/config/arm/netbsd-elf.h                |   9 +-
 gcc-4.9/gcc/config/arm/uclinux-eabi.h              |   9 +-
 gcc-4.9/gcc/config/arm/uclinux-elf.h               |   9 +-
 gcc-4.9/gcc/config/arm/unspecs.md                  |   9 +-
 gcc-4.9/gcc/config/arm/vxworks.h                   |   9 +-
 gcc-4.9/gcc/config/avr/avr-fixed.md                |   4 +-
 gcc-4.9/gcc/config/avr/avr.h                       |  14 +-
 gcc-4.9/gcc/config/avr/avr.md                      |   9 +
 gcc-4.9/gcc/config/dbx.h                           |   9 +-
 gcc-4.9/gcc/config/i386/driver-i386.c              |   5 +
 gcc-4.9/gcc/config/i386/gnu-user.h                 |   6 +-
 gcc-4.9/gcc/config/i386/i386-protos.h              |  11 +
 gcc-4.9/gcc/config/i386/i386.c                     | 362 +++++++++++-
 gcc-4.9/gcc/config/i386/i386.md                    |  97 ++--
 gcc-4.9/gcc/config/i386/i386.opt                   |  20 +
 gcc-4.9/gcc/config/i386/linux.h                    |  20 +-
 gcc-4.9/gcc/config/i386/linux64.h                  |  19 +-
 gcc-4.9/gcc/config/i386/sse.md                     |  64 +++
 gcc-4.9/gcc/config/initfini-array.h                |   9 +-
 gcc-4.9/gcc/config/linux-grte.h                    |  41 ++
 gcc-4.9/gcc/config/linux.c                         |   4 +-
 gcc-4.9/gcc/config/linux.h                         |  17 +-
 gcc-4.9/gcc/config/msp430/msp430-opts.h            |  32 ++
 gcc-4.9/gcc/config/msp430/msp430.c                 | 117 +++-
 gcc-4.9/gcc/config/msp430/msp430.h                 |   4 +-
 gcc-4.9/gcc/config/msp430/msp430.md                |  32 +-
 gcc-4.9/gcc/config/msp430/predicates.md            |   4 +
 gcc-4.9/gcc/config/newlib-stdint.h                 |   9 +-
 gcc-4.9/gcc/config/rs6000/htm.md                   |   2 +-
 gcc-4.9/gcc/config/rs6000/htmxlintrin.h            |   9 +-
 gcc-4.9/gcc/config/rs6000/linux-grte.h             |  41 ++
 gcc-4.9/gcc/config/rs6000/linux64.h                |   6 +-
 gcc-4.9/gcc/config/rs6000/rs6000-builtin.def       |  21 +-
 gcc-4.9/gcc/config/rs6000/rs6000-protos.h          |   2 +-
 gcc-4.9/gcc/config/rs6000/rs6000.c                 |  71 ++-
 gcc-4.9/gcc/config/rs6000/rs6000.h                 |  17 +-
 gcc-4.9/gcc/config/rs6000/rs6000.md                |  24 +-
 gcc-4.9/gcc/config/rs6000/sysv4.h                  |  11 +-
 gcc-4.9/gcc/config/rs6000/vsx.md                   |  43 +-
 gcc-4.9/gcc/config/rtems.h                         |   9 +-
 gcc-4.9/gcc/config/sol2-clearcap.map               |   2 +
 gcc-4.9/gcc/config/sol2-clearcapv2.map             |   7 +
 gcc-4.9/gcc/config/sol2.h                          |  11 +-
 gcc-4.9/gcc/config/sol2.opt                        |   4 +
 gcc-4.9/gcc/config/t-sol2                          |   7 +
 gcc-4.9/gcc/config/v850/rtems.h                    |   9 +-
 gcc-4.9/gcc/config/v850/v850-opts.h                |   9 +-
 gcc-4.9/gcc/config/v850/v850.h                     |   9 +-
 72 files changed, 2183 insertions(+), 1445 deletions(-)
 create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
 create mode 100644 gcc-4.9/gcc/config/arm/linux-grte.h
 delete mode 100644 gcc-4.9/gcc/config/arm/neon-docgen.ml
 delete mode 100644 gcc-4.9/gcc/config/arm/neon-gen.ml
 create mode 100644 gcc-4.9/gcc/config/linux-grte.h
 create mode 100644 gcc-4.9/gcc/config/msp430/msp430-opts.h
 create mode 100644 gcc-4.9/gcc/config/rs6000/linux-grte.h
 create mode 100644 gcc-4.9/gcc/config/sol2-clearcap.map
 create mode 100644 gcc-4.9/gcc/config/sol2-clearcapv2.map

diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
new file mode 100644
index 000000000..91d235ff1
--- /dev/null
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
@@ -0,0 +1,59 @@
+/* Machine description for AArch64 architecture.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_AARCH64_LINUX_ANDROID_H
+#define GCC_AARCH64_LINUX_ANDROID_H
+
+
+#undef  TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS()		\
+  do						\
+    {						\
+      GNU_USER_TARGET_OS_CPP_BUILTINS();	\
+      ANDROID_TARGET_OS_CPP_BUILTINS();		\
+    }						\
+  while (0)
+
+#undef  LINK_SPEC
+#define LINK_SPEC					\
+  LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC,		\
+		       LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
+
+#undef  CC1_SPEC
+#define CC1_SPEC						\
+  LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC,		\
+		       GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic"))
+
+#define CC1PLUS_SPEC \
+  LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
+
+#undef  LIB_SPEC
+#define LIB_SPEC						\
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC,		\
+		       GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)
+
+#undef  STARTFILE_SPEC
+#define STARTFILE_SPEC						\
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
+
+#undef  ENDFILE_SPEC
+#define ENDFILE_SPEC						\
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
+
+#endif /* GCC_AARCH64_LINUX_ANDROID_H */
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
index f32d19f16..f8a97c899 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
@@ -21,8 +21,10 @@
 #ifndef GCC_AARCH64_LINUX_H
 #define GCC_AARCH64_LINUX_H
 
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
-#define BIONIC_DYNAMIC_LINKER "/system/bin/linker64"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
 
 #define CPP_SPEC "%{pthread:-D_REENTRANT}"
 
@@ -36,38 +38,13 @@
    %{mbig-endian:-EB} %{mlittle-endian:-EL} \
    -maarch64linux%{mbig-endian:b}"
 
+#define LINK_SPEC LINUX_TARGET_LINK_SPEC
+
 #define TARGET_OS_CPP_BUILTINS()		\
   do						\
     {						\
       GNU_USER_TARGET_OS_CPP_BUILTINS();	\
-      ANDROID_TARGET_OS_CPP_BUILTINS();		\
     }						\
   while (0)
 
-#undef LINK_SPEC
-#define LINK_SPEC \
-  LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC,		\
-		       LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
-
-#undef CC1_SPEC
-#define CC1_SPEC						\
-  LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC,		\
-		       GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic"))
-
-#define CC1PLUS_SPEC \
-  LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
-
-#undef LIB_SPEC
-#define LIB_SPEC						\
-  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC,		\
-		       GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC						\
-  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
-
-#undef ENDFILE_SPEC
-#define ENDFILE_SPEC						\
-  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
-
 #endif /* GCC_AARCH64_LINUX_H */
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
index 1d2cc7679..f9c436948 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
@@ -31,6 +31,7 @@
 VECTOR_MODES (INT, 8);        /*       V8QI V4HI V2SI.  */
 VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI.  */
 VECTOR_MODES (FLOAT, 8);      /*            V2SF.  */
 VECTOR_MODES (FLOAT, 16);     /*       V4SF V2DF.  */
+VECTOR_MODE (FLOAT, DF, 1);   /*                 V1DF.  */
 
 /* Oct Int: 256-bit integer mode needed for 32-byte vector arguments.
*/ INT_MODE (OI, 32); diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 73aee2c3d..1f827b57d 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -934,14 +934,22 @@ [(set_attr "type" "neon_minmax")] ) -;; Move into low-half clearing high half to 0. +;; vec_concat gives a new vector with the low elements from operand 1, and +;; the high elements from operand 2. That is to say, given op1 = { a, b } +;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. +;; What that means, is that the RTL descriptions of the below patterns +;; need to change depending on endianness. -(define_insn "move_lo_quad_" +;; Move to the low architectural bits of the register. +;; On little-endian this is { operand, zeroes } +;; On big-endian this is { zeroes, operand } + +(define_insn "move_lo_quad_internal_" [(set (match_operand:VQ 0 "register_operand" "=w,w,w") (vec_concat:VQ (match_operand: 1 "register_operand" "w,r,r") (vec_duplicate: (const_int 0))))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ dup\\t%d0, %1.d[0] fmov\\t%d0, %1 @@ -952,7 +960,39 @@ (set_attr "length" "4")] ) -;; Move into high-half. +(define_insn "move_lo_quad_internal_be_" + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") + (vec_concat:VQ + (vec_duplicate: (const_int 0)) + (match_operand: 1 "register_operand" "w,r,r")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "type" "neon_dup,f_mcr,neon_dup") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +) + +(define_expand "move_lo_quad_" + [(match_operand:VQ 0 "register_operand") + (match_operand:VQ 1 "register_operand")] + "TARGET_SIMD" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_move_lo_quad_internal_be_ (operands[0], operands[1])); + else + emit_insn (gen_move_lo_quad_internal_ (operands[0], operands[1])); + DONE; +} +) + +;; Move operand1 to the high architectural bits of the register, keeping +;; the low architectural bits of operand2. 
+;; For little-endian this is { operand2, operand1 } +;; For big-endian this is { operand1, operand2 } (define_insn "aarch64_simd_move_hi_quad_" [(set (match_operand:VQ 0 "register_operand" "+w,w") @@ -961,12 +1001,25 @@ (match_dup 0) (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) (match_operand: 1 "register_operand" "w,r")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ ins\\t%0.d[1], %1.d[0] ins\\t%0.d[1], %1" - [(set_attr "type" "neon_ins") - (set_attr "length" "4")] + [(set_attr "type" "neon_ins")] +) + +(define_insn "aarch64_simd_move_hi_quad_be_" + [(set (match_operand:VQ 0 "register_operand" "+w,w") + (vec_concat:VQ + (match_operand: 1 "register_operand" "w,r") + (vec_select: + (match_dup 0) + (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "type" "neon_ins")] ) (define_expand "move_hi_quad_" @@ -974,9 +1027,13 @@ (match_operand: 1 "register_operand" "")] "TARGET_SIMD" { - rtx p = aarch64_simd_vect_par_cnst_half (mode, false); - emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], - operands[1], p)); + rtx p = aarch64_simd_vect_par_cnst_half (mode, BYTES_BIG_ENDIAN); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_simd_move_hi_quad_be_ (operands[0], + operands[1], p)); + else + emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], + operands[1], p)); DONE; }) @@ -2321,12 +2378,44 @@ (vec_concat: (match_operand:VDIC 1 "register_operand" "w") (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "mov\\t%0.8b, %1.8b" [(set_attr "type" "neon_move")] ) -(define_insn_and_split "aarch64_combine" +(define_insn "*aarch64_combinez_be" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz") + (match_operand:VDIC 1 "register_operand" "w")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "mov\\t%0.8b, %1.8b" + [(set_attr "type" "neon_move")] +) + +(define_expand "aarch64_combine" + [(match_operand: 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] + "TARGET_SIMD" +{ + rtx op1, op2; + if (BYTES_BIG_ENDIAN) + { + op1 = operands[2]; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = operands[2]; + } + emit_insn (gen_aarch64_combine_internal (operands[0], op1, op2)); + DONE; +} +) + +(define_insn_and_split "aarch64_combine_internal" [(set (match_operand: 0 "register_operand" "=&w") (vec_concat: (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] @@ -2335,16 +2424,19 @@ "&& reload_completed" [(const_int 0)] { - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + if (BYTES_BIG_ENDIAN) + aarch64_split_simd_combine (operands[0], operands[2], operands[1]); + else + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); DONE; } [(set_attr "type" "multiple")] ) (define_expand "aarch64_simd_combine" - [(set (match_operand: 0 "register_operand" "=&w") - (vec_concat: (match_operand:VDC 1 "register_operand" "w") - (match_operand:VDC 2 "register_operand" "w")))] + [(match_operand: 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] "TARGET_SIMD" { emit_insn (gen_move_lo_quad_ (operands[0], operands[1])); @@ -2633,7 +2725,41 @@ ;; sqdmulh_lane -(define_insn "aarch64_sqdmulh_lane" +(define_expand "aarch64_sqdmulh_lane" + [(match_operand:VDQHS 0 "register_operand" "") + 
(match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_lane_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2649,7 +2775,41 @@ [(set_attr "type" "neon_sat_mul__scalar")] ) -(define_insn "aarch64_sqdmulh_laneq" +(define_expand "aarch64_sqdmulh_laneq" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_laneq_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_laneq" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_laneq_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_laneq_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2659,24 +2819,56 @@ VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return \"sqdmulh\\t%0., %1., %2.[%3]\";" [(set_attr "type" "neon_sat_mul__scalar")] ) -(define_insn "aarch64_sqdmulh_lane" +(define_expand "aarch64_sqdmulh_lane" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT 
(ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_lane_internal" [(set (match_operand:SD_HSI 0 "register_operand" "=w") (unspec:SD_HSI [(match_operand:SD_HSI 1 "register_operand" "w") (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return \"sqdmulh\\t%0, %1, %2.[%3]\";" [(set_attr "type" "neon_sat_mul__scalar")] ) @@ -2712,7 +2904,31 @@ (sign_extend: (vec_duplicate:VD_HSI (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) )) (const_int 1))))] @@ -2735,7 +2951,30 @@ (match_operand:SD_HSI 2 "register_operand" "w")) (sign_extend: (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) ) (const_int 1))))] @@ -2752,11 +2991,12 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2767,12 +3007,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn 
(gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2782,11 +3023,12 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2797,12 +3039,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2890,7 +3133,33 @@ (sign_extend: (vec_duplicate: (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll2\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll2_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) )))) (const_int 1))))] @@ -2907,12 +3176,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2923,13 +3193,14 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" 
"i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal2_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -2939,12 +3210,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2955,13 +3227,14 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl2_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -3041,7 +3314,28 @@ (sign_extend: (vec_duplicate:VD_HSI (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3061,7 +3355,27 @@ (match_operand:SD_HSI 1 "register_operand" "w")) (sign_extend: (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")])) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")])) )) (const_int 1)))] @@ -3076,11 
+3390,12 @@ (define_expand "aarch64_sqdmull_lane" [(match_operand: 0 "register_operand" "=w") (match_operand:VSD_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull_lane_internal (operands[0], operands[1], operands[2], operands[3])); DONE; @@ -3089,12 +3404,13 @@ (define_expand "aarch64_sqdmull_laneq" [(match_operand: 0 "register_operand" "=w") (match_operand:VD_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmull_lane_internal + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull_laneq_internal (operands[0], operands[1], operands[2], operands[3])); DONE; }) @@ -3143,7 +3459,7 @@ (define_expand "aarch64_sqdmull2" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "w")] + (match_operand:VQ_HSI 2 "register_operand" "w")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); @@ -3165,7 +3481,30 @@ (sign_extend: (vec_duplicate: (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull2\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull2_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3180,12 +3519,13 @@ (define_expand "aarch64_sqdmull2_lane" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], operands[2], operands[3], p)); @@ -3195,13 +3535,14 @@ (define_expand "aarch64_sqdmull2_laneq" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - 
emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull2_laneq_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 7b6c2b38e..bf35031ec 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -1405,6 +1405,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); int ncrn, nvrn, nregs; bool allocate_ncrn, allocate_nvrn; + HOST_WIDE_INT size; /* We need to do this once per argument. */ if (pcum->aapcs_arg_processed) @@ -1412,6 +1413,11 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, pcum->aapcs_arg_processed = true; + /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ + size + = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode), + UNITS_PER_WORD); + allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, mode, @@ -1462,9 +1468,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, } ncrn = pcum->aapcs_ncrn; - nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - + nregs = size / UNITS_PER_WORD; /* C6 - C9. though the sign and zero extension semantics are handled elsewhere. This is the case where the argument fits @@ -1513,13 +1517,12 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, pcum->aapcs_nextncrn = NUM_ARG_REGS; /* The argument is passed on stack; record the needed number of words for - this argument (we can re-use NREGS) and align the total size if - necessary. */ + this argument and align the total size if necessary. 
*/ on_stack: - pcum->aapcs_stack_words = nregs; + pcum->aapcs_stack_words = size / UNITS_PER_WORD; if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size, - 16 / UNITS_PER_WORD) + 1; + 16 / UNITS_PER_WORD); return; } @@ -6304,7 +6307,8 @@ aarch64_vector_mode_supported_p (enum machine_mode mode) || mode == V16QImode || mode == V2DImode || mode == V2SImode || mode == V4HImode || mode == V8QImode || mode == V2SFmode - || mode == V4SFmode || mode == V2DFmode)) + || mode == V4SFmode || mode == V2DFmode + || mode == V1DFmode)) return true; return false; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index c86a29d8e..df81045e9 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -2823,17 +2823,18 @@ ;; Arithmetic right shift using SISD or Integer instruction (define_insn "*aarch64_ashr_sisd_or_int_3" - [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r") (ashiftrt:GPI - (match_operand:GPI 1 "register_operand" "w,w,r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,rUs")))] + (match_operand:GPI 1 "register_operand" "w,w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,0,rUs")))] "" "@ sshr\t%0, %1, %2 # + # asr\t%0, %1, %2" - [(set_attr "simd" "yes,yes,no") - (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] + [(set_attr "simd" "yes,yes,yes,no") + (set_attr "type" "neon_shift_imm,neon_shift_reg,neon_shift_reg,shift_reg")] ) (define_split @@ -2842,11 +2843,13 @@ (match_operand:DI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))] - "" + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_SSHL))] +{ + operands[3] = gen_lowpart (QImode, operands[0]); +} ) (define_split @@ -2855,11 +2858,13 @@ (match_operand:SI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] - "" + (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_SSHL_2S))] +{ + operands[3] = gen_lowpart (QImode, operands[0]); +} ) (define_insn "*aarch64_sisd_ushl" @@ -3608,6 +3613,7 @@ (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) + (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:DI 1 "=r"))] "TARGET_TLS_DESC" "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h index b03d11422..c01669b2c 100644 --- a/gcc-4.9/gcc/config/aarch64/arm_neon.h +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -21008,7 +21008,7 @@ vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, +vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, int const __d) { return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); @@ -21030,8 +21030,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) 
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21059,7 +21058,7 @@ vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, +vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, int const __d) { return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); @@ -21081,8 +21080,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21104,7 +21102,7 @@ vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } @@ -21116,7 +21114,7 @@ vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); } @@ -21136,7 +21134,7 @@ vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, +vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, int const __d) { return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); @@ -21158,8 +21156,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21187,7 +21184,7 @@ vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, +vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, int const __d) { return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); @@ -21209,8 +21206,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline 
int64x2_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21232,7 +21228,7 @@ vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); } @@ -21244,7 +21240,7 @@ vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); } @@ -21282,7 +21278,7 @@ vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } @@ -21294,7 +21290,7 @@ vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } @@ -21314,7 +21310,7 @@ vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c) +vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c) { return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); } @@ -21334,8 +21330,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) { - int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); + return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21363,7 +21358,7 @@ vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c) +vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c) { return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); } @@ -21383,8 +21378,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) { - int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); + return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ 
-21406,7 +21400,7 @@ vqdmullh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); } @@ -21418,7 +21412,7 @@ vqdmulls_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); } @@ -21594,7 +21588,7 @@ vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); } @@ -21606,7 +21600,7 @@ vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); } diff --git a/gcc-4.9/gcc/config/aarch64/iterators.md b/gcc-4.9/gcc/config/aarch64/iterators.md index f1339b8cc..e76e3ef10 100644 --- a/gcc-4.9/gcc/config/aarch64/iterators.md +++ b/gcc-4.9/gcc/config/aarch64/iterators.md @@ -396,14 +396,15 @@ (SI "SI") (HI "HI") (QI "QI")]) -;; Define container mode for lane selection. -(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI") +;; 64-bit container modes the inner or scalar source mode. +(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI") + (V4HI "V4HI") (V8HI "V4HI") (V2SI "V2SI") (V4SI "V2SI") (DI "DI") (V2DI "DI") (V2SF "V2SF") (V4SF "V2SF") (V2DF "DF")]) -;; Define container mode for lane selection. +;; 128-bit container modes the inner or scalar source mode. (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI") (V4HI "V8HI") (V8HI "V8HI") (V2SI "V4SI") (V4SI "V4SI") @@ -412,15 +413,6 @@ (V2DF "V2DF") (SI "V4SI") (HI "V8HI") (QI "V16QI")]) -;; Define container mode for lane selection. -(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI") - (V4HI "V8HI") (V8HI "V8HI") - (V2SI "V4SI") (V4SI "V4SI") - (DI "V2DI") (V2DI "V2DI") - (V2SF "V4SF") (V4SF "V4SF") - (V2DF "V2DF") (SI "V4SI") - (HI "V8HI") (QI "V16QI")]) - ;; Half modes of all vector modes. (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI") (V4HI "V2HI") (V8HI "V4HI") diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c index dc07a02c0..d5c7908be 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.c +++ b/gcc-4.9/gcc/config/alpha/alpha.c @@ -8715,6 +8715,11 @@ alpha_handle_trap_shadows (void) } break; + case BARRIER: + /* __builtin_unreachable can expand to no code at all, + leaving (barrier) RTXes in the instruction stream. 
*/ + goto close_shadow_notrapb; + case JUMP_INSN: case CALL_INSN: case CODE_LABEL: @@ -8730,6 +8735,7 @@ alpha_handle_trap_shadows (void) n = emit_insn_before (gen_trapb (), i); PUT_MODE (n, TImode); PUT_MODE (i, TImode); + close_shadow_notrapb: trap_pending = 0; shadow.used.i = 0; shadow.used.fp = 0; diff --git a/gcc-4.9/gcc/config/arm/aout.h b/gcc-4.9/gcc/config/arm/aout.h index 51d32a9d4..c8f4e45c6 100644 --- a/gcc-4.9/gcc/config/arm/aout.h +++ b/gcc-4.9/gcc/config/arm/aout.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef ASM_APP_ON diff --git a/gcc-4.9/gcc/config/arm/arm-cores.def b/gcc-4.9/gcc/config/arm/arm-cores.def index 42f00b463..56041ec8b 100644 --- a/gcc-4.9/gcc/config/arm/arm-cores.def +++ b/gcc-4.9/gcc/config/arm/arm-cores.def @@ -14,8 +14,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Before using #include to read this file, define a macro: diff --git a/gcc-4.9/gcc/config/arm/arm-opts.h b/gcc-4.9/gcc/config/arm/arm-opts.h index a8393975a..21902940e 100644 --- a/gcc-4.9/gcc/config/arm/arm-opts.h +++ b/gcc-4.9/gcc/config/arm/arm-opts.h @@ -13,8 +13,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef ARM_OPTS_H diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c index 83763555c..3c237cb6d 100644 --- a/gcc-4.9/gcc/config/arm/arm.c +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -16739,11 +16739,12 @@ thumb1_reorg (void) rtx prev, insn = BB_END (bb); bool insn_clobbered = false; - while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn)) + while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn)) insn = PREV_INSN (insn); /* Find the last cbranchsi4_insn in basic block BB. 
*/ - if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + if (insn == BB_HEAD (bb) + || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) continue; /* Get the register with which we are comparing. */ @@ -28210,9 +28211,13 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, fputs (":\n", file); if (flag_pic) { - /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */ + /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */ rtx tem = XEXP (DECL_RTL (function), 0); - tem = plus_constant (GET_MODE (tem), tem, -7); + /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC + pipeline offset is four rather than eight. Adjust the offset + accordingly. */ + tem = plus_constant (GET_MODE (tem), tem, + TARGET_THUMB1_ONLY ? -3 : -7); tem = gen_rtx_MINUS (GET_MODE (tem), tem, gen_rtx_SYMBOL_REF (Pmode, diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h index 4d9121436..ab5167a8b 100644 --- a/gcc-4.9/gcc/config/arm/arm.h +++ b/gcc-4.9/gcc/config/arm/arm.h @@ -17,8 +17,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef GCC_ARM_H diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md index 662465f2a..467f9ce4e 100644 --- a/gcc-4.9/gcc/config/arm/arm.md +++ b/gcc-4.9/gcc/config/arm/arm.md @@ -75,6 +75,8 @@ ] ) +;; UNSPEC_VOLATILE Usage: + ;;--------------------------------------------------------------------------- ;; Attributes @@ -8349,8 +8351,8 @@ (define_insn_and_split "*arm_cmpdi_unsigned" [(set (reg:CC_CZ CC_REGNUM) - (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r") - (match_operand:DI 1 "arm_di_operand" "Py,r,rDi")))] + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r,r") + (match_operand:DI 1 "arm_di_operand" "Py,r,Di,rDi")))] "TARGET_32BIT" "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" @@ -8370,9 +8372,9 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "set") - (set_attr "enabled_for_depr_it" "yes,yes,no") - (set_attr "arch" "t2,t2,*") - (set_attr "length" "6,6,8") + (set_attr "enabled_for_depr_it" "yes,yes,no,*") + (set_attr "arch" "t2,t2,t2,a") + (set_attr "length" "6,6,10,8") (set_attr "type" "multiple")] ) @@ -9860,6 +9862,7 @@ "TARGET_32BIT" "%i1%?\\t%0, %2, %4%S3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "shift" "4") (set_attr "arch" "a,t2,t2,a") ;; Thumb2 doesn't allow the stack pointer to be used for diff --git a/gcc-4.9/gcc/config/arm/arm_neon.h b/gcc-4.9/gcc/config/arm/arm_neon.h index 37a6e611b..95735433d 100644 --- a/gcc-4.9/gcc/config/arm/arm_neon.h +++ b/gcc-4.9/gcc/config/arm/arm_neon.h @@ -1,5 +1,4 @@ -/* ARM NEON intrinsics include file. This file is generated automatically - using neon-gen.ml. Please do not edit manually. +/* ARM NEON intrinsics include file. Copyright (C) 2006-2014 Free Software Foundation, Inc. Contributed by CodeSourcery. 
@@ -7707,12 +7706,32 @@ vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); } +/* For big-endian, the shuffle masks for ZIP, UZP and TRN must be changed as + follows. (nelt = the number of elements within a vector.) + + Firstly, a value of N within a mask, becomes (N ^ (nelt - 1)), as gcc vector + extension's indexing scheme is reversed *within each vector* (relative to the + neon intrinsics view), but without changing which of the two vectors. + + Secondly, the elements within each mask are reversed, as the mask is itself a + vector, and will itself be loaded in reverse order (again, relative to the + neon intrinsics view, i.e. that would result from a "vld1" instruction). */ + __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vtrn_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7720,8 +7739,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vtrn_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7729,8 +7753,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vtrn_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7738,8 +7771,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vtrn_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = 
__builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7747,8 +7785,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vtrn_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7756,8 +7803,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vtrn_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7765,8 +7817,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) vtrn_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7774,8 +7831,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vtrn_f32 (float32x2_t __a, float32x2_t __b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7783,8 +7845,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vtrn_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = 
__builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7792,8 +7859,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vtrnq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7801,8 +7877,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vtrnq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7810,8 +7895,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vtrnq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7819,8 +7909,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vtrnq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7828,8 +7923,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = 
(uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7837,8 +7941,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) { uint16x8x2_t __rv; - __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7846,8 +7959,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) { uint32x4x2_t __rv; - __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7855,8 +7973,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) { poly8x16x2_t __rv; - __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7864,8 +7991,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) { poly16x8x2_t __rv; - __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + 
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7873,8 +8009,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vzip_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7882,8 +8027,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vzip_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7891,8 +8041,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vzip_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7900,8 +8059,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vzip_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7909,8 +8073,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vzip_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, 
(uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7918,8 +8091,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vzip_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7927,8 +8105,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) vzip_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7936,8 +8119,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vzip_f32 (float32x2_t __a, float32x2_t __b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7945,8 +8133,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vzip_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7954,8 +8147,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vzipq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = 
(int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return __rv; } @@ -7963,8 +8165,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vzipq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7972,8 +8183,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vzipq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7981,8 +8197,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vzipq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7990,8 +8211,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vzipq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 
23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return __rv; } @@ -7999,8 +8229,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) vzipq_u16 (uint16x8_t __a, uint16x8_t __b) { uint16x8x2_t __rv; - __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -8008,8 +8247,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) vzipq_u32 (uint32x4_t __a, uint32x4_t __b) { uint32x4x2_t __rv; - __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -8017,8 +8261,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) vzipq_p8 (poly8x16_t __a, poly8x16_t __b) { poly8x16x2_t __rv; - __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return __rv; } @@ -8026,8 +8279,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) vzipq_p16 (poly16x8_t __a, poly16x8_t __b) { poly16x8x2_t __rv; - __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return 
__rv; } @@ -8035,8 +8297,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vuzp_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8044,8 +8315,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vuzp_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8053,8 +8329,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) vuzp_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8062,8 +8343,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vuzp_f32 (float32x2_t __a, float32x2_t __b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8071,8 +8357,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vuzp_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); 
+#endif return __rv; } @@ -8080,8 +8375,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vuzp_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8089,8 +8389,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vuzp_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8098,8 +8403,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vuzp_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8107,8 +8421,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vuzp_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8116,8 +8435,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vuzpq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); - __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 
}); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#endif return __rv; } @@ -8125,8 +8453,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vuzpq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8134,8 +8471,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vuzpq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8143,8 +8485,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vuzpq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8152,8 +8499,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); - __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#endif return __rv; } @@ -8161,8 +8517,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) 
 vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
   uint16x8x2_t __rv;
-  __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
@@ -8170,8 +8535,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
-  __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
-  __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#endif
   return __rv;
 }
 
@@ -8179,8 +8549,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
   poly8x16x2_t __rv;
-  __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
-  __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#endif
   return __rv;
 }
 
@@ -8188,8 +8567,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
   poly16x8x2_t __rv;
-  __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
diff --git a/gcc-4.9/gcc/config/arm/bpabi.h b/gcc-4.9/gcc/config/arm/bpabi.h
index bc223f8e3..7a576ac46 100644
--- a/gcc-4.9/gcc/config/arm/bpabi.h
+++ b/gcc-4.9/gcc/config/arm/bpabi.h
@@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
- You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Use the AAPCS ABI by default. */ diff --git a/gcc-4.9/gcc/config/arm/elf.h b/gcc-4.9/gcc/config/arm/elf.h index 2edf520de..15a32fb8a 100644 --- a/gcc-4.9/gcc/config/arm/elf.h +++ b/gcc-4.9/gcc/config/arm/elf.h @@ -16,8 +16,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef OBJECT_FORMAT_ELF diff --git a/gcc-4.9/gcc/config/arm/linux-eabi.h b/gcc-4.9/gcc/config/arm/linux-eabi.h index 4d42cbfc8..350639f32 100644 --- a/gcc-4.9/gcc/config/arm/linux-eabi.h +++ b/gcc-4.9/gcc/config/arm/linux-eabi.h @@ -68,8 +68,9 @@ GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI. */ #undef GLIBC_DYNAMIC_LINKER -#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3" -#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3" + +#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.3" +#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux-armhf.so.3" #define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT #define GLIBC_DYNAMIC_LINKER \ diff --git a/gcc-4.9/gcc/config/arm/linux-elf.h b/gcc-4.9/gcc/config/arm/linux-elf.h index 5dc3328e8..e825ae48c 100644 --- a/gcc-4.9/gcc/config/arm/linux-elf.h +++ b/gcc-4.9/gcc/config/arm/linux-elf.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* elfos.h should have already been included. Now just override @@ -57,7 +62,7 @@ #define LIBGCC_SPEC "%{mfloat-abi=soft*:-lfloat} -lgcc" -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" #define LINUX_TARGET_LINK_SPEC "%{h*} \ %{static:-Bstatic} \ diff --git a/gcc-4.9/gcc/config/arm/linux-gas.h b/gcc-4.9/gcc/config/arm/linux-gas.h index 52a739c26..1dd043782 100644 --- a/gcc-4.9/gcc/config/arm/linux-gas.h +++ b/gcc-4.9/gcc/config/arm/linux-gas.h @@ -15,8 +15,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* This is how we tell the assembler that a symbol is weak. diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h new file mode 100644 index 000000000..7ee5806b7 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-grte.h @@ -0,0 +1,27 @@ +/* Definitions for ARM Linux-based GRTE (Google RunTime Environment). + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Chris Demetriou. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/arm/neon-docgen.ml b/gcc-4.9/gcc/config/arm/neon-docgen.ml deleted file mode 100644 index 5788a533e..000000000 --- a/gcc-4.9/gcc/config/arm/neon-docgen.ml +++ /dev/null @@ -1,424 +0,0 @@ -(* ARM NEON documentation generator. - - Copyright (C) 2006-2014 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 3, or (at your option) any later - version. - - GCC is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see - . - - This is an O'Caml program. The O'Caml compiler is available from: - - http://caml.inria.fr/ - - Or from your favourite OS's friendly packaging system. Tested with version - 3.09.2, though other versions will probably work too. - - Compile with: - ocamlc -c neon.ml - ocamlc -o neon-docgen neon.cmo neon-docgen.ml - - Run with: - /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi -*) - -open Neon - -(* The combined "ops" and "reinterp" table. *) -let ops_reinterp = reinterp @ ops - -(* Helper functions for extracting things from the "ops" table. 
*) -let single_opcode desired_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - if opcode = desired_opcode then row :: got_so_far - else got_so_far - ) [] ops_reinterp - -let multiple_opcodes desired_opcodes () = - List.fold_left (fun got_so_far -> - fun desired_opcode -> - (single_opcode desired_opcode ()) @ got_so_far) - [] desired_opcodes - -let ldx_opcode number () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vldx n | Vldx_lane n | Vldx_dup n when n = number -> - row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let stx_opcode number () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vstx n | Vstx_lane n when n = number -> - row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let tbl_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vtbl _ -> row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let tbx_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vtbx _ -> row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -(* The groups of intrinsics. *) -let intrinsic_groups = - [ "Addition", single_opcode Vadd; - "Multiplication", single_opcode Vmul; - "Multiply-accumulate", single_opcode Vmla; - "Multiply-subtract", single_opcode Vmls; - "Fused-multiply-accumulate", single_opcode Vfma; - "Fused-multiply-subtract", single_opcode Vfms; - "Round to integral (to nearest, ties to even)", single_opcode Vrintn; - "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta; - "Round to integral (towards +Inf)", single_opcode Vrintp; - "Round to integral (towards -Inf)", single_opcode Vrintm; - "Round to integral (towards 0)", single_opcode Vrintz; - "Subtraction", single_opcode Vsub; - "Comparison (equal-to)", single_opcode Vceq; - "Comparison (greater-than-or-equal-to)", single_opcode Vcge; - "Comparison (less-than-or-equal-to)", single_opcode Vcle; - "Comparison (greater-than)", single_opcode Vcgt; - "Comparison (less-than)", single_opcode Vclt; - "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage; - "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale; - "Comparison (absolute greater-than)", single_opcode Vcagt; - "Comparison (absolute less-than)", single_opcode Vcalt; - "Test bits", single_opcode Vtst; - "Absolute difference", single_opcode Vabd; - "Absolute difference and accumulate", single_opcode Vaba; - "Maximum", single_opcode Vmax; - "Minimum", single_opcode Vmin; - "Pairwise add", single_opcode Vpadd; - "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada; - "Folding maximum", single_opcode Vpmax; - "Folding minimum", single_opcode Vpmin; - "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts]; - "Vector shift left", single_opcode Vshl; - "Vector shift left by constant", single_opcode Vshl_n; - "Vector shift right by constant", single_opcode Vshr_n; - "Vector shift right by constant and accumulate", single_opcode Vsra_n; - "Vector shift right and insert", single_opcode Vsri; - "Vector shift left and insert", single_opcode Vsli; - "Absolute value", single_opcode Vabs; - "Negation", single_opcode Vneg; - "Bitwise not", single_opcode Vmvn; - "Count leading sign bits", single_opcode Vcls; - "Count leading zeros", single_opcode Vclz; - 
"Count number of set bits", single_opcode Vcnt; - "Reciprocal estimate", single_opcode Vrecpe; - "Reciprocal square-root estimate", single_opcode Vrsqrte; - "Get lanes from a vector", single_opcode Vget_lane; - "Set lanes in a vector", single_opcode Vset_lane; - "Create vector from literal bit pattern", single_opcode Vcreate; - "Set all lanes to the same value", - multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane]; - "Combining vectors", single_opcode Vcombine; - "Splitting vectors", multiple_opcodes [Vget_high; Vget_low]; - "Conversions", multiple_opcodes [Vcvt; Vcvt_n]; - "Move, single_opcode narrowing", single_opcode Vmovn; - "Move, single_opcode long", single_opcode Vmovl; - "Table lookup", tbl_opcode; - "Extended table lookup", tbx_opcode; - "Multiply, lane", single_opcode Vmul_lane; - "Long multiply, lane", single_opcode Vmull_lane; - "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane; - "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane; - "Multiply-accumulate, lane", single_opcode Vmla_lane; - "Multiply-subtract, lane", single_opcode Vmls_lane; - "Vector multiply by scalar", single_opcode Vmul_n; - "Vector long multiply by scalar", single_opcode Vmull_n; - "Vector saturating doubling long multiply by scalar", - single_opcode Vqdmull_n; - "Vector saturating doubling multiply high by scalar", - single_opcode Vqdmulh_n; - "Vector multiply-accumulate by scalar", single_opcode Vmla_n; - "Vector multiply-subtract by scalar", single_opcode Vmls_n; - "Vector extract", single_opcode Vext; - "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16]; - "Bit selection", single_opcode Vbsl; - "Transpose elements", single_opcode Vtrn; - "Zip elements", single_opcode Vzip; - "Unzip elements", single_opcode Vuzp; - "Element/structure loads, VLD1 variants", ldx_opcode 1; - "Element/structure stores, VST1 variants", stx_opcode 1; - "Element/structure loads, VLD2 variants", ldx_opcode 2; - "Element/structure stores, VST2 variants", stx_opcode 2; - "Element/structure loads, VLD3 variants", ldx_opcode 3; - "Element/structure stores, VST3 variants", stx_opcode 3; - "Element/structure loads, VLD4 variants", ldx_opcode 4; - "Element/structure stores, VST4 variants", stx_opcode 4; - "Logical operations (AND)", single_opcode Vand; - "Logical operations (OR)", single_opcode Vorr; - "Logical operations (exclusive OR)", single_opcode Veor; - "Logical operations (AND-NOT)", single_opcode Vbic; - "Logical operations (OR-NOT)", single_opcode Vorn; - "Reinterpret casts", single_opcode Vreinterp ] - -(* Given an intrinsic shape, produce a string to document the corresponding - operand shapes. 
*) -let rec analyze_shape shape = - let rec n_things n thing = - match n with - 0 -> [] - | n -> thing :: (n_things (n - 1) thing) - in - let rec analyze_shape_elt reg_no elt = - match elt with - Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}" - | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}" - | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}" - | Immed -> "#@var{0}" - | VecArray (1, elt) -> - let elt_regexp = analyze_shape_elt 0 elt in - "@{" ^ elt_regexp ^ "@}" - | VecArray (n, elt) -> - let rec f m = - match m with - 0 -> [] - | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1)) - in - let ops = List.rev (f n) in - "@{" ^ (commas (fun x -> x) ops "") ^ "@}" - | (PtrTo elt | CstPtrTo elt) -> - "[" ^ (analyze_shape_elt reg_no elt) ^ "]" - | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]" - | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]" - | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]" - | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts)) - in - match shape with - All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) "" - | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^ - ", " ^ (analyze_shape_elt 0 Dreg) - | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^ - (analyze_shape_elt 0 elt) - | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ - ", " ^ (analyze_shape_elt 0 Dreg) - | Wide_noreg elt -> analyze_shape (Long_noreg elt) - | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ - ", " ^ (analyze_shape_elt 0 Qreg) - | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) "" - | By_scalar Dreg -> - analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) - | By_scalar Qreg -> - analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) - | By_scalar _ -> assert false - | Wide_lane -> - analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) - | Wide_scalar -> - analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) - | Pair_result elt -> - let elt_regexp = analyze_shape_elt 0 elt in - let elt_regexp' = analyze_shape_elt 1 elt in - elt_regexp ^ ", " ^ elt_regexp' - | Unary_scalar _ -> "FIXME Unary_scalar" - | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) - | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) - | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) - -(* Document a single intrinsic. *) -let describe_intrinsic first chan - (elt_ty, (_, features, shape, name, munge, _)) = - let c_arity, new_elt_ty = munge shape elt_ty in - let c_types = strings_of_arity c_arity in - Printf.fprintf chan "@itemize @bullet\n"; - let item_code = if first then "@item" else "@itemx" in - Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types) - (intrinsic_name name) (string_of_elt elt_ty); - Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) ""); - if not (List.exists (fun feature -> feature = No_op) features) then - begin - let print_one_insn name = - Printf.fprintf chan "@code{"; - let no_suffix = (new_elt_ty = NoElts) in - let name_with_suffix = - if no_suffix then name - else name ^ "." ^ (string_of_elt_dots new_elt_ty) - in - let possible_operands = analyze_all_shapes features shape - analyze_shape - in - let rec print_one_possible_operand op = - Printf.fprintf chan "%s %s}" name_with_suffix op - in - (* If the intrinsic expands to multiple instructions, we assume - they are all of the same form. 
*) - print_one_possible_operand (List.hd possible_operands) - in - let rec print_insns names = - match names with - [] -> () - | [name] -> print_one_insn name - | name::names -> (print_one_insn name; - Printf.fprintf chan " @emph{or} "; - print_insns names) - in - let insn_names = get_insn_names features name in - Printf.fprintf chan "@*@emph{Form of expected instruction(s):} "; - print_insns insn_names; - Printf.fprintf chan "\n" - end; - Printf.fprintf chan "@end itemize\n"; - Printf.fprintf chan "\n\n" - -(* Document a group of intrinsics. *) -let document_group chan (group_title, group_extractor) = - (* Extract the rows in question from the ops table and then turn them - into a list of intrinsics. *) - let intrinsics = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (_, _, _, _, _, elt_tys) -> - List.fold_left (fun got_so_far' -> - fun elt_ty -> - (elt_ty, row) :: got_so_far') - got_so_far elt_tys - ) [] (group_extractor ()) - in - (* Emit the title for this group. *) - Printf.fprintf chan "@subsubsection %s\n\n" group_title; - (* Emit a description of each intrinsic. *) - List.iter (describe_intrinsic true chan) intrinsics; - (* Close this group. *) - Printf.fprintf chan "\n\n" - -let gnu_header chan = - List.iter (fun s -> Printf.fprintf chan "%s\n" s) [ - "@c Copyright (C) 2006-2014 Free Software Foundation, Inc."; - "@c This is part of the GCC manual."; - "@c For copying conditions, see the file gcc.texi."; - ""; - "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml"; - "@c Please do not edit manually."] - -let crypto_doc = -" -@itemize @bullet -@item poly128_t vldrq_p128(poly128_t const *) -@end itemize - -@itemize @bullet -@item void vstrq_p128(poly128_t *, poly128_t) -@end itemize - -@itemize @bullet -@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t) -@end itemize - -@itemize @bullet -@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t) -@end itemize - -@itemize @bullet -@item uint32_t vsha1h_u32 (uint32_t) -@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256su0.32 
@var{q0}, @var{q1}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item poly128_t vmull_p64 (poly64_t a, poly64_t b) -@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} -@end itemize - -@itemize @bullet -@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b) -@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} -@end itemize -" - -(* Program entry point. *) -let _ = - if Array.length Sys.argv <> 2 then - failwith "Usage: neon-docgen " - else - let file = Sys.argv.(1) in - try - let chan = open_out file in - gnu_header chan; - List.iter (document_group chan) intrinsic_groups; - Printf.fprintf chan "%s\n" crypto_doc; - close_out chan - with Sys_error sys -> - failwith ("Could not create output file " ^ file ^ ": " ^ sys) diff --git a/gcc-4.9/gcc/config/arm/neon-gen.ml b/gcc-4.9/gcc/config/arm/neon-gen.ml deleted file mode 100644 index f3dd86b0a..000000000 --- a/gcc-4.9/gcc/config/arm/neon-gen.ml +++ /dev/null @@ -1,520 +0,0 @@ -(* Auto-generate ARM Neon intrinsics header file. - Copyright (C) 2006-2014 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 3, or (at your option) any later - version. - - GCC is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see - . - - This is an O'Caml program. The O'Caml compiler is available from: - - http://caml.inria.fr/ - - Or from your favourite OS's friendly packaging system. Tested with version - 3.09.2, though other versions will probably work too. - - Compile with: - ocamlc -c neon.ml - ocamlc -o neon-gen neon.cmo neon-gen.ml - - Run with: - ./neon-gen > arm_neon.h -*) - -open Neon - -(* The format codes used in the following functions are documented at: - http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\ - #6_printflikefunctionsforprettyprinting - (one line, remove the backslash.) -*) - -(* Following functions can be used to approximate GNU indentation style. 
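   As a concrete target for that style, each printed intrinsic is expected
   to come out roughly like this (an illustrative sketch based on the
   printing code below; vadd_s8 and its builtin call are assumed examples,
   the trailing 1 being the extra information word appended for All shapes):

     __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     vadd_s8 (int8x8_t __a, int8x8_t __b)
     {
       return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1);
     }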
*) -let start_function () = - Format.printf "@["; - ref 0 - -let end_function nesting = - match !nesting with - 0 -> Format.printf "@;@;@]" - | _ -> failwith ("Bad nesting (ending function at level " - ^ (string_of_int !nesting) ^ ")") - -let open_braceblock nesting = - begin match !nesting with - 0 -> Format.printf "@,@<0>{@[@," - | _ -> Format.printf "@,@[ @<0>{@[@," - end; - incr nesting - -let close_braceblock nesting = - decr nesting; - match !nesting with - 0 -> Format.printf "@]@,@<0>}" - | _ -> Format.printf "@]@,@<0>}@]" - -let print_function arity fnname body = - let ffmt = start_function () in - Format.printf "__extension__ static __inline "; - let inl = "__attribute__ ((__always_inline__))" in - begin match arity with - Arity0 ret -> - Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname - | Arity1 (ret, arg0) -> - Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname - (string_of_vectype arg0) - | Arity2 (ret, arg0, arg1) -> - Format.printf "%s %s@,%s (%s __a, %s __b)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) - | Arity3 (ret, arg0, arg1, arg2) -> - Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) (string_of_vectype arg2) - | Arity4 (ret, arg0, arg1, arg2, arg3) -> - Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) (string_of_vectype arg2) - (string_of_vectype arg3) - end; - open_braceblock ffmt; - let rec print_lines = function - [] -> () - | "" :: lines -> print_lines lines - | [line] -> Format.printf "%s" line - | line::lines -> Format.printf "%s@," line ; print_lines lines in - print_lines body; - close_braceblock ffmt; - end_function ffmt - -let union_string num elts base = - let itype = inttype_for_array num elts in - let iname = string_of_inttype itype - and sname = string_of_vectype (T_arrayof (num, elts)) in - Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base - -let rec signed_ctype = function - T_uint8x8 | T_poly8x8 -> T_int8x8 - | T_uint8x16 | T_poly8x16 -> T_int8x16 - | T_uint16x4 | T_poly16x4 -> T_int16x4 - | T_uint16x8 | T_poly16x8 -> T_int16x8 - | T_uint32x2 -> T_int32x2 - | T_uint32x4 -> T_int32x4 - | T_uint64x1 -> T_int64x1 - | T_uint64x2 -> T_int64x2 - | T_poly64x2 -> T_int64x2 - (* Cast to types defined by mode in arm.c, not random types pulled in from - the header in use. This fixes incompatible pointer errors when - compiling with C++. *) - | T_uint8 | T_int8 -> T_intQI - | T_uint16 | T_int16 -> T_intHI - | T_uint32 | T_int32 -> T_intSI - | T_uint64 | T_int64 -> T_intDI - | T_float16 -> T_floatHF - | T_float32 -> T_floatSF - | T_poly8 -> T_intQI - | T_poly16 -> T_intHI - | T_poly64 -> T_intDI - | T_poly128 -> T_intTI - | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) - | T_ptrto elt -> T_ptrto (signed_ctype elt) - | T_const elt -> T_const (signed_ctype elt) - | x -> x - -let add_cast ctype cval = - let stype = signed_ctype ctype in - if ctype <> stype then - Printf.sprintf "(%s) %s" (string_of_vectype stype) cval - else - cval - -let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")" - -(* Return a tuple of a list of declarations to go at the start of the function, - and a list of statements needed to return THING. 
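   For example, when the return type is an array type such as int8x8x2_t,
   the generated statements use the union built by union_string to convert
   from the builtin's opaque integer mode (an illustrative sketch; the vld2
   builtin call is an assumed example):

     union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
     __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
     return __rv.__i;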
*) -let return arity thing = - match arity with - Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) - | Arity4 (ret, _, _, _, _) -> - begin match ret with - T_arrayof (num, vec) -> - let uname = union_string num vec "__rv" in - [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"] - | T_void -> - [], [thing ^ ";"] - | _ -> - [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"] - end - -let mask_shape_for_shuffle = function - All (num, reg) -> All (num, reg) - | Pair_result reg -> All (2, reg) - | _ -> failwith "mask_for_shuffle" - -let mask_elems shuffle shape elttype part = - let elem_size = elt_width elttype in - let num_elems = - match regmap shape 0 with - Dreg -> 64 / elem_size - | Qreg -> 128 / elem_size - | _ -> failwith "mask_elems" in - shuffle elem_size num_elems part - -(* Return a tuple of a list of declarations 0and a list of statements needed - to implement an intrinsic using __builtin_shuffle. SHUFFLE is a function - which returns a list of elements suitable for using as a mask. *) - -let shuffle_fn shuffle shape arity elttype = - let mshape = mask_shape_for_shuffle shape in - let masktype = type_for_elt mshape (unsigned_of_elt elttype) 0 in - let masktype_str = string_of_vectype masktype in - let shuffle_res = type_for_elt mshape elttype 0 in - let shuffle_res_str = string_of_vectype shuffle_res in - match arity with - Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) - | Arity4 (ret, _, _, _, _) -> - begin match ret with - T_arrayof (num, vec) -> - let elems1 = mask_elems shuffle mshape elttype `lo - and elems2 = mask_elems shuffle mshape elttype `hi in - let mask1 = (String.concat ", " (List.map string_of_int elems1)) - and mask2 = (String.concat ", " (List.map string_of_int elems2)) in - let shuf1 = Printf.sprintf - "__rv.val[0] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" - shuffle_res_str masktype_str mask1 - and shuf2 = Printf.sprintf - "__rv.val[1] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" - shuffle_res_str masktype_str mask2 in - [Printf.sprintf "%s __rv;" (string_of_vectype ret);], - [shuf1; shuf2; "return __rv;"] - | _ -> - let elems = mask_elems shuffle mshape elttype `lo in - let mask = (String.concat ", " (List.map string_of_int elems)) in - let shuf = Printf.sprintf - "return (%s) __builtin_shuffle (__a, (%s) { %s });" shuffle_res_str masktype_str mask in - [""], - [shuf] - end - -let rec element_type ctype = - match ctype with - T_arrayof (_, v) -> element_type v - | _ -> ctype - -let params ps = - let pdecls = ref [] in - let ptype t p = - match t with - T_arrayof (num, elts) -> - let uname = union_string num elts (p ^ "u") in - let decl = Printf.sprintf "%s = { %s };" uname p in - pdecls := decl :: !pdecls; - p ^ "u.__o" - | _ -> add_cast t p in - let plist = match ps with - Arity0 _ -> [] - | Arity1 (_, t1) -> [ptype t1 "__a"] - | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"] - | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"] - | Arity4 (_, t1, t2, t3, t4) -> - [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in - !pdecls, plist - -let modify_params features plist = - let is_flipped = - List.exists (function Flipped _ -> true | _ -> false) features in - if is_flipped then - match plist with - [ a; b ] -> [ b; a ] - | _ -> - failwith ("Don't know how to flip args " ^ (String.concat ", " plist)) - else - plist - -(* !!! Decide whether to add an extra information word based on the shape - form. 
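   When used, the word computed by infoword_value below is passed as a
   trailing integer argument to the builtin.  For instance (an assumed
   illustration), the unsigned variant vadd_u8 would be emitted as:

     return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);

   where the final 0 encodes "unsigned, no rounding" per the bit layout
   documented just below.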
*) -let extra_word shape features paramlist bits = - let use_word = - match shape with - All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow - | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm - | Narrow_imm -> true - | _ -> List.mem InfoWord features - in - if use_word then - paramlist @ [string_of_int bits] - else - paramlist - -(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0). - Bit 1 represents floats & polynomials (1), or ordinary integers (0). - Bit 2 represents rounding (1) vs none (0). *) -let infoword_value elttype features = - let bits01 = - match elt_class elttype with - Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001 - | Poly -> 0b010 - | Float -> 0b011 - | _ -> 0b000 - and rounding_bit = if List.mem Rounding features then 0b100 else 0b000 in - bits01 lor rounding_bit - -(* "Cast" type operations will throw an exception in mode_of_elt (actually in - elt_width, called from there). Deal with that here, and generate a suffix - with multiple modes (). *) -let rec mode_suffix elttype shape = - try - let mode = mode_of_elt elttype shape in - string_of_mode mode - with MixedMode (dst, src) -> - let dstmode = mode_of_elt ~argpos:0 dst shape - and srcmode = mode_of_elt ~argpos:1 src shape in - string_of_mode dstmode ^ string_of_mode srcmode - -let get_shuffle features = - try - match List.find (function Use_shuffle _ -> true | _ -> false) features with - Use_shuffle fn -> Some fn - | _ -> None - with Not_found -> None - -let print_feature_test_start features = - try - match List.find (fun feature -> - match feature with Requires_feature _ -> true - | Requires_arch _ -> true - | Requires_FP_bit _ -> true - | _ -> false) - features with - Requires_feature feature -> - Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature - | Requires_arch arch -> - Format.printf "#if __ARM_ARCH >= %d@\n" arch - | Requires_FP_bit bit -> - Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n" - (1 lsl bit) - | _ -> assert false - with Not_found -> assert true - -let print_feature_test_end features = - let feature = - List.exists (function Requires_feature _ -> true - | Requires_arch _ -> true - | Requires_FP_bit _ -> true - | _ -> false) features in - if feature then Format.printf "#endif@\n" - - -let print_variant opcode features shape name (ctype, asmtype, elttype) = - let bits = infoword_value elttype features in - let modesuf = mode_suffix elttype shape in - let pdecls, paramlist = params ctype in - let rdecls, stmts = - match get_shuffle features with - Some shuffle -> shuffle_fn shuffle shape ctype elttype - | None -> - let paramlist' = modify_params features paramlist in - let paramlist'' = extra_word shape features paramlist' bits in - let parstr = String.concat ", " paramlist'' in - let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)" - (builtin_name features name) modesuf parstr in - return ctype builtin in - let body = pdecls @ rdecls @ stmts - and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in - begin - print_feature_test_start features; - print_function ctype fnname body; - print_feature_test_end features; - end - -(* When this function processes the element types in the ops table, it rewrites - them in a list of tuples (a,b,c): - a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8) - b : Asm type : a single, processed element type, e.g. P16. This is the - type which should be attached to the asm opcode. - c : Variant type : the unprocessed type for this variant (e.g. 
in add - instructions which don't care about the sign, b might be i16 and c - might be s16.) -*) - -let print_op (opcode, features, shape, name, munge, types) = - let sorted_types = List.sort compare types in - let munged_types = List.map - (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in - List.iter - (fun variant -> print_variant opcode features shape name variant) - munged_types - -let print_ops ops = - List.iter print_op ops - -(* Output type definitions. Table entries are: - cbase : "C" name for the type. - abase : "ARM" base name for the type (i.e. int in int8x8_t). - esize : element size. - enum : element count. - alevel: architecture level at which available. -*) - -type fpulevel = CRYPTO | ALL - -let deftypes () = - let typeinfo = [ - (* Doubleword vector types. *) - "__builtin_neon_qi", "int", 8, 8, ALL; - "__builtin_neon_hi", "int", 16, 4, ALL; - "__builtin_neon_si", "int", 32, 2, ALL; - "__builtin_neon_di", "int", 64, 1, ALL; - "__builtin_neon_hf", "float", 16, 4, ALL; - "__builtin_neon_sf", "float", 32, 2, ALL; - "__builtin_neon_poly8", "poly", 8, 8, ALL; - "__builtin_neon_poly16", "poly", 16, 4, ALL; - "__builtin_neon_poly64", "poly", 64, 1, CRYPTO; - "__builtin_neon_uqi", "uint", 8, 8, ALL; - "__builtin_neon_uhi", "uint", 16, 4, ALL; - "__builtin_neon_usi", "uint", 32, 2, ALL; - "__builtin_neon_udi", "uint", 64, 1, ALL; - - (* Quadword vector types. *) - "__builtin_neon_qi", "int", 8, 16, ALL; - "__builtin_neon_hi", "int", 16, 8, ALL; - "__builtin_neon_si", "int", 32, 4, ALL; - "__builtin_neon_di", "int", 64, 2, ALL; - "__builtin_neon_sf", "float", 32, 4, ALL; - "__builtin_neon_poly8", "poly", 8, 16, ALL; - "__builtin_neon_poly16", "poly", 16, 8, ALL; - "__builtin_neon_poly64", "poly", 64, 2, CRYPTO; - "__builtin_neon_uqi", "uint", 8, 16, ALL; - "__builtin_neon_uhi", "uint", 16, 8, ALL; - "__builtin_neon_usi", "uint", 32, 4, ALL; - "__builtin_neon_udi", "uint", 64, 2, ALL - ] in - List.iter - (fun (cbase, abase, esize, enum, fpulevel) -> - let attr = - match enum with - 1 -> "" - | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))" - (esize * enum / 8) in - if fpulevel == CRYPTO then - Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; - Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr; - if fpulevel == CRYPTO then - Format.printf "#endif\n";) - typeinfo; - Format.print_newline (); - (* Extra types not in . *) - Format.printf "typedef float float32_t;\n"; - Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; - Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"; - Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; - Format.printf "typedef __builtin_neon_poly64 poly64_t;\n"; - Format.printf "typedef __builtin_neon_poly128 poly128_t;\n"; - Format.printf "#endif\n" - -(* Output structs containing arrays, for load & store instructions etc. - poly128_t is deliberately not included here because it has no array types - defined for it. 
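   Each such struct wraps a plain array of the element vector type; for
   example, the first doubleword entry below with an array size of 2
   expands to:

     typedef struct int8x8x2_t
     {
       int8x8_t val[2];
     } int8x8x2_t;

   with the poly64 rows additionally wrapped in #ifdef __ARM_FEATURE_CRYPTO.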
*) - -let arrtypes () = - let typeinfo = [ - "int", 8, ALL; "int", 16, ALL; - "int", 32, ALL; "int", 64, ALL; - "uint", 8, ALL; "uint", 16, ALL; - "uint", 32, ALL; "uint", 64, ALL; - "float", 32, ALL; "poly", 8, ALL; - "poly", 16, ALL; "poly", 64, CRYPTO - ] in - let writestruct elname elsize regsize arrsize fpulevel = - let elnum = regsize / elsize in - let structname = - Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in - let sfmt = start_function () in - Format.printf "%stypedef struct %s" - (if fpulevel == CRYPTO then "#ifdef __ARM_FEATURE_CRYPTO\n" else "") structname; - open_braceblock sfmt; - Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize; - close_braceblock sfmt; - Format.printf " %s;%s" structname (if fpulevel == CRYPTO then "\n#endif\n" else ""); - end_function sfmt; - in - for n = 2 to 4 do - List.iter - (fun (elname, elsize, alevel) -> - writestruct elname elsize 64 n alevel; - writestruct elname elsize 128 n alevel) - typeinfo - done - -let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) - -(* Do it. *) - -let _ = - print_lines [ -"/* ARM NEON intrinsics include file. This file is generated automatically"; -" using neon-gen.ml. Please do not edit manually."; -""; -" Copyright (C) 2006-2014 Free Software Foundation, Inc."; -" Contributed by CodeSourcery."; -""; -" This file is part of GCC."; -""; -" GCC is free software; you can redistribute it and/or modify it"; -" under the terms of the GNU General Public License as published"; -" by the Free Software Foundation; either version 3, or (at your"; -" option) any later version."; -""; -" GCC is distributed in the hope that it will be useful, but WITHOUT"; -" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; -" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; -" License for more details."; -""; -" Under Section 7 of GPL version 3, you are granted additional"; -" permissions described in the GCC Runtime Library Exception, version"; -" 3.1, as published by the Free Software Foundation."; -""; -" You should have received a copy of the GNU General Public License and"; -" a copy of the GCC Runtime Library Exception along with this program;"; -" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see"; -" . */"; -""; -"#ifndef _GCC_ARM_NEON_H"; -"#define _GCC_ARM_NEON_H 1"; -""; -"#ifndef __ARM_NEON__"; -"#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h"; -"#else"; -""; -"#ifdef __cplusplus"; -"extern \"C\" {"; -"#endif"; -""; -"#include "; -""]; - deftypes (); - arrtypes (); - Format.print_newline (); - print_ops ops; - Format.print_newline (); - print_ops reinterp; - print_ops reinterpq; - Format.printf "%s" crypto_intrinsics; - print_lines [ -"#ifdef __cplusplus"; -"}"; -"#endif"; -"#endif"; -"#endif"] diff --git a/gcc-4.9/gcc/config/arm/netbsd-elf.h b/gcc-4.9/gcc/config/arm/netbsd-elf.h index 9deda9679..645551fdf 100644 --- a/gcc-4.9/gcc/config/arm/netbsd-elf.h +++ b/gcc-4.9/gcc/config/arm/netbsd-elf.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Run-time Target Specification. */ diff --git a/gcc-4.9/gcc/config/arm/uclinux-eabi.h b/gcc-4.9/gcc/config/arm/uclinux-eabi.h index b5055ce40..4d57d1388 100644 --- a/gcc-4.9/gcc/config/arm/uclinux-eabi.h +++ b/gcc-4.9/gcc/config/arm/uclinux-eabi.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Override settings that are different to the uclinux-elf or diff --git a/gcc-4.9/gcc/config/arm/uclinux-elf.h b/gcc-4.9/gcc/config/arm/uclinux-elf.h index 5cd4fe527..43edba70c 100644 --- a/gcc-4.9/gcc/config/arm/uclinux-elf.h +++ b/gcc-4.9/gcc/config/arm/uclinux-elf.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* We don't want a PLT. */ diff --git a/gcc-4.9/gcc/config/arm/unspecs.md b/gcc-4.9/gcc/config/arm/unspecs.md index 87edf18e2..db3fea5f5 100644 --- a/gcc-4.9/gcc/config/arm/unspecs.md +++ b/gcc-4.9/gcc/config/arm/unspecs.md @@ -58,6 +58,7 @@ ; instruction stream. UNSPEC_PIC_OFFSET ; A symbolic 12-bit OFFSET that has been treated ; correctly for PIC usage. + UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol. UNSPEC_GOTSYM_OFF ; The offset of the start of the GOT from a ; a given symbolic address. UNSPEC_THUMB1_CASESI ; A Thumb1 compressed dispatch-table call. @@ -70,6 +71,11 @@ ; that. UNSPEC_UNALIGNED_STORE ; Same for str/strh. UNSPEC_PIC_UNIFIED ; Create a common pic addressing form. + UNSPEC_PROLOGUE_USE ; As USE insns are not meaningful after reload, + ; this unspec is used to prevent the deletion of + ; instructions setting registers for EH handling + ; and stack frame generation. Operand 0 is the + ; register to "use". UNSPEC_LL ; Represent an unpaired load-register-exclusive. UNSPEC_VRINTZ ; Represent a float to integral float rounding ; towards zero. @@ -83,11 +89,12 @@ ; FPSCR rounding mode and signal inexactness. UNSPEC_VRINTA ; Represent a float to integral float rounding ; towards nearest, ties away from zero. - UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol ]) (define_c_enum "unspec" [ UNSPEC_WADDC ; Used by the intrinsic form of the iWMMXt WADDC instruction. + UNSPEC_WMADDS ; Used by the intrinsic form of the iWMMXt WMADDS instruction. 
+ UNSPEC_WMADDU ; Used by the intrinsic form of the iWMMXt WMADDU instruction. UNSPEC_WABS ; Used by the intrinsic form of the iWMMXt WABS instruction. UNSPEC_WQMULWMR ; Used by the intrinsic form of the iWMMXt WQMULWMR instruction. UNSPEC_WQMULMR ; Used by the intrinsic form of the iWMMXt WQMULMR instruction. diff --git a/gcc-4.9/gcc/config/arm/vxworks.h b/gcc-4.9/gcc/config/arm/vxworks.h index 8bef16bc4..faf84724d 100644 --- a/gcc-4.9/gcc/config/arm/vxworks.h +++ b/gcc-4.9/gcc/config/arm/vxworks.h @@ -17,8 +17,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md index 1652415b1..9c8489edd 100644 --- a/gcc-4.9/gcc/config/avr/avr-fixed.md +++ b/gcc-4.9/gcc/config/avr/avr-fixed.md @@ -430,8 +430,8 @@ } // Input and output of the libgcc function - const unsigned int regno_in[] = { -1, 22, 22, -1, 18 }; - const unsigned int regno_out[] = { -1, 24, 24, -1, 22 }; + const unsigned int regno_in[] = { -1U, 22, 22, -1U, 18 }; + const unsigned int regno_out[] = { -1U, 24, 24, -1U, 22 }; operands[3] = gen_rtx_REG (mode, regno_out[(size_t) GET_MODE_SIZE (mode)]); operands[4] = gen_rtx_REG (mode, regno_in[(size_t) GET_MODE_SIZE (mode)]); diff --git a/gcc-4.9/gcc/config/avr/avr.h b/gcc-4.9/gcc/config/avr/avr.h index 78434ec5e..9d34983e4 100644 --- a/gcc-4.9/gcc/config/avr/avr.h +++ b/gcc-4.9/gcc/config/avr/avr.h @@ -251,18 +251,18 @@ enum reg_class { #define REG_CLASS_CONTENTS { \ {0x00000000,0x00000000}, /* NO_REGS */ \ {0x00000001,0x00000000}, /* R0_REG */ \ - {3 << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \ - {3 << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \ - {3 << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \ + {3u << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \ + {3u << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \ + {3u << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \ {0x00000000,0x00000003}, /* STACK_REG, STACK */ \ - {(3 << REG_Y) | (3 << REG_Z), \ + {(3u << REG_Y) | (3u << REG_Z), \ 0x00000000}, /* BASE_POINTER_REGS, r28 - r31 */ \ - {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z), \ + {(3u << REG_X) | (3u << REG_Y) | (3u << REG_Z), \ 0x00000000}, /* POINTER_REGS, r26 - r31 */ \ - {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W), \ + {(3u << REG_X) | (3u << REG_Y) | (3u << REG_Z) | (3u << REG_W), \ 0x00000000}, /* ADDW_REGS, r24 - r31 */ \ {0x00ff0000,0x00000000}, /* SIMPLE_LD_REGS r16 - r23 */ \ - {(3 << REG_X)|(3 << REG_Y)|(3 << REG_Z)|(3 << REG_W)|(0xff << 16), \ + {(3u << REG_X)|(3u << REG_Y)|(3u << REG_Z)|(3u << REG_W)|(0xffu << 16),\ 0x00000000}, /* LD_REGS, r16 - r31 */ \ {0x0000ffff,0x00000000}, /* NO_LD_REGS r0 - r15 */ \ {0xffffffff,0x00000000}, /* GENERAL_REGS, r0 - r31 */ \ diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md index 2c59bf3f9..3bb2a914a 100644 --- 
a/gcc-4.9/gcc/config/avr/avr.md +++ b/gcc-4.9/gcc/config/avr/avr.md @@ -368,6 +368,15 @@ "" { int i; + + // Avoid (subreg (mem)) for non-generic address spaces below. Because + // of the poor addressing capabilities of these spaces it's better to + // load them in one chunk. And it avoids PR61443. + + if (MEM_P (operands[0]) + && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[0]))) + operands[0] = copy_to_mode_reg (mode, operands[0]); + for (i = GET_MODE_SIZE (mode) - 1; i >= 0; --i) { rtx part = simplify_gen_subreg (QImode, operands[0], mode, i); diff --git a/gcc-4.9/gcc/config/dbx.h b/gcc-4.9/gcc/config/dbx.h index 1b68bcd9a..8173bb06c 100644 --- a/gcc-4.9/gcc/config/dbx.h +++ b/gcc-4.9/gcc/config/dbx.h @@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* This file causes gcc to prefer using DBX (stabs) debugging diff --git a/gcc-4.9/gcc/config/i386/driver-i386.c b/gcc-4.9/gcc/config/i386/driver-i386.c index 1f5a11c9c..80f6a0879 100644 --- a/gcc-4.9/gcc/config/i386/driver-i386.c +++ b/gcc-4.9/gcc/config/i386/driver-i386.c @@ -739,6 +739,11 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Assume Core 2. */ cpu = "core2"; } + else if (has_longmode) + /* Perhaps some emulator? Assume x86-64, otherwise gcc + -march=native would be unusable for 64-bit compilations, + as all the CPUs below are 32-bit only. */ + cpu = "x86-64"; else if (has_sse3) /* It is Core Duo. */ cpu = "pentium-m"; diff --git a/gcc-4.9/gcc/config/i386/gnu-user.h b/gcc-4.9/gcc/config/i386/gnu-user.h index d9e3fa434..21b9e9692 100644 --- a/gcc-4.9/gcc/config/i386/gnu-user.h +++ b/gcc-4.9/gcc/config/i386/gnu-user.h @@ -70,10 +70,12 @@ along with GCC; see the file COPYING3. If not see "--32 %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}} " \ LINUX_OR_ANDROID_CC ("", ANDROID_ASM_SPEC) -#undef SUBTARGET_EXTRA_SPECS -#define SUBTARGET_EXTRA_SPECS \ +#undef SUBTARGET_EXTRA_SPECS_STR +#define SUBTARGET_EXTRA_SPECS_STR \ { "link_emulation", GNU_USER_LINK_EMULATION },\ { "dynamic_linker", GNU_USER_DYNAMIC_LINKER } +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS SUBTARGET_EXTRA_SPECS_STR #define GNU_USER_TARGET_LINK_SPEC "-m %(link_emulation) %{shared:-shared} \ %{!shared: \ diff --git a/gcc-4.9/gcc/config/i386/i386-protos.h b/gcc-4.9/gcc/config/i386/i386-protos.h index 6e3297880..fc0eb53f8 100644 --- a/gcc-4.9/gcc/config/i386/i386-protos.h +++ b/gcc-4.9/gcc/config/i386/i386-protos.h @@ -28,6 +28,16 @@ extern bool ix86_target_stack_probe (void); extern bool ix86_can_use_return_insn_p (void); extern void ix86_setup_frame_addresses (void); +/* Section names for function patch prologue and epilogue section. See + ix86_output_function_nops_prologue_epilogue() in i386.c for details. 
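   (As an aside: on a binary built with -mpatch-functions-for-instrumentation,
   the entry points recorded in these sections can be inspected with, e.g.,
   "objdump -s -j _function_patch_prologue"; the objdump invocation is an
   illustrative suggestion, not something this patch provides.)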
*/
+#define FUNCTION_PATCH_PROLOGUE_SECTION "_function_patch_prologue"
+#define FUNCTION_PATCH_EPILOGUE_SECTION "_function_patch_epilogue"
+
+extern bool ix86_output_function_nops_prologue_epilogue (FILE *,
+                                                         const char *,
+                                                         const char *,
+                                                         int);
+
 extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
 extern void ix86_expand_prologue (void);
 extern void ix86_maybe_emit_epilogue_vzeroupper (void);
@@ -312,6 +322,7 @@ extern enum attr_cpu ix86_schedule;
 #endif
 extern const char * ix86_output_call_insn (rtx insn, rtx call_op);
+extern bool adjacent_mem_locations (rtx mem1, rtx mem2);
 
 #ifdef RTX_CODE
 /* Target data for multipass lookahead scheduling.
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index d7c592f48..df504335e 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -78,6 +78,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic.h"
 #include "dumpfile.h"
 #include "tree-pass.h"
+#include "cfgloop.h"
 #include "context.h"
 #include "pass_manager.h"
 #include "target-globals.h"
@@ -5017,8 +5018,11 @@ ix86_in_large_data_p (tree exp)
       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
 
       /* If this is an incomplete type with size 0, then we can't put it
-	 in data because it might be too big when completed.  */
-      if (!size || size > ix86_section_threshold)
+	 in data because it might be too big when completed.  Also,
+	 int_size_in_bytes returns -1 if the size can vary or is larger than
+	 an integer, in which case it is also safer to assume that it goes in
+	 large data.  */
+      if (size <= 0 || size > ix86_section_threshold)
 	return true;
     }
 
@@ -11730,6 +11734,246 @@ ix86_expand_epilogue (int style)
   m->fs = frame_state_save;
 }
 
+
+/* True if the current function should be patched with nops at prologue and
+   returns.  */
+static bool patch_current_function_p = false;
+
+static inline bool
+has_attribute (const char* attribute_name)
+{
+  return lookup_attribute (attribute_name,
+                           DECL_ATTRIBUTES (current_function_decl)) != NULL;
+}
+
+/* Return true if we patch the current function.  By default a function
+   is patched if it has loops or if the number of insns is greater than
+   patch_functions_min_instructions (number of insns roughly translates
+   to number of instructions).  */
+
+static bool
+check_should_patch_current_function (void)
+{
+  int num_insns = 0;
+  rtx insn;
+  const char *func_name = NULL;
+  struct loops *loops;
+  int num_loops = 0;
+  int min_functions_instructions;
+
+  /* If a function has an attribute forcing patching on or off, do as it
+     indicates.  */
+  if (has_attribute ("always_patch_for_instrumentation"))
+    return true;
+  else if (has_attribute ("never_patch_for_instrumentation"))
+    return false;
+
+  /* Patch the function if it has at least one loop.  */
+  if (!patch_functions_ignore_loops)
+    {
+      if (DECL_STRUCT_FUNCTION (current_function_decl)->cfg)
+	{
+	  loops = flow_loops_find (NULL);
+	  num_loops = loops->larray->length();
+	  /* FIXME - Deallocating the loop causes a seg-fault.  */
+#if 0
+	  flow_loops_free (loops);
+#endif
+	  /* We are not concerned with the function body as a loop.  */
+	  if (num_loops > 1)
+	    return true;
+	}
+    }
+
+  /* Otherwise, check if the function has more than
+     patch_functions_min_instructions.  */
+
+  /* Borrowed this code from rest_of_handle_final() in final.c.
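     As a usage note for the attribute checks above, patching can be forced
     or suppressed per function from user code (a hypothetical example; the
     function names are made up):

       void hot_path (void) __attribute__ ((always_patch_for_instrumentation));
       void tiny_stub (void) __attribute__ ((never_patch_for_instrumentation));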
*/ + func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + if (!patch_functions_dont_always_patch_main && + func_name && + strcmp("main", func_name) == 0) + return true; + + min_functions_instructions = + PARAM_VALUE (PARAM_FUNCTION_PATCH_MIN_INSTRUCTIONS); + if (min_functions_instructions > 0) + { + /* Calculate the number of instructions in this function and only emit + function patch for instrumentation if it is greater than + patch_functions_min_instructions. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (NONDEBUG_INSN_P (insn)) + ++num_insns; + } + if (num_insns < min_functions_instructions) + return false; + } + + return true; +} + +/* Emit the 11-byte patch space for the function prologue for functions that + qualify. */ + +static void +ix86_output_function_prologue (FILE *file, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + /* Only for 64-bit target. */ + if (TARGET_64BIT && patch_functions_for_instrumentation) + { + patch_current_function_p = check_should_patch_current_function(); + /* Emit the instruction 'jmp 09' followed by 9 bytes to make it 11-bytes + of nop. */ + ix86_output_function_nops_prologue_epilogue ( + file, + FUNCTION_PATCH_PROLOGUE_SECTION, + ASM_BYTE"0xeb,0x09", + 9); + } +} + +/* Emit the nop bytes at function prologue or return (including tail call + jumps). The number of nop bytes generated is at least 8. + Also emits a section named SECTION_NAME, which is a backpointer section + holding the addresses of the nop bytes in the text section. + SECTION_NAME is either '_function_patch_prologue' or + '_function_patch_epilogue'. The backpointer section can be used to navigate + through all the function entry and exit points which are patched with nops. + PRE_INSTRUCTIONS are the instructions, if any, at the start of the nop byte + sequence. NUM_REMAINING_NOPS are the number of nop bytes to fill, + excluding the number of bytes in PRE_INSTRUCTIONS. + Returns true if the function was patched, false otherwise. */ + +bool +ix86_output_function_nops_prologue_epilogue (FILE *file, + const char *section_name, + const char *pre_instructions, + int num_remaining_nops) +{ + static int labelno = 0; + char label[32], section_label[32]; + section *section = NULL; + int num_actual_nops = num_remaining_nops - sizeof(void *); + unsigned int section_flags = SECTION_RELRO; + char *section_name_comdat = NULL; + const char *decl_section_name = NULL; + const char *func_name = NULL; + char *section_name_function_sections = NULL; + size_t len; + + gcc_assert (num_remaining_nops >= 0); + + if (!patch_current_function_p) + return false; + + ASM_GENERATE_INTERNAL_LABEL (label, "LFPEL", labelno); + ASM_GENERATE_INTERNAL_LABEL (section_label, "LFPESL", labelno++); + + /* Align the start of nops to 2-byte boundary so that the 2-byte jump + instruction can be patched atomically at run time. */ + ASM_OUTPUT_ALIGN (file, 1); + + /* Emit nop bytes. They look like the following: + $LFPEL0: + + 0x90 (repeated num_actual_nops times) + .quad $LFPESL0 - . + followed by section 'section_name' which contains the address + of instruction at 'label'. + */ + ASM_OUTPUT_INTERNAL_LABEL (file, label); + if (pre_instructions) + fprintf (file, "%s\n", pre_instructions); + + while (num_actual_nops-- > 0) + asm_fprintf (file, ASM_BYTE"0x90\n"); + + fprintf (file, ASM_QUAD); + /* Output "section_label - ." for the relative address of the entry in + the section 'section_name'. 
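     Altogether, for a prologue patch (pre_instructions = the two-byte jmp,
     num_remaining_nops = 9) the emitted text is expected to look roughly
     like this on LP64, where sizeof(void *) == 8 leaves one 0x90 filler
     byte (a sketch; exact label spelling depends on the target macros):

       .p2align 1
     .LFPEL0:
       .byte 0xeb,0x09    # two-byte jmp over the patch area
       .byte 0x90         # nop filler
       .quad .LFPESL0 - .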
*/
+  assemble_name_raw (file, section_label);
+  fprintf (file, " - .");
+  fprintf (file, "\n");
+
+  /* Emit the backpointer section.  For functions belonging to comdat group,
+     we emit a different section named 'SECTION_NAME.foo' where 'foo' is
+     the name of the comdat section.  This section is later renamed to
+     'SECTION_NAME' by ix86_elf_asm_named_section().
+     We emit a unique section name for the back pointer section for comdat
+     functions because otherwise the 'get_section' call may return an existing
+     non-comdat section with the same name, leading to references from a
+     non-comdat section to comdat functions.
+  */
+  if (current_function_decl != NULL_TREE &&
+      DECL_ONE_ONLY (current_function_decl) &&
+      HAVE_COMDAT_GROUP)
+    {
+      decl_section_name =
+        TREE_STRING_POINTER (DECL_SECTION_NAME (current_function_decl));
+      len = strlen (decl_section_name) + strlen (section_name) + 2;
+      section_name_comdat = (char *) alloca (len);
+      sprintf (section_name_comdat, "%s.%s", section_name, decl_section_name);
+      section_name = section_name_comdat;
+      section_flags |= SECTION_LINKONCE;
+    }
+  else if (flag_function_sections)
+    {
+      func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+      if (func_name)
+        {
+          len = strlen (func_name) + strlen (section_name) + 2;
+          section_name_function_sections = (char *) alloca (len);
+          sprintf (section_name_function_sections, "%s.%s", section_name,
+                   func_name);
+          section_name = section_name_function_sections;
+        }
+    }
+  section = get_section (section_name, section_flags, current_function_decl);
+  switch_to_section (section);
+  /* Align the section to 8-byte boundary.  */
+  ASM_OUTPUT_ALIGN (file, 3);
+
+  /* Emit address of the start of nop bytes in the section:
+       $LFPESL0:
+         .quad $LFPEL0
+  */
+  ASM_OUTPUT_INTERNAL_LABEL (file, section_label);
+  fprintf(file, ASM_QUAD);
+  assemble_name_raw (file, label);
+  fprintf (file, "\n");
+
+  /* Switching back to text section.  */
+  switch_to_section (function_section (current_function_decl));
+  return true;
+}
+
+/* Strips the characters after '_function_patch_prologue' or
+   '_function_patch_epilogue' and emits the section.  */
+
+static void
+ix86_elf_asm_named_section (const char *name, unsigned int flags,
+                            tree decl)
+{
+  const char *section_name = name;
+  if (!flag_function_sections && HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
+    {
+      const int prologue_section_name_length =
+        sizeof(FUNCTION_PATCH_PROLOGUE_SECTION) - 1;
+      const int epilogue_section_name_length =
+        sizeof(FUNCTION_PATCH_EPILOGUE_SECTION) - 1;
+
+      if (strncmp (name, FUNCTION_PATCH_PROLOGUE_SECTION,
+                   prologue_section_name_length) == 0)
+        section_name = FUNCTION_PATCH_PROLOGUE_SECTION;
+      else if (strncmp (name, FUNCTION_PATCH_EPILOGUE_SECTION,
+                        epilogue_section_name_length) == 0)
+        section_name = FUNCTION_PATCH_EPILOGUE_SECTION;
+    }
+  default_elf_asm_named_section (section_name, flags, decl);
+}
+
 /* Reset from the function's potential modifications.
*/ static void @@ -12659,7 +12903,9 @@ legitimate_pic_address_disp_p (rtx disp) return true; } else if (!SYMBOL_REF_FAR_ADDR_P (op0) - && SYMBOL_REF_LOCAL_P (op0) + && (SYMBOL_REF_LOCAL_P (op0) + || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie + && !SYMBOL_REF_FUNCTION_P (op0))) && ix86_cmodel != CM_LARGE_PIC) return true; break; @@ -21507,7 +21753,7 @@ ix86_expand_vec_perm (rtx operands[]) t1 = gen_reg_rtx (V32QImode); t2 = gen_reg_rtx (V32QImode); t3 = gen_reg_rtx (V32QImode); - vt2 = GEN_INT (128); + vt2 = GEN_INT (-128); for (i = 0; i < 32; i++) vec[i] = vt2; vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); @@ -23794,7 +24040,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, { const struct stringop_algs * algs; bool optimize_for_speed; - int max = -1; + int max = 0; const struct processor_costs *cost; int i; bool any_alg_usable_p = false; @@ -23832,7 +24078,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* If expected size is not known but max size is small enough so inline version is a win, set expected size into the range. */ - if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size + if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) && expected_size == -1) expected_size = min_size / 2 + max_size / 2; @@ -23921,7 +24167,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *dynamic_check = 128; return loop_1_byte; } - if (max == -1) + if (max <= 0) max = 4096; alg = decide_alg (count, max / 2, min_size, max_size, memset, zero_memset, dynamic_check, noalign); @@ -24945,6 +25191,15 @@ ix86_output_call_insn (rtx insn, rtx call_op) else xasm = "jmp\t%A0"; + /* Just before the sibling call, add 11-bytes of nops to patch function + exit: 2 bytes for 'jmp 09' and remaining 9 bytes. */ + if (TARGET_64BIT && patch_functions_for_instrumentation) + ix86_output_function_nops_prologue_epilogue ( + asm_out_file, + FUNCTION_PATCH_EPILOGUE_SECTION, + ASM_BYTE"0xeb, 0x09", + 9); + output_asm_insn (xasm, &call_op); return ""; } @@ -26238,13 +26493,17 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail) { edge e; edge_iterator ei; - /* Assume that region is SCC, i.e. all immediate predecessors - of non-head block are in the same region. */ + + /* Regions are SCCs with the exception of selective + scheduling with pipelining of outer blocks enabled. + So also check that immediate predecessors of a non-head + block are in the same region. */ FOR_EACH_EDGE (e, ei, bb->preds) { /* Avoid creating of loop-carried dependencies through - using topological odering in region. */ - if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) + using topological ordering in the region. 
*/ + if (rgn == CONTAINING_RGN (e->src->index) + && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) add_dependee_for_func_arg (first_arg, e->src); } } @@ -28789,7 +29048,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name, ix86_builtins_isa[(int) code].isa = mask; mask &= ~OPTION_MASK_ISA_64BIT; - if (mask == 0 + if (flag_dyn_ipa + || mask == 0 || (mask & ix86_isa_flags) != 0 || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) @@ -37802,10 +38062,10 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) *total = 2; else if (flag_pic && SYMBOLIC_CONST (x) - && (!TARGET_64BIT - || (!GET_CODE (x) != LABEL_REF - && (GET_CODE (x) != SYMBOL_REF - || !SYMBOL_REF_LOCAL_P (x))))) + && !(TARGET_64BIT + && (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_LOCAL_P (x))))) *total = 1; else *total = 0; @@ -46745,6 +47005,70 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) atomic_feraiseexcept_call); } +/* Try to determine BASE/OFFSET/SIZE parts of the given MEM. + Return true if successful, false if all the values couldn't + be determined. + + This function only looks for REG/SYMBOL or REG/SYMBOL+CONST + address forms. */ + +static bool +get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset, + HOST_WIDE_INT *size) +{ + rtx addr_rtx; + if MEM_SIZE_KNOWN_P (mem) + *size = MEM_SIZE (mem); + else + return false; + + if (GET_CODE (XEXP (mem, 0)) == CONST) + addr_rtx = XEXP (XEXP (mem, 0), 0); + else + addr_rtx = (XEXP (mem, 0)); + + if (GET_CODE (addr_rtx) == REG + || GET_CODE (addr_rtx) == SYMBOL_REF) + { + *base = addr_rtx; + *offset = 0; + } + else if (GET_CODE (addr_rtx) == PLUS + && CONST_INT_P (XEXP (addr_rtx, 1))) + { + *base = XEXP (addr_rtx, 0); + *offset = INTVAL (XEXP (addr_rtx, 1)); + } + else + return false; + + return true; +} + +/* If MEM1 is adjacent to MEM2 and MEM1 has lower address, + return true. */ + +extern bool +adjacent_mem_locations (rtx mem1, rtx mem2) +{ + rtx base1, base2; + HOST_WIDE_INT off1, size1, off2, size2; + + if (get_memref_parts (mem1, &base1, &off1, &size1) + && get_memref_parts (mem2, &base2, &off2, &size2)) + { + if (GET_CODE (base1) == SYMBOL_REF + && GET_CODE (base2) == SYMBOL_REF + && SYMBOL_REF_DECL (base1) == SYMBOL_REF_DECL (base2)) + return (off1 + size1 == off2); + else if (REG_P (base1) + && REG_P (base2) + && REGNO (base1) == REGNO (base2)) + return (off1 + size1 == off2); + } + return false; +} + /* Initialize the GCC target structure. 
*/ #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory @@ -46787,9 +47111,15 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #undef TARGET_BUILTIN_RECIPROCAL #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE ix86_output_function_prologue + #undef TARGET_ASM_FUNCTION_EPILOGUE #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION ix86_elf_asm_named_section + #undef TARGET_ENCODE_SECTION_INFO #ifndef SUBTARGET_ENCODE_SECTION_INFO #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md index 9f103cf30..058702904 100644 --- a/gcc-4.9/gcc/config/i386/i386.md +++ b/gcc-4.9/gcc/config/i386/i386.md @@ -3201,7 +3201,7 @@ (const_string "1") (const_string "*"))) (set (attr "mode") - (cond [(eq_attr "alternative" "3,4,9,10,13,14,15") + (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15") (const_string "SI") (eq_attr "alternative" "11") (const_string "DI") @@ -4933,66 +4933,37 @@ ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two -;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC -;; targets benefit from this optimization. Also note that fild -;; loads from memory only. +;; SImode values to stack. Also note that fild loads from memory only. -(define_insn "*floatunssi2_1" - [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") +(define_insn_and_split "*floatunssi2_i387_with_xmm" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") (unsigned_float:X87MODEF - (match_operand:SI 1 "nonimmediate_operand" "x,m"))) - (clobber (match_operand:DI 2 "memory_operand" "=m,m")) - (clobber (match_scratch:SI 3 "=X,x"))] + (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:DI 3 "=x")) + (clobber (match_operand:DI 2 "memory_operand" "=m"))] "!TARGET_64BIT && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE" + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC" "#" - [(set_attr "type" "multi") - (set_attr "mode" "")]) - -(define_split - [(set (match_operand:X87MODEF 0 "register_operand") - (unsigned_float:X87MODEF - (match_operand:SI 1 "register_operand"))) - (clobber (match_operand:DI 2 "memory_operand")) - (clobber (match_scratch:SI 3))] - "!TARGET_64BIT - && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE - && reload_completed" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) - (float:X87MODEF (match_dup 2)))] - "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);") - -(define_split - [(set (match_operand:X87MODEF 0 "register_operand") - (unsigned_float:X87MODEF - (match_operand:SI 1 "memory_operand"))) - (clobber (match_operand:DI 2 "memory_operand")) - (clobber (match_scratch:SI 3))] - "!TARGET_64BIT - && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE - && reload_completed" - [(set (match_dup 2) (match_dup 3)) + "&& reload_completed" + [(set (match_dup 3) (zero_extend:DI (match_dup 1))) + (set (match_dup 2) (match_dup 3)) (set (match_dup 0) (float:X87MODEF (match_dup 2)))] -{ - emit_move_insn (operands[3], operands[1]); - operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0); -}) + "" + [(set_attr "type" "multi") + (set_attr "mode" "")]) (define_expand "floatunssi2" [(parallel [(set 
(match_operand:X87MODEF 0 "register_operand") (unsigned_float:X87MODEF (match_operand:SI 1 "nonimmediate_operand"))) - (clobber (match_dup 2)) - (clobber (match_scratch:SI 3))])] + (clobber (match_scratch:DI 3)) + (clobber (match_dup 2))])] "!TARGET_64BIT && ((TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE) + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" { if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -9627,7 +9598,7 @@ (define_insn "x86_64_shrd" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") - (ior:DI (ashiftrt:DI (match_dup 0) + (ior:DI (lshiftrt:DI (match_dup 0) (match_operand:QI 2 "nonmemory_operand" "Jc")) (ashift:DI (match_operand:DI 1 "register_operand" "r") (minus:QI (const_int 64) (match_dup 2))))) @@ -9643,7 +9614,7 @@ (define_insn "x86_shrd" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") - (ior:SI (ashiftrt:SI (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 0) (match_operand:QI 2 "nonmemory_operand" "Ic")) (ashift:SI (match_operand:SI 1 "register_operand" "r") (minus:QI (const_int 32) (match_dup 2))))) @@ -10095,13 +10066,13 @@ [(set (match_dup 3) (match_dup 4)) (parallel [(set (match_dup 4) - (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2)) + (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2)) (ashift:DWIH (match_dup 5) (minus:QI (match_dup 6) (match_dup 2))))) (clobber (reg:CC FLAGS_REG))]) (parallel [(set (match_dup 5) - (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2)) + (ior:DWIH (lshiftrt:DWIH (match_dup 5) (match_dup 2)) (ashift:DWIH (match_dup 3) (minus:QI (match_dup 6) (match_dup 2))))) (clobber (reg:CC FLAGS_REG))])] @@ -11611,7 +11582,18 @@ (define_insn "simple_return_internal" [(simple_return)] "reload_completed" - "ret" +{ + if (TARGET_64BIT && patch_functions_for_instrumentation) + { + /* Emit 10 nop bytes after ret. */ + if (ix86_output_function_nops_prologue_epilogue (asm_out_file, + FUNCTION_PATCH_EPILOGUE_SECTION, + "\tret", + 10)) + return ""; + } + return "ret"; +} [(set_attr "length" "1") (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") @@ -11624,7 +11606,18 @@ [(simple_return) (unspec [(const_int 0)] UNSPEC_REP)] "reload_completed" - "rep%; ret" +{ + if (TARGET_64BIT && patch_functions_for_instrumentation) + { + /* Emit 9 nop bytes after rep;ret. */ + if (ix86_output_function_nops_prologue_epilogue (asm_out_file, + FUNCTION_PATCH_EPILOGUE_SECTION, + "\trep\;ret", + 9)) + return ""; + } + return "rep\;ret"; +} [(set_attr "length" "2") (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") diff --git a/gcc-4.9/gcc/config/i386/i386.opt b/gcc-4.9/gcc/config/i386/i386.opt index 0f463a238..1e00b660e 100644 --- a/gcc-4.9/gcc/config/i386/i386.opt +++ b/gcc-4.9/gcc/config/i386/i386.opt @@ -108,6 +108,10 @@ int x_ix86_dump_tunes TargetSave int x_ix86_force_align_arg_pointer +;; -mcopyrelocs= +TargetSave +int x_ix86_copyrelocs + ;; -mforce-drap= TargetSave int x_ix86_force_drap @@ -291,6 +295,10 @@ mfancy-math-387 Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save Generate sin, cos, sqrt for FPU +mcopyrelocs +Target Report Var(ix86_pie_copyrelocs) Init(0) +Assume copy relocations support for pie builds. 
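;; For illustration (an assumed example, not part of the option definition):
;; with -fpie -mcopyrelocs on x86-64, a read of an undefined global such as
;;   extern int counter;
;;   int get_counter (void) { return counter; }
;; may be compiled to a direct "movl counter(%rip), %eax" instead of a load
;; through the GOT, relying on the linker to materialize a copy relocation;
;; see the legitimate_pic_address_disp_p change in i386.c.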
+ mforce-drap Target Report Var(ix86_force_drap) Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack @@ -781,6 +789,18 @@ mrtm Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save Support RTM built-in functions and code generation +mpatch-functions-for-instrumentation +Target RejectNegative Report Var(patch_functions_for_instrumentation) Save +Patch function prologue and epilogue with custom NOPs for dynamic instrumentation. By default, functions with loops (controlled by -mpatch-functions-without-loop) or functions having instructions more than -mpatch-functions-min-instructions are patched. + +mpatch-functions-ignore-loops +Target RejectNegative Report Var(patch_functions_ignore_loops) Save +Ignore loops when deciding whether to patch a function for instrumentation (for use with -mpatch-functions-for-instrumentation). + +mno-patch-functions-main-always +Target Report RejectNegative Var(patch_functions_dont_always_patch_main) Save +Treat 'main' as any other function and only patch it if it meets the criteria for loops and minimum number of instructions (for use with -mpatch-functions-for-instrumentation). + mstack-protector-guard= Target RejectNegative Joined Enum(stack_protector_guard) Var(ix86_stack_protector_guard) Init(SSP_TLS) Use given stack-protector guard diff --git a/gcc-4.9/gcc/config/i386/linux.h b/gcc-4.9/gcc/config/i386/linux.h index 1fb1e0321..27d68b5db 100644 --- a/gcc-4.9/gcc/config/i386/linux.h +++ b/gcc-4.9/gcc/config/i386/linux.h @@ -20,4 +20,22 @@ along with GCC; see the file COPYING3. If not see . */ #define GNU_USER_LINK_EMULATION "elf_i386" -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" + +/* These may be provided by config/linux-grtev*.h. */ +#ifndef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS +#endif + +#undef SUBTARGET_EXTRA_SPECS +#ifndef SUBTARGET_EXTRA_SPECS_STR +#define SUBTARGET_EXTRA_SPECS \ + LINUX_GRTE_EXTRA_SPECS +#else +#define SUBTARGET_EXTRA_SPECS \ + LINUX_GRTE_EXTRA_SPECS \ + SUBTARGET_EXTRA_SPECS_STR +#endif diff --git a/gcc-4.9/gcc/config/i386/linux64.h b/gcc-4.9/gcc/config/i386/linux64.h index a90171e8c..5124a341b 100644 --- a/gcc-4.9/gcc/config/i386/linux64.h +++ b/gcc-4.9/gcc/config/i386/linux64.h @@ -27,6 +27,19 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define GNU_USER_LINK_EMULATION64 "elf_x86_64" #define GNU_USER_LINK_EMULATIONX32 "elf32_x86_64" -#define GLIBC_DYNAMIC_LINKER32 "/lib/ld-linux.so.2" -#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld-linux-x86-64.so.2" -#define GLIBC_DYNAMIC_LINKERX32 "/libx32/ld-linux-x32.so.2" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" +#define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2" +#define GLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/libx32/ld-linux-x32.so.2" + +/* These may be provided by config/linux-grtev*.h. 
*/ +#ifndef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS +#endif + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + LINUX_GRTE_EXTRA_SPECS + diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md index 72a4d6d07..27ade1964 100644 --- a/gcc-4.9/gcc/config/i386/sse.md +++ b/gcc-4.9/gcc/config/i386/sse.md @@ -8255,6 +8255,36 @@ DONE; }) +(define_expand "usadv16qi" + [(match_operand:V4SI 0 "register_operand") + (match_operand:V16QI 1 "register_operand") + (match_operand:V16QI 2 "nonimmediate_operand") + (match_operand:V4SI 3 "nonimmediate_operand")] + "TARGET_SSE2" +{ + rtx t1 = gen_reg_rtx (V2DImode); + rtx t2 = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2])); + convert_move (t2, t1, 0); + emit_insn (gen_addv4si3 (operands[0], t2, operands[3])); + DONE; +}) + +(define_expand "usadv32qi" + [(match_operand:V8SI 0 "register_operand") + (match_operand:V32QI 1 "register_operand") + (match_operand:V32QI 2 "nonimmediate_operand") + (match_operand:V8SI 3 "nonimmediate_operand")] + "TARGET_AVX2" +{ + rtx t1 = gen_reg_rtx (V4DImode); + rtx t2 = gen_reg_rtx (V8SImode); + emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2])); + convert_move (t2, t1, 0); + emit_insn (gen_addv8si3 (operands[0], t2, operands[3])); + DONE; +}) + (define_insn "ashr3" [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") (ashiftrt:VI24_AVX2 @@ -15606,3 +15636,37 @@ [(set_attr "type" "sselog1") (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) + +;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_LOAD_OPTIMAL +;; is true. +(define_peephole2 + [(set (match_operand:DF 0 "register_operand") + (match_operand:DF 1 "memory_operand")) + (set (match_operand:V2DF 2 "register_operand") + (vec_concat:V2DF (match_dup 0) + (match_operand:DF 3 "memory_operand")))] + "TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + && REGNO (operands[0]) == REGNO (operands[2]) + && adjacent_mem_locations (operands[1], operands[3])" + [(set (match_dup 2) + (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))] +{ + operands[4] = gen_rtx_MEM (V2DFmode, XEXP(operands[1], 0)); +}) + +;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_STORE_OPTIMAL +;; is true. +(define_peephole2 + [(set (match_operand:DF 0 "memory_operand") + (vec_select:DF (match_operand:V2DF 1 "register_operand") + (parallel [(const_int 0)]))) + (set (match_operand:DF 2 "memory_operand") + (vec_select:DF (match_dup 1) + (parallel [(const_int 1)])))] + "TARGET_SSE_UNALIGNED_STORE_OPTIMAL + && adjacent_mem_locations (operands[0], operands[2])" + [(set (match_dup 3) + (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))] +{ + operands[3] = gen_rtx_MEM (V2DFmode, XEXP(operands[0], 0)); +}) diff --git a/gcc-4.9/gcc/config/initfini-array.h b/gcc-4.9/gcc/config/initfini-array.h index f7ae836e6..67e66f6f2 100644 --- a/gcc-4.9/gcc/config/initfini-array.h +++ b/gcc-4.9/gcc/config/initfini-array.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . 
*/ #ifdef HAVE_INITFINI_ARRAY_SUPPORT diff --git a/gcc-4.9/gcc/config/linux-grte.h b/gcc-4.9/gcc/config/linux-grte.h new file mode 100644 index 000000000..31e8a94ce --- /dev/null +++ b/gcc-4.9/gcc/config/linux-grte.h @@ -0,0 +1,41 @@ +/* Definitions for Linux-based GRTE (Google RunTime Environment). + Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. + Contributed by Chris Demetriou and Ollie Wild. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Overrides LIB_SPEC from gnu-user.h. */ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" + +/* When GRTE links statically, it needs its NSS and resolver libraries + linked in as well. Note that when linking statically, these are + enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */ +#undef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS \ + { "libc", "%{static:%(libc_static);:-lc}" }, \ + { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ + { "libc_static", "-lc -lresolv" }, \ + { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/linux.c b/gcc-4.9/gcc/config/linux.c index 6162675d8..2f1cd8e63 100644 --- a/gcc-4.9/gcc/config/linux.c +++ b/gcc-4.9/gcc/config/linux.c @@ -38,7 +38,9 @@ linux_libc_has_function (enum function_class fn_class) return true; if (OPTION_BIONIC) if (fn_class == function_c94 - || fn_class == function_c99_misc) + || fn_class == function_c99_misc + || (fn_class == function_sincos && !TARGET_ANDROID) + ) return true; return false; diff --git a/gcc-4.9/gcc/config/linux.h b/gcc-4.9/gcc/config/linux.h index d38ef81e3..7c2a8e2c0 100644 --- a/gcc-4.9/gcc/config/linux.h +++ b/gcc-4.9/gcc/config/linux.h @@ -73,13 +73,16 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see GLIBC_DYNAMIC_LINKER must be defined for each target using them, or GLIBC_DYNAMIC_LINKER32 and GLIBC_DYNAMIC_LINKER64 for targets supporting both 32-bit and 64-bit compilation. 
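With function_sincos reported as available by linux_libc_has_function above, GCC's cse_sincos pass can combine a sin/cos pair on the same argument into one sincos (via cexpi) call. A hedged example of code that can benefit; whether the fusion actually fires depends on the optimization level and target libc:

#include <math.h>

/* sin and cos of the same angle: candidates for a single sincos call
   when the libc hook says sincos exists.  */
void
polar_to_cart (double r, double phi, double *x, double *y)
{
  *x = r * cos (phi);
  *y = r * sin (phi);
}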
*/ -#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" -#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0" -#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0" -#define UCLIBC_DYNAMIC_LINKERX32 "/lib/ldx32-uClibc.so.0" -#define BIONIC_DYNAMIC_LINKER "/system/bin/linker" -#define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker" -#define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define UCLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-uClibc.so.0" +#define UCLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-uClibc.so.0" +#define UCLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib/ld64-uClibc.so.0" +#define UCLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/lib/ldx32-uClibc.so.0" +#define BIONIC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/system/bin/linker" +#define BIONIC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/system/bin/linker" +#define BIONIC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/system/bin/linker64" #define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32" #define GNU_USER_DYNAMIC_LINKER \ diff --git a/gcc-4.9/gcc/config/msp430/msp430-opts.h b/gcc-4.9/gcc/config/msp430/msp430-opts.h new file mode 100644 index 000000000..119cfcb7f --- /dev/null +++ b/gcc-4.9/gcc/config/msp430/msp430-opts.h @@ -0,0 +1,32 @@ +/* GCC option-handling definitions for the TI MSP430 + Copyright (C) 2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef MSP430_OPTS_H +#define MSP430_OPTS_H + +enum msp430_hwmult_types +{ + NONE, + AUTO, + SMALL, + LARGE, + F5SERIES +}; + +#endif diff --git a/gcc-4.9/gcc/config/msp430/msp430.c b/gcc-4.9/gcc/config/msp430/msp430.c index c844aa2a1..9eb140974 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.c +++ b/gcc-4.9/gcc/config/msp430/msp430.c @@ -730,6 +730,97 @@ msp430_get_raw_result_mode (int regno ATTRIBUTE_UNUSED) { return Pmode; } + +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR msp430_gimplify_va_arg_expr + +#include "gimplify.h" +#include "gimple-expr.h" + +static tree +msp430_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + tree addr, t, type_size, rounded_size, valist_tmp; + unsigned HOST_WIDE_INT align, boundary; + bool indirect; + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect) + type = build_pointer_type (type); + + align = PARM_BOUNDARY / BITS_PER_UNIT; + boundary = targetm.calls.function_arg_boundary (TYPE_MODE (type), type); + + /* When we align parameter on stack for caller, if the parameter + alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be + aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee + here with caller. */ + if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT) + boundary = MAX_SUPPORTED_STACK_ALIGNMENT; + + boundary /= BITS_PER_UNIT; + + /* Hoist the valist value into a temporary for the moment. 
*/ + valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL); + + /* va_list pointer is aligned to PARM_BOUNDARY. If argument actually + requires greater alignment, we must perform dynamic alignment. */ + if (boundary > align + && !integer_zerop (TYPE_SIZE (type))) + { + /* FIXME: This is where this function diverts from targhooks.c: + std_gimplify_va_arg_expr(). It works, but I do not know why... */ + if (! POINTER_TYPE_P (type)) + { + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp, + fold_build_pointer_plus_hwi (valist_tmp, boundary - 1)); + gimplify_and_add (t, pre_p); + + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp, + fold_build2 (BIT_AND_EXPR, TREE_TYPE (valist), + valist_tmp, + build_int_cst (TREE_TYPE (valist), -boundary))); + gimplify_and_add (t, pre_p); + } + } + else + boundary = align; + + /* If the actual alignment is less than the alignment of the type, + adjust the type accordingly so that we don't assume strict alignment + when dereferencing the pointer. */ + boundary *= BITS_PER_UNIT; + if (boundary < TYPE_ALIGN (type)) + { + type = build_variant_type_copy (type); + TYPE_ALIGN (type) = boundary; + } + + /* Compute the rounded size of the type. */ + type_size = size_in_bytes (type); + rounded_size = round_up (type_size, align); + + /* Reduce rounded_size so it's sharable with the postqueue. */ + gimplify_expr (&rounded_size, pre_p, post_p, is_gimple_val, fb_rvalue); + + /* Get AP. */ + addr = valist_tmp; + + /* Compute new value for AP. */ + t = fold_build_pointer_plus (valist_tmp, rounded_size); + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + gimplify_and_add (t, pre_p); + + addr = fold_convert (build_pointer_type (type), addr); + + if (indirect) + addr = build_va_arg_indirect_ref (addr); + + addr = build_va_arg_indirect_ref (addr); + + return addr; +} /* Addressing Modes */ @@ -2185,8 +2276,32 @@ msp430_print_operand (FILE * file, rtx op, int letter) msp430_print_operand_addr (file, addr); break; - case CONST_INT: case CONST: + if (GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT) + { + op = XEXP (op, 0); + switch (INTVAL (XEXP (op, 2))) + { + case 0: + fprintf (file, "#lo ("); + msp430_print_operand_raw (file, XEXP (op, 0)); + fprintf (file, ")"); + break; + + case 16: + fprintf (file, "#hi ("); + msp430_print_operand_raw (file, XEXP (op, 0)); + fprintf (file, ")"); + break; + + default: + output_operand_lossage ("invalid zero extract"); + break; + } + break; + } + /* Fall through. */ + case CONST_INT: case SYMBOL_REF: case LABEL_REF: if (letter == 0) diff --git a/gcc-4.9/gcc/config/msp430/msp430.h b/gcc-4.9/gcc/config/msp430/msp430.h index 65d6ad66d..044e558a3 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.h +++ b/gcc-4.9/gcc/config/msp430/msp430.h @@ -55,8 +55,8 @@ extern bool msp430x; "%{mcpu=*:-mcpu=%*}%{!mcpu=*:%{mmcu=*:-mmcu=%*}} " /* Pass the CPU type on to the assembler. */ \ "%{mrelax=-mQ} " /* Pass the relax option on to the assembler. */ \ "%{mlarge:-ml} " /* Tell the assembler if we are building for the LARGE pointer model. */ \ - "%{!msim:-md} %{msim:%{mlarge:-md}}" /* Copy data from ROM to RAM if necessary. */ \ - "%{ffunction-sections:-gdwarf-sections}" /* If function sections are being created then create DWARF line number sections as well. */ + "%{!msim:-md} %{msim:%{mlarge:-md}} " /* Copy data from ROM to RAM if necessary. */ \ + "%{ffunction-sections:-gdwarf-sections} " /* If function sections are being created then create DWARF line number sections as well. 
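The ASM_SPEC hunk above works because adjacent C string literals concatenate with no implicit separator; without the added trailing spaces the -md fragment and the %{ffunction-sections:...} fragment would run together into one unparsable spec. A minimal illustration of the failure mode:

#include <stdio.h>

int
main (void)
{
  /* No space at the end of the first literal: the two spec fragments
     fuse into "...%{mlarge:-md}}%{ffunction-sections:...".  */
  const char *fused = "%{!msim:-md} %{msim:%{mlarge:-md}}"
                      "%{ffunction-sections:-gdwarf-sections}";
  puts (fused);
  return 0;
}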
*/ /* Enable linker section garbage collection by default, unless we are creating a relocatable binary (gc does not work) or debugging diff --git a/gcc-4.9/gcc/config/msp430/msp430.md b/gcc-4.9/gcc/config/msp430/msp430.md index 74a98b480..5e890eced 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.md +++ b/gcc-4.9/gcc/config/msp430/msp430.md @@ -362,8 +362,8 @@ ; halves. (define_split [(set (match_operand:SI 0 "msp430_nonsubreg_operand") - (plus:SI (match_operand:SI 1 "nonimmediate_operand") - (match_operand:SI 2 "general_operand"))) + (plus:SI (match_operand:SI 1 "msp430_nonsubreg_operand") + (match_operand:SI 2 "msp430_nonsubreg_or_imm_operand"))) ] "" [(parallel [(set (match_operand:HI 3 "nonimmediate_operand" "=&rm") @@ -609,9 +609,15 @@ ; when the PSI value is negative.. ; ; Note: using PUSHM.A #1 is two bytes smaller than using PUSHX.A.... +; +; Note: We use a + constraint on operand 0 as otherwise GCC gets confused +; about extending a single PSI mode register into a pair of SImode registers +; with the same starting register. It thinks that the upper register of +; the pair is unused and so it can clobber it. Try compiling 20050826-2.c +; at -O2 to see this. (define_insn "zero_extendpsisi2" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "+r") (zero_extend:SI (match_operand:PSI 1 "register_operand" "r")))] "" "* @@ -1311,9 +1317,9 @@ "" "* if (REGNO (operands[0]) != REGNO (operands[1])) - return \"MOV.W\t%1, %0 { SUB.W\t#0, %0 { AND.W\t%2, %0\"; + return \"MOV.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\"; else - return \"SUB.W\t#0, %0 { AND.W\t%2, %0\"; + return \"INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\"; " ) @@ -1324,9 +1330,9 @@ "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) - return \"PUSH.W sr { DINT { MOV.W %1, &0x04C2 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x04C2 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %1, &0x0132 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x0132 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\"; " ) @@ -1337,9 +1343,9 @@ "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) - return \"PUSH.W sr { DINT { MOV.W %1, &0x04C0 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x04C0 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %1, &0x0130 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x0130 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\"; " ) @@ -1350,9 +1356,9 @@ "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) - return \"PUSH.W sr { DINT { MOV.W %L1, &0x04D4 { MOV.W %H1, &0x04D6 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x04D4 { MOV.W %H1, &0x04D6 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %L1, &0x0144 { MOV.W %H1, &0x0146 { 
MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x0144 { MOV.W %H1, &0x0146 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; " ) @@ -1363,8 +1369,8 @@ "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) - return \"PUSH.W sr { DINT { MOV.W %L1, &0x04D0 { MOV.W %H1, &0x04D2 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x04D0 { MOV.W %H1, &0x04D2 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0142 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0142 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; " ) diff --git a/gcc-4.9/gcc/config/msp430/predicates.md b/gcc-4.9/gcc/config/msp430/predicates.md index 9a8e2da0a..94a628cd5 100644 --- a/gcc-4.9/gcc/config/msp430/predicates.md +++ b/gcc-4.9/gcc/config/msp430/predicates.md @@ -73,6 +73,10 @@ (define_predicate "msp430_nonsubreg_operand" (match_code "reg,mem")) +(define_predicate "msp430_nonsubreg_or_imm_operand" + (ior (match_operand 0 "msp430_nonsubreg_operand") + (match_operand 0 "immediate_operand"))) + ; TRUE for constants which are bit positions for zero_extract (define_predicate "msp430_bitpos" (and (match_code "const_int") diff --git a/gcc-4.9/gcc/config/newlib-stdint.h b/gcc-4.9/gcc/config/newlib-stdint.h index f4a78a544..47445e42d 100644 --- a/gcc-4.9/gcc/config/newlib-stdint.h +++ b/gcc-4.9/gcc/config/newlib-stdint.h @@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . 
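The NOP inserted after each DINT in the hardware-multiply sequences above reflects that on MSP430 the effect of DINT is delayed by one instruction, so the instruction immediately following it can still be interrupted. A sketch of the same idiom in C with inline assembly; this helper is illustrative and not from the patch:

/* Disable interrupts and ensure the disable has taken effect before
   the next instruction executes.  */
static inline void
disable_interrupts (void)
{
  __asm__ __volatile__ ("dint\n\tnop" ::: "memory");
}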
*/ /* newlib uses 32-bit long in certain cases for all non-SPU diff --git a/gcc-4.9/gcc/config/rs6000/htm.md b/gcc-4.9/gcc/config/rs6000/htm.md index 9dbb499ce..ca7f7fdf4 100644 --- a/gcc-4.9/gcc/config/rs6000/htm.md +++ b/gcc-4.9/gcc/config/rs6000/htm.md @@ -179,7 +179,7 @@ (const_int 0)] UNSPECV_HTM_TABORTWCI)) (set (subreg:CC (match_dup 2) 0) (match_dup 1)) - (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 24))) + (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 28))) (parallel [(set (match_operand:SI 0 "int_reg_operand" "") (and:SI (match_dup 3) (const_int 15))) (clobber (scratch:CC))])] diff --git a/gcc-4.9/gcc/config/rs6000/htmxlintrin.h b/gcc-4.9/gcc/config/rs6000/htmxlintrin.h index 38dc066d3..bf7fe3a75 100644 --- a/gcc-4.9/gcc/config/rs6000/htmxlintrin.h +++ b/gcc-4.9/gcc/config/rs6000/htmxlintrin.h @@ -46,12 +46,17 @@ extern "C" { typedef char TM_buff_type[16]; +/* Compatibility macro with s390. This macro can be used to determine + whether a transaction was successfully started from the __TM_begin() + and __TM_simple_begin() intrinsic functions below. */ +#define _HTM_TBEGIN_STARTED 1 + extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_simple_begin (void) { if (__builtin_expect (__builtin_tbegin (0), 1)) - return 1; + return _HTM_TBEGIN_STARTED; return 0; } @@ -61,7 +66,7 @@ __TM_begin (void* const TM_buff) { *_TEXASRL_PTR (TM_buff) = 0; if (__builtin_expect (__builtin_tbegin (0), 1)) - return 1; + return _HTM_TBEGIN_STARTED; #ifdef __powerpc64__ *_TEXASR_PTR (TM_buff) = __builtin_get_texasr (); #else diff --git a/gcc-4.9/gcc/config/rs6000/linux-grte.h b/gcc-4.9/gcc/config/rs6000/linux-grte.h new file mode 100644 index 000000000..53997f027 --- /dev/null +++ b/gcc-4.9/gcc/config/rs6000/linux-grte.h @@ -0,0 +1,41 @@ +/* Definitions for Linux-based GRTE (Google RunTime Environment). + Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. + Contributed by Chris Demetriou and Ollie Wild. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Overrides LIB_LINUX_SPEC from sysv4.h. */ +#undef LIB_LINUX_SPEC +#define LIB_LINUX_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" + +/* When GRTE links statically, it needs its NSS and resolver libraries + linked in as well. Note that when linking statically, these are + enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. 
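The new _HTM_TBEGIN_STARTED macro above gives __TM_begin and __TM_simple_begin an s390-compatible success value. A hedged usage sketch, assuming a POWER8 target compiled with -mhtm:

#include <htmxlintrin.h>

long
try_transactional_incr (long *counter)
{
  if (__TM_simple_begin () == _HTM_TBEGIN_STARTED)
    {
      ++*counter;      /* runs transactionally */
      __TM_end ();
      return 1;
    }
  return 0;            /* transaction failed to start; caller falls back */
}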
*/ +#undef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS \ + { "libc", "%{static:%(libc_static);:-lc}" }, \ + { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ + { "libc_static", "-lc -lresolv" }, \ + { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/rs6000/linux64.h b/gcc-4.9/gcc/config/rs6000/linux64.h index dbc9a527e..52c233b7d 100644 --- a/gcc-4.9/gcc/config/rs6000/linux64.h +++ b/gcc-4.9/gcc/config/rs6000/linux64.h @@ -367,11 +367,11 @@ extern int dot_symbols; #undef LINK_OS_DEFAULT_SPEC #define LINK_OS_DEFAULT_SPEC "%(link_os_linux)" -#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1" +#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld.so.1" #ifdef LINUX64_DEFAULT_ABI_ELFv2 -#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:/lib64/ld64.so.1;:/lib64/ld64.so.2}" +#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.1;:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.2}" #else -#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:/lib64/ld64.so.2;:/lib64/ld64.so.1}" +#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.2;:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.1}" #endif #define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0" #define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0" diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def index 8e15bdf16..220d1e970 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def @@ -622,19 +622,12 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ -/* Miscellaneous builtins. */ -#define BU_MISC_1(ENUM, NAME, ATTR, ICODE) \ - RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ - "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_HARD_FLOAT, /* MASK */ \ - (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_UNARY), \ - CODE_FOR_ ## ICODE) /* ICODE */ - -#define BU_MISC_2(ENUM, NAME, ATTR, ICODE) \ +/* 128-bit long double floating point builtins. 
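The BU_LDBL128_2 macro defined above gates __builtin_pack_longdouble and __builtin_unpack_longdouble on both hard float and 128-bit long double, matching the new RS6000_BTM_LDBL128 mask. For reference, a sketch of their use, assuming -mhard-float -mlong-double-128 and the IBM double-double format:

/* IBM long double is a pair of doubles; pack combines the two halves,
   unpack extracts doubleword 0 (high) or 1 (low).  */
long double
make_ld (double hi, double lo)
{
  return __builtin_pack_longdouble (hi, lo);
}

double
high_part (long double x)
{
  return __builtin_unpack_longdouble (x, 0);
}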
*/ +#define BU_LDBL128_2(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_HARD_FLOAT, /* MASK */ \ + (RS6000_BTM_HARD_FLOAT /* MASK */ \ + | RS6000_BTM_LDBL128), \ (RS6000_BTC_ ## ATTR /* ATTR */ \ | RS6000_BTC_BINARY), \ CODE_FOR_ ## ICODE) /* ICODE */ @@ -1593,10 +1586,8 @@ BU_P8V_MISC_3 (BCDSUB_OV, "bcdsub_ov", CONST, bcdsub_unordered) BU_DFP_MISC_2 (PACK_TD, "pack_dec128", CONST, packtd) BU_DFP_MISC_2 (UNPACK_TD, "unpack_dec128", CONST, unpacktd) -BU_MISC_2 (PACK_TF, "pack_longdouble", CONST, packtf) -BU_MISC_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf) -BU_MISC_1 (UNPACK_TF_0, "longdouble_dw0", CONST, unpacktf_0) -BU_MISC_1 (UNPACK_TF_1, "longdouble_dw1", CONST, unpacktf_1) +BU_LDBL128_2 (PACK_TF, "pack_longdouble", CONST, packtf) +BU_LDBL128_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf) BU_P7_MISC_2 (PACK_V1TI, "pack_vector_int128", CONST, packv1ti) BU_P7_MISC_2 (UNPACK_V1TI, "unpack_vector_int128", CONST, unpackv1ti) diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h index 69bb26331..785f6ce1b 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h +++ b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h @@ -163,7 +163,7 @@ extern tree altivec_resolve_overloaded_builtin (location_t, tree, void *); extern rtx rs6000_libcall_value (enum machine_mode); extern rtx rs6000_va_arg (tree, tree); extern int function_ok_for_sibcall (tree); -extern int rs6000_reg_parm_stack_space (tree); +extern int rs6000_reg_parm_stack_space (tree, bool); extern void rs6000_elf_declare_function_name (FILE *, const char *, tree); extern bool rs6000_elf_in_small_data_p (const_tree); #ifdef ARGS_SIZE_RTX diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.c b/gcc-4.9/gcc/config/rs6000/rs6000.c index 4f1a399a2..bf67e7298 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.c +++ b/gcc-4.9/gcc/config/rs6000/rs6000.c @@ -3037,7 +3037,8 @@ rs6000_builtin_mask_calculate (void) | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0) | ((TARGET_HTM) ? RS6000_BTM_HTM : 0) | ((TARGET_DFP) ? RS6000_BTM_DFP : 0) - | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)); + | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0) + | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)); } /* Override command line options. Mostly we process the processor type and @@ -6118,7 +6119,8 @@ mem_operand_gpr (rtx op, enum machine_mode mode) return false; extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; - gcc_assert (extra >= 0); + if (extra < 0) + extra = 0; if (GET_CODE (addr) == LO_SUM) /* For lo_sum addresses, we must allow any offset except one that @@ -10477,35 +10479,65 @@ rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type) list, or passes any parameter in memory. */ static bool -rs6000_function_parms_need_stack (tree fun) +rs6000_function_parms_need_stack (tree fun, bool incoming) { - function_args_iterator args_iter; - tree arg_type; + tree fntype, result; CUMULATIVE_ARGS args_so_far_v; cumulative_args_t args_so_far; if (!fun) /* Must be a libcall, all of which only use reg parms. */ return false; + + fntype = fun; if (!TYPE_P (fun)) - fun = TREE_TYPE (fun); + fntype = TREE_TYPE (fun); /* Varargs functions need the parameter save area. 
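As the comment above notes, a variadic callee always needs the parameter save area: the registers carrying the anonymous arguments must be spilled to memory so va_arg can walk them. A minimal example of such a callee:

#include <stdarg.h>

/* On ELFv2 the int arguments arrive in registers but must be stored
   to the parameter save area before va_arg can index them.  */
int
sum_ints (int n, ...)
{
  va_list ap;
  int s = 0;
  va_start (ap, n);
  while (n-- > 0)
    s += va_arg (ap, int);
  va_end (ap);
  return s;
}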
*/ - if (!prototype_p (fun) || stdarg_p (fun)) + if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype)) return true; - INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX); + INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX); args_so_far = pack_cumulative_args (&args_so_far_v); - if (aggregate_value_p (TREE_TYPE (fun), fun)) + /* When incoming, we will have been passed the function decl. + It is necessary to use the decl to handle K&R style functions, + where TYPE_ARG_TYPES may not be available. */ + if (incoming) { - tree type = build_pointer_type (TREE_TYPE (fun)); - rs6000_parm_needs_stack (args_so_far, type); + gcc_assert (DECL_P (fun)); + result = DECL_RESULT (fun); } + else + result = TREE_TYPE (fntype); - FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter) - if (rs6000_parm_needs_stack (args_so_far, arg_type)) - return true; + if (result && aggregate_value_p (result, fntype)) + { + if (!TYPE_P (result)) + result = TREE_TYPE (result); + result = build_pointer_type (result); + rs6000_parm_needs_stack (args_so_far, result); + } + + if (incoming) + { + tree parm; + + for (parm = DECL_ARGUMENTS (fun); + parm && parm != void_list_node; + parm = TREE_CHAIN (parm)) + if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm))) + return true; + } + else + { + function_args_iterator args_iter; + tree arg_type; + + FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) + if (rs6000_parm_needs_stack (args_so_far, arg_type)) + return true; + } return false; } @@ -10517,7 +10549,7 @@ rs6000_function_parms_need_stack (tree fun) all parameters in registers. */ int -rs6000_reg_parm_stack_space (tree fun) +rs6000_reg_parm_stack_space (tree fun, bool incoming) { int reg_parm_stack_space; @@ -10535,7 +10567,7 @@ rs6000_reg_parm_stack_space (tree fun) case ABI_ELFv2: /* ??? Recomputing this every time is a bit expensive. Is there a place to cache this information? */ - if (rs6000_function_parms_need_stack (fun)) + if (rs6000_function_parms_need_stack (fun, incoming)) reg_parm_stack_space = TARGET_64BIT ? 
64 : 32; else reg_parm_stack_space = 0; @@ -13559,11 +13591,15 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode) else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) error ("Builtin function %s requires the -mhard-dfp and" - "-mpower8-vector options", name); + " -mpower8-vector options", name); else if ((fnmask & RS6000_BTM_DFP) != 0) error ("Builtin function %s requires the -mhard-dfp option", name); else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0) error ("Builtin function %s requires the -mpower8-vector option", name); + else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) + == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) + error ("Builtin function %s requires the -mhard-float and" + " -mlong-double-128 options", name); else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0) error ("Builtin function %s requires the -mhard-float option", name); else @@ -31313,6 +31349,7 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = { "htm", RS6000_BTM_HTM, false, false }, { "hard-dfp", RS6000_BTM_DFP, false, false }, { "hard-float", RS6000_BTM_HARD_FLOAT, false, false }, + { "long-double-128", RS6000_BTM_LDBL128, false, false }, }; /* Option variables that we want to support inside attribute((target)) and diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.h b/gcc-4.9/gcc/config/rs6000/rs6000.h index 21330dc65..2b5d033f8 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.h +++ b/gcc-4.9/gcc/config/rs6000/rs6000.h @@ -1602,7 +1602,14 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; /* Define this if stack space is still allocated for a parameter passed in a register. The value is the number of bytes allocated to this area. */ -#define REG_PARM_STACK_SPACE(FNDECL) rs6000_reg_parm_stack_space((FNDECL)) +#define REG_PARM_STACK_SPACE(FNDECL) \ + rs6000_reg_parm_stack_space ((FNDECL), false) + +/* Define this macro if space guaranteed when compiling a function body + is different to space required when making a call, a situation that + can arise with K&R style function definitions. */ +#define INCOMING_REG_PARM_STACK_SPACE(FNDECL) \ + rs6000_reg_parm_stack_space ((FNDECL), true) /* Define this if the above stack space is to be considered part of the space allocated by the caller. */ @@ -2501,8 +2508,8 @@ extern int frame_pointer_needed; #define RS6000_BTC_SAT RS6000_BTC_MISC /* saturate sets VSCR. */ /* Builtin targets. For now, we reuse the masks for those options that are in - target flags, and pick two random bits for SPE and paired which aren't in - target_flags. */ + target flags, and pick three random bits for SPE, paired and ldbl128 which + aren't in target_flags. */ #define RS6000_BTM_ALWAYS 0 /* Always enabled. */ #define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */ #define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */ @@ -2519,6 +2526,7 @@ extern int frame_pointer_needed; #define RS6000_BTM_CELL MASK_FPRND /* Target is cell powerpc. */ #define RS6000_BTM_DFP MASK_DFP /* Decimal floating point. */ #define RS6000_BTM_HARD_FLOAT MASK_SOFT_FLOAT /* Hardware floating point. */ +#define RS6000_BTM_LDBL128 MASK_MULTIPLE /* 128-bit long double. */ #define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \ | RS6000_BTM_VSX \ @@ -2532,7 +2540,8 @@ extern int frame_pointer_needed; | RS6000_BTM_POPCNTD \ | RS6000_BTM_CELL \ | RS6000_BTM_DFP \ - | RS6000_BTM_HARD_FLOAT) + | RS6000_BTM_HARD_FLOAT \ + | RS6000_BTM_LDBL128) /* Define builtin enum index. 
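The new INCOMING_REG_PARM_STACK_SPACE hook above exists because of K&R definitions, where the function type carries no parameter list, so the incoming (callee) side must be judged from the decl rather than from TYPE_ARG_TYPES. An example of the problematic style; names are illustrative:

/* K&R style: the declaration gives no argument types, so the callee
   must be analyzed from the definition's DECL_ARGUMENTS.  */
int knr_scale ();

int
knr_scale (a, f)
     int a;
     double f;
{
  return (int) (a * f);
}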
*/ diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.md b/gcc-4.9/gcc/config/rs6000/rs6000.md index e853bc4f9..26d0d1530 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.md +++ b/gcc-4.9/gcc/config/rs6000/rs6000.md @@ -746,7 +746,7 @@ (define_insn "*extendsidi2_lfiwax" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wu") - (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))] + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "Y,r,r,Z,Z")))] "TARGET_POWERPC64 && TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 @@ -769,7 +769,7 @@ (define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") - (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))] + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "Y,r")))] "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 @@ -15806,26 +15806,6 @@ "" "") -;; The Advance Toolchain 7.0-3 added private builtins: __builtin_longdouble_dw0 -;; and __builtin_longdouble_dw1 to optimize glibc. Add support for these -;; builtins here. - -(define_expand "unpacktf_0" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (unspec:DF [(match_operand:TF 1 "register_operand" "") - (const_int 0)] - UNSPEC_UNPACK_128BIT))] - "" - "") - -(define_expand "unpacktf_1" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (unspec:DF [(match_operand:TF 1 "register_operand" "") - (const_int 1)] - UNSPEC_UNPACK_128BIT))] - "" - "") - (define_insn_and_split "unpack_dm" [(set (match_operand: 0 "nonimmediate_operand" "=d,m,d,r,m") (unspec: diff --git a/gcc-4.9/gcc/config/rs6000/sysv4.h b/gcc-4.9/gcc/config/rs6000/sysv4.h index 3fb6bd59f..afbd2892e 100644 --- a/gcc-4.9/gcc/config/rs6000/sysv4.h +++ b/gcc-4.9/gcc/config/rs6000/sysv4.h @@ -761,7 +761,10 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) #define LINK_START_LINUX_SPEC "" -#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld.so.1" #define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" #if DEFAULT_LIBC == LIBC_UCLIBC #define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}" @@ -843,6 +846,11 @@ ncrtn.o%s" #define CPP_OS_OPENBSD_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}" #endif +/* These may be provided by rs6000/linux-grtev2.h. */ +#ifndef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS +#endif + /* Define any extra SPECS that the compiler needs to generate. */ /* Override rs6000.h definition. */ #undef SUBTARGET_EXTRA_SPECS @@ -908,6 +916,7 @@ ncrtn.o%s" { "cpp_os_openbsd", CPP_OS_OPENBSD_SPEC }, \ { "cpp_os_default", CPP_OS_DEFAULT_SPEC }, \ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }, \ + LINUX_GRTE_EXTRA_SPECS \ SUBSUBTARGET_EXTRA_SPECS #define SUBSUBTARGET_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/rs6000/vsx.md b/gcc-4.9/gcc/config/rs6000/vsx.md index 23d85ab06..6d20eab11 100644 --- a/gcc-4.9/gcc/config/rs6000/vsx.md +++ b/gcc-4.9/gcc/config/rs6000/vsx.md @@ -24,6 +24,13 @@ ;; Iterator for the 2 64-bit vector types (define_mode_iterator VSX_D [V2DF V2DI]) +;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with +;; lxvd2x to properly handle swapping words on little endian +(define_mode_iterator VSX_LE [V2DF + V2DI + V1TI + (TI "VECTOR_MEM_VSX_P (TImode)")]) + ;; Iterator for the 2 32-bit vector types (define_mode_iterator VSX_W [V4SF V4SI]) @@ -228,8 +235,8 @@ ;; The patterns for LE permuted loads and stores come before the general ;; VSX moves so they match first. 
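The VSX_LE iterator above extends the little-endian load/store fixups to V1TI and TI. On powerpc64le, a plain 128-bit vector access can be emitted as lxvd2x plus xxpermdi to undo the doubleword swap. A sketch of source code that exercises these patterns; actual code generation depends on -mcpu and optimization flags:

/* Compiled for powerpc64le with VSX, this load may become lxvd2x
   followed by xxpermdi to restore element order.  */
typedef long long v2di __attribute__ ((vector_size (16)));

v2di
load_v2di (const v2di *p)
{
  return *p;
}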
(define_insn_and_split "*vsx_le_perm_load_" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") - (match_operand:VSX_D 1 "memory_operand" "Z"))] + [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa") + (match_operand:VSX_LE 1 "memory_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX" "#" "!BYTES_BIG_ENDIAN && TARGET_VSX" @@ -342,16 +349,16 @@ (set_attr "length" "8")]) (define_insn "*vsx_le_perm_store_" - [(set (match_operand:VSX_D 0 "memory_operand" "=Z") - (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))] + [(set (match_operand:VSX_LE 0 "memory_operand" "=Z") + (match_operand:VSX_LE 1 "vsx_register_operand" "+wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX" "#" [(set_attr "type" "vecstore") (set_attr "length" "12")]) (define_split - [(set (match_operand:VSX_D 0 "memory_operand" "") - (match_operand:VSX_D 1 "vsx_register_operand" ""))] + [(set (match_operand:VSX_LE 0 "memory_operand" "") + (match_operand:VSX_LE 1 "vsx_register_operand" ""))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" [(set (match_dup 2) (vec_select: @@ -369,8 +376,8 @@ ;; The post-reload split requires that we re-permute the source ;; register in case it is still live. (define_split - [(set (match_operand:VSX_D 0 "memory_operand" "") - (match_operand:VSX_D 1 "vsx_register_operand" ""))] + [(set (match_operand:VSX_LE 0 "memory_operand" "") + (match_operand:VSX_LE 1 "vsx_register_operand" ""))] "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" [(set (match_dup 1) (vec_select: @@ -1352,9 +1359,9 @@ ;; xxpermdi for little endian loads and stores. We need several of ;; these since the form of the PARALLEL differs by mode. (define_insn "*vsx_xxpermdi2_le_" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") - (vec_select:VSX_D - (match_operand:VSX_D 1 "vsx_register_operand" "wa") + [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa") + (vec_select:VSX_LE + (match_operand:VSX_LE 1 "vsx_register_operand" "wa") (parallel [(const_int 1) (const_int 0)])))] "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode)" "xxpermdi %x0,%x1,%x1,2" @@ -1401,9 +1408,9 @@ ;; lxvd2x for little endian loads. We need several of ;; these since the form of the PARALLEL differs by mode. (define_insn "*vsx_lxvd2x2_le_" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") - (vec_select:VSX_D - (match_operand:VSX_D 1 "memory_operand" "Z") + [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa") + (vec_select:VSX_LE + (match_operand:VSX_LE 1 "memory_operand" "Z") (parallel [(const_int 1) (const_int 0)])))] "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode)" "lxvd2x %x0,%y1" @@ -1450,9 +1457,9 @@ ;; stxvd2x for little endian stores. We need several of ;; these since the form of the PARALLEL differs by mode. 
(define_insn "*vsx_stxvd2x2_le_" - [(set (match_operand:VSX_D 0 "memory_operand" "=Z") - (vec_select:VSX_D - (match_operand:VSX_D 1 "vsx_register_operand" "wa") + [(set (match_operand:VSX_LE 0 "memory_operand" "=Z") + (vec_select:VSX_LE + (match_operand:VSX_LE 1 "vsx_register_operand" "wa") (parallel [(const_int 1) (const_int 0)])))] "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode)" "stxvd2x %x1,%y0" @@ -1683,7 +1690,7 @@ { if (GET_CODE (op3) == SCRATCH) op3 = gen_reg_rtx (V4SFmode); - emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2)); + emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele))); tmp = op3; } emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp)); diff --git a/gcc-4.9/gcc/config/rtems.h b/gcc-4.9/gcc/config/rtems.h index 3da27c57e..f14aed36a 100644 --- a/gcc-4.9/gcc/config/rtems.h +++ b/gcc-4.9/gcc/config/rtems.h @@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* The system headers under RTEMS are C++-aware. */ diff --git a/gcc-4.9/gcc/config/sol2-clearcap.map b/gcc-4.9/gcc/config/sol2-clearcap.map new file mode 100644 index 000000000..2d880c983 --- /dev/null +++ b/gcc-4.9/gcc/config/sol2-clearcap.map @@ -0,0 +1,2 @@ +# Clear all hardware capabilities emitted by Sun as. +hwcap_1 = V0x0 OVERRIDE; diff --git a/gcc-4.9/gcc/config/sol2-clearcapv2.map b/gcc-4.9/gcc/config/sol2-clearcapv2.map new file mode 100644 index 000000000..3c0cacedb --- /dev/null +++ b/gcc-4.9/gcc/config/sol2-clearcapv2.map @@ -0,0 +1,7 @@ +# Clear all hardware capabilities emitted by Sun as. +# +# Uses mapfile v2 syntax which is the only way to clear AT_SUN_CAP_HW2 flags. +$mapfile_version 2 +CAPABILITY { + HW = ; +}; diff --git a/gcc-4.9/gcc/config/sol2.h b/gcc-4.9/gcc/config/sol2.h index eb8e0b91f..db7844b29 100644 --- a/gcc-4.9/gcc/config/sol2.h +++ b/gcc-4.9/gcc/config/sol2.h @@ -183,12 +183,21 @@ along with GCC; see the file COPYING3. If not see #define LINK_LIBGCC_MAPFILE_SPEC "" #endif +/* Clear hardware capabilities, either explicitly or with OpenMP: + #pragma openmp declare simd creates clones for SSE2, AVX, and AVX2. 
*/ +#ifdef HAVE_LD_CLEARCAP +#define LINK_CLEARCAP_SPEC " %{mclear-hwcap|fopenmp*:-M %sclearcap.map}" +#else +#define LINK_CLEARCAP_SPEC "" +#endif + #undef LINK_SPEC #define LINK_SPEC \ "%{h*} %{v:-V} \ %{!shared:%{!static:%{rdynamic: " RDYNAMIC_SPEC "}}} \ %{static:-dn -Bstatic} \ - %{shared:-G -dy %{!mimpure-text:-z text}} " LINK_LIBGCC_MAPFILE_SPEC " \ + %{shared:-G -dy %{!mimpure-text:-z text}} " \ + LINK_LIBGCC_MAPFILE_SPEC LINK_CLEARCAP_SPEC " \ %{symbolic:-Bsymbolic -G -dy -z text} \ %(link_arch) \ %{Qy:} %{!Qn:-Qy}" diff --git a/gcc-4.9/gcc/config/sol2.opt b/gcc-4.9/gcc/config/sol2.opt index a5ae7c510..16a3e5f58 100644 --- a/gcc-4.9/gcc/config/sol2.opt +++ b/gcc-4.9/gcc/config/sol2.opt @@ -27,6 +27,10 @@ Driver Joined Ym, Driver Joined +mclear-hwcap +Target Report +Clear hardware capabilities when linking + mimpure-text Target Report Pass -z text to linker diff --git a/gcc-4.9/gcc/config/t-sol2 b/gcc-4.9/gcc/config/t-sol2 index a4c4af4ad..25feb0486 100644 --- a/gcc-4.9/gcc/config/t-sol2 +++ b/gcc-4.9/gcc/config/t-sol2 @@ -35,3 +35,10 @@ sol2-stubs.o: $(srcdir)/config/sol2-stubs.c sol2.o: $(srcdir)/config/sol2.c $(COMPILE) $< $(POSTCOMPILE) + +# Install clearcap.map if present. +install: install-clearcap-map + +# Ignore failures: file only exists if linker supports it. +install-clearcap-map: + -$(INSTALL_DATA) clearcap.map $(DESTDIR)$(libdir) diff --git a/gcc-4.9/gcc/config/v850/rtems.h b/gcc-4.9/gcc/config/v850/rtems.h index 01dff3e52..42ebb9fc0 100644 --- a/gcc-4.9/gcc/config/v850/rtems.h +++ b/gcc-4.9/gcc/config/v850/rtems.h @@ -13,8 +13,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Specify predefined symbols in preprocessor. */ diff --git a/gcc-4.9/gcc/config/v850/v850-opts.h b/gcc-4.9/gcc/config/v850/v850-opts.h index a91b1b059..c3d9b7aac 100644 --- a/gcc-4.9/gcc/config/v850/v850-opts.h +++ b/gcc-4.9/gcc/config/v850/v850-opts.h @@ -13,8 +13,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef V850_OPTS_H diff --git a/gcc-4.9/gcc/config/v850/v850.h b/gcc-4.9/gcc/config/v850/v850.h index 92db20a44..6ccdc5d65 100644 --- a/gcc-4.9/gcc/config/v850/v850.h +++ b/gcc-4.9/gcc/config/v850/v850.h @@ -14,8 +14,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ #ifndef GCC_V850_H