author    | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700
committer | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700
commit    | 1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch)
tree      | c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/gcc/config
parent    | 283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff)
download  | toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz
          | toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.bz2
          | toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.zip
Initial checkin of GCC 4.9.0 from trunk (r208799).
Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
Diffstat (limited to 'gcc-4.9/gcc/config')
1354 files changed, 1042837 insertions, 0 deletions
diff --git a/gcc-4.9/gcc/config/README b/gcc-4.9/gcc/config/README
new file mode 100644
index 000000000..60328ec5b
--- /dev/null
+++ b/gcc-4.9/gcc/config/README
@@ -0,0 +1,5 @@
+This directory contains machine-specific files for the GNU C compiler.
+It has a subdirectory for each basic CPU type.
+The only files in this directory itself
+are some .h files that pertain to particular operating systems
+and are used for more than one CPU type.
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-arches.def b/gcc-4.9/gcc/config/aarch64/aarch64-arches.def
new file mode 100644
index 000000000..4b796d8c9
--- /dev/null
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-arches.def
@@ -0,0 +1,29 @@
+/* Copyright (C) 2011-2014 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Before using #include to read this file, define a macro:
+
+      AARCH64_ARCH(NAME, CORE, ARCH, FLAGS)
+
+   The NAME is the name of the architecture, represented as a string
+   constant.  The CORE is the identifier for a core representative of
+   this architecture.  ARCH is the architecture revision.  FLAGS are
+   the flags implied by the architecture.  */
+
+AARCH64_ARCH("armv8-a", generic, 8, AARCH64_FL_FOR_ARCH8)
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c
new file mode 100644
index 000000000..55cfe0ab2
--- /dev/null
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c
@@ -0,0 +1,1253 @@
+/* Builtins' description for AArch64 SIMD architecture.
+   Copyright (C) 2011-2014 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "calls.h" +#include "expr.h" +#include "tm_p.h" +#include "recog.h" +#include "langhooks.h" +#include "diagnostic-core.h" +#include "optabs.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "ggc.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimple-iterator.h" + +enum aarch64_simd_builtin_type_mode +{ + T_V8QI, + T_V4HI, + T_V2SI, + T_V2SF, + T_DI, + T_DF, + T_V16QI, + T_V8HI, + T_V4SI, + T_V4SF, + T_V2DI, + T_V2DF, + T_TI, + T_EI, + T_OI, + T_XI, + T_SI, + T_SF, + T_HI, + T_QI, + T_MAX +}; + +#define v8qi_UP T_V8QI +#define v4hi_UP T_V4HI +#define v2si_UP T_V2SI +#define v2sf_UP T_V2SF +#define di_UP T_DI +#define df_UP T_DF +#define v16qi_UP T_V16QI +#define v8hi_UP T_V8HI +#define v4si_UP T_V4SI +#define v4sf_UP T_V4SF +#define v2di_UP T_V2DI +#define v2df_UP T_V2DF +#define ti_UP T_TI +#define ei_UP T_EI +#define oi_UP T_OI +#define xi_UP T_XI +#define si_UP T_SI +#define sf_UP T_SF +#define hi_UP T_HI +#define qi_UP T_QI + +#define UP(X) X##_UP + +#define SIMD_MAX_BUILTIN_ARGS 5 + +enum aarch64_type_qualifiers +{ + /* T foo. */ + qualifier_none = 0x0, + /* unsigned T foo. */ + qualifier_unsigned = 0x1, /* 1 << 0 */ + /* const T foo. */ + qualifier_const = 0x2, /* 1 << 1 */ + /* T *foo. */ + qualifier_pointer = 0x4, /* 1 << 2 */ + /* const T *foo. */ + qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer */ + /* Used when expanding arguments if an operand could + be an immediate. */ + qualifier_immediate = 0x8, /* 1 << 3 */ + qualifier_maybe_immediate = 0x10, /* 1 << 4 */ + /* void foo (...). */ + qualifier_void = 0x20, /* 1 << 5 */ + /* Some patterns may have internal operands, this qualifier is an + instruction to the initialisation code to skip this operand. */ + qualifier_internal = 0x40, /* 1 << 6 */ + /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum + rather than using the type of the operand. */ + qualifier_map_mode = 0x80, /* 1 << 7 */ + /* qualifier_pointer | qualifier_map_mode */ + qualifier_pointer_map_mode = 0x84, + /* qualifier_const_pointer | qualifier_map_mode */ + qualifier_const_pointer_map_mode = 0x86, + /* Polynomial types. 
*/ + qualifier_poly = 0x100 +}; + +typedef struct +{ + const char *name; + enum aarch64_simd_builtin_type_mode mode; + const enum insn_code code; + unsigned int fcode; + enum aarch64_type_qualifiers *qualifiers; +} aarch64_simd_builtin_datum; + +static enum aarch64_type_qualifiers +aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none }; +#define TYPES_UNOP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned }; +#define TYPES_UNOPU (aarch64_types_unopu_qualifiers) +#define TYPES_CREATE (aarch64_types_unop_qualifiers) +#define TYPES_REINTERP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; +#define TYPES_BINOP (aarch64_types_binop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; +#define TYPES_BINOPU (aarch64_types_binopu_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_poly, qualifier_poly }; +#define TYPES_BINOPP (aarch64_types_binopp_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_TERNOP (aarch64_types_ternop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_unsigned }; +#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_none }; +#define TYPES_QUADOP (aarch64_types_quadop_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_GETLANE (aarch64_types_getlane_qualifiers) +#define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; +#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_SETLANE (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTINSERT (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_COMBINE (aarch64_types_combine_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_const_pointer_map_mode }; +#define TYPES_LOAD1 (aarch64_types_load1_qualifiers) +#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_unsigned, + 
qualifier_poly, qualifier_poly }; +#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, + qualifier_none, qualifier_none }; +#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_unsigned }; +#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers) + +/* The first argument (return type) of a store should be void type, + which we represent with qualifier_void. Their first operand will be + a DImode pointer to the location to store to, so we must use + qualifier_map_mode | qualifier_pointer to build a pointer to the + element type of the vector. */ +static enum aarch64_type_qualifiers +aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; +#define TYPES_STORE1 (aarch64_types_store1_qualifiers) +#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) + +#define CF0(N, X) CODE_FOR_aarch64_##N##X +#define CF1(N, X) CODE_FOR_##N##X##1 +#define CF2(N, X) CODE_FOR_##N##X##2 +#define CF3(N, X) CODE_FOR_##N##X##3 +#define CF4(N, X) CODE_FOR_##N##X##4 +#define CF10(N, X) CODE_FOR_##N##X + +#define VAR1(T, N, MAP, A) \ + {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T}, +#define VAR2(T, N, MAP, A, B) \ + VAR1 (T, N, MAP, A) \ + VAR1 (T, N, MAP, B) +#define VAR3(T, N, MAP, A, B, C) \ + VAR2 (T, N, MAP, A, B) \ + VAR1 (T, N, MAP, C) +#define VAR4(T, N, MAP, A, B, C, D) \ + VAR3 (T, N, MAP, A, B, C) \ + VAR1 (T, N, MAP, D) +#define VAR5(T, N, MAP, A, B, C, D, E) \ + VAR4 (T, N, MAP, A, B, C, D) \ + VAR1 (T, N, MAP, E) +#define VAR6(T, N, MAP, A, B, C, D, E, F) \ + VAR5 (T, N, MAP, A, B, C, D, E) \ + VAR1 (T, N, MAP, F) +#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \ + VAR6 (T, N, MAP, A, B, C, D, E, F) \ + VAR1 (T, N, MAP, G) +#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, MAP, A, B, C, D, E, F, G) \ + VAR1 (T, N, MAP, H) +#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR1 (T, N, MAP, I) +#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR1 (T, N, MAP, J) +#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR1 (T, N, MAP, K) +#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR1 (T, N, MAP, L) + +/* BUILTIN_<ITERATOR> macros should expand to cover the same range of + modes as is given for each define_mode_iterator in + config/aarch64/iterators.md. 
*/ + +#define BUILTIN_DX(T, N, MAP) \ + VAR2 (T, N, MAP, di, df) +#define BUILTIN_GPF(T, N, MAP) \ + VAR2 (T, N, MAP, sf, df) +#define BUILTIN_SDQ_I(T, N, MAP) \ + VAR4 (T, N, MAP, qi, hi, si, di) +#define BUILTIN_SD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, hi, si) +#define BUILTIN_V2F(T, N, MAP) \ + VAR2 (T, N, MAP, v2sf, v2df) +#define BUILTIN_VALL(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df) +#define BUILTIN_VALLDI(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df, di) +#define BUILTIN_VALLDIF(T, N, MAP) \ + VAR12 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df, di, df) +#define BUILTIN_VB(T, N, MAP) \ + VAR2 (T, N, MAP, v8qi, v16qi) +#define BUILTIN_VD(T, N, MAP) \ + VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf) +#define BUILTIN_VDC(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VDIC(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VDN(T, N, MAP) \ + VAR3 (T, N, MAP, v4hi, v2si, di) +#define BUILTIN_VDQ(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDQF(T, N, MAP) \ + VAR3 (T, N, MAP, v2sf, v4sf, v2df) +#define BUILTIN_VDQH(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v8hi) +#define BUILTIN_VDQHS(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQIF(T, N, MAP) \ + VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) +#define BUILTIN_VDQM(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQV(T, N, MAP) \ + VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si) +#define BUILTIN_VDQQH(T, N, MAP) \ + VAR4 (T, N, MAP, v8qi, v16qi, v4hi, v8hi) +#define BUILTIN_VDQ_BHSI(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQ_I(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDW(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_BHSI(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v2si) +#define BUILTIN_VD_RE(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VQ(T, N, MAP) \ + VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df) +#define BUILTIN_VQN(T, N, MAP) \ + VAR3 (T, N, MAP, v8hi, v4si, v2di) +#define BUILTIN_VQW(T, N, MAP) \ + VAR3 (T, N, MAP, v16qi, v8hi, v4si) +#define BUILTIN_VQ_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v8hi, v4si) +#define BUILTIN_VQ_S(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VSDQ_HSI(T, N, MAP) \ + VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si) +#define BUILTIN_VSDQ_I(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) +#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) +#define BUILTIN_VSDQ_I_DI(T, N, MAP) \ + VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) +#define BUILTIN_VSD_HSI(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v2si, hi, si) +#define BUILTIN_VSQN_HSDI(T, N, MAP) \ + VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di) +#define BUILTIN_VSTRUCT(T, N, MAP) \ + VAR3 (T, N, MAP, oi, ci, xi) + +static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { +#include "aarch64-simd-builtins.def" +}; + +#undef VAR1 +#define VAR1(T, N, MAP, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, + +enum aarch64_builtins +{ + AARCH64_BUILTIN_MIN, + 
AARCH64_SIMD_BUILTIN_BASE, +#include "aarch64-simd-builtins.def" + AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE + + ARRAY_SIZE (aarch64_simd_builtin_data), + AARCH64_BUILTIN_MAX +}; + +static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; + +#define NUM_DREG_TYPES 6 +#define NUM_QREG_TYPES 6 + +/* Return a tree for a signed or unsigned argument of either + the mode specified by MODE, or the inner mode of MODE. */ +tree +aarch64_build_scalar_type (enum machine_mode mode, + bool unsigned_p, + bool poly_p) +{ +#undef INT_TYPES +#define INT_TYPES \ + AARCH64_TYPE_BUILDER (QI) \ + AARCH64_TYPE_BUILDER (HI) \ + AARCH64_TYPE_BUILDER (SI) \ + AARCH64_TYPE_BUILDER (DI) \ + AARCH64_TYPE_BUILDER (EI) \ + AARCH64_TYPE_BUILDER (OI) \ + AARCH64_TYPE_BUILDER (CI) \ + AARCH64_TYPE_BUILDER (XI) \ + AARCH64_TYPE_BUILDER (TI) \ + +/* Statically declare all the possible types we might need. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_p = NULL; \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; + + INT_TYPES + + static tree float_aarch64_type_node = NULL; + static tree double_aarch64_type_node = NULL; + + gcc_assert (!VECTOR_MODE_P (mode)); + +/* If we've already initialised this type, don't initialise it again, + otherwise ask for a new type of the correct size. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return (X##_aarch64_type_node_u \ + ? X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else if (poly_p) \ + return (X##_aarch64_type_node_p \ + ? X##_aarch64_type_node_p \ + : X##_aarch64_type_node_p \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else \ + return (X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = make_signed_type (GET_MODE_PRECISION (mode))); \ + break; + + switch (mode) + { + INT_TYPES + case SFmode: + if (!float_aarch64_type_node) + { + float_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (float_aarch64_type_node) = FLOAT_TYPE_SIZE; + layout_type (float_aarch64_type_node); + } + return float_aarch64_type_node; + break; + case DFmode: + if (!double_aarch64_type_node) + { + double_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (double_aarch64_type_node) = DOUBLE_TYPE_SIZE; + layout_type (double_aarch64_type_node); + } + return double_aarch64_type_node; + break; + default: + gcc_unreachable (); + } +} + +tree +aarch64_build_vector_type (enum machine_mode mode, + bool unsigned_p, + bool poly_p) +{ + tree eltype; + +#define VECTOR_TYPES \ + AARCH64_TYPE_BUILDER (V16QI) \ + AARCH64_TYPE_BUILDER (V8HI) \ + AARCH64_TYPE_BUILDER (V4SI) \ + AARCH64_TYPE_BUILDER (V2DI) \ + AARCH64_TYPE_BUILDER (V8QI) \ + AARCH64_TYPE_BUILDER (V4HI) \ + AARCH64_TYPE_BUILDER (V2SI) \ + \ + AARCH64_TYPE_BUILDER (V4SF) \ + AARCH64_TYPE_BUILDER (V2DF) \ + AARCH64_TYPE_BUILDER (V2SF) \ +/* Declare our "cache" of values. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; \ + static tree X##_aarch64_type_node_p = NULL; + + VECTOR_TYPES + + gcc_assert (VECTOR_MODE_P (mode)); + +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return X##_aarch64_type_node_u \ + ? 
X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + else if (poly_p) \ + return X##_aarch64_type_node_p \ + ? X##_aarch64_type_node_p \ + : X##_aarch64_type_node_p \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + else \ + return X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + break; + + switch (mode) + { + default: + eltype = aarch64_build_scalar_type (GET_MODE_INNER (mode), + unsigned_p, poly_p); + return build_vector_type_for_mode (eltype, mode); + break; + VECTOR_TYPES + } +} + +tree +aarch64_build_type (enum machine_mode mode, bool unsigned_p, bool poly_p) +{ + if (VECTOR_MODE_P (mode)) + return aarch64_build_vector_type (mode, unsigned_p, poly_p); + else + return aarch64_build_scalar_type (mode, unsigned_p, poly_p); +} + +tree +aarch64_build_signed_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, false, false); +} + +tree +aarch64_build_unsigned_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, true, false); +} + +tree +aarch64_build_poly_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, false, true); +} + +static void +aarch64_init_simd_builtins (void) +{ + unsigned int i, fcode = AARCH64_SIMD_BUILTIN_BASE + 1; + + /* Signed scalar type nodes. */ + tree aarch64_simd_intQI_type_node = aarch64_build_signed_type (QImode); + tree aarch64_simd_intHI_type_node = aarch64_build_signed_type (HImode); + tree aarch64_simd_intSI_type_node = aarch64_build_signed_type (SImode); + tree aarch64_simd_intDI_type_node = aarch64_build_signed_type (DImode); + tree aarch64_simd_intTI_type_node = aarch64_build_signed_type (TImode); + tree aarch64_simd_intEI_type_node = aarch64_build_signed_type (EImode); + tree aarch64_simd_intOI_type_node = aarch64_build_signed_type (OImode); + tree aarch64_simd_intCI_type_node = aarch64_build_signed_type (CImode); + tree aarch64_simd_intXI_type_node = aarch64_build_signed_type (XImode); + + /* Unsigned scalar type nodes. */ + tree aarch64_simd_intUQI_type_node = aarch64_build_unsigned_type (QImode); + tree aarch64_simd_intUHI_type_node = aarch64_build_unsigned_type (HImode); + tree aarch64_simd_intUSI_type_node = aarch64_build_unsigned_type (SImode); + tree aarch64_simd_intUDI_type_node = aarch64_build_unsigned_type (DImode); + + /* Poly scalar type nodes. */ + tree aarch64_simd_polyQI_type_node = aarch64_build_poly_type (QImode); + tree aarch64_simd_polyHI_type_node = aarch64_build_poly_type (HImode); + tree aarch64_simd_polyDI_type_node = aarch64_build_poly_type (DImode); + tree aarch64_simd_polyTI_type_node = aarch64_build_poly_type (TImode); + + /* Float type nodes. */ + tree aarch64_simd_float_type_node = aarch64_build_signed_type (SFmode); + tree aarch64_simd_double_type_node = aarch64_build_signed_type (DFmode); + + /* Define typedefs which exactly correspond to the modes we are basing vector + types on. If you change these names you'll need to change + the table used by aarch64_mangle_type too. 
*/ + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intQI_type_node, + "__builtin_aarch64_simd_qi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intHI_type_node, + "__builtin_aarch64_simd_hi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intSI_type_node, + "__builtin_aarch64_simd_si"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_float_type_node, + "__builtin_aarch64_simd_sf"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intDI_type_node, + "__builtin_aarch64_simd_di"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_double_type_node, + "__builtin_aarch64_simd_df"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyQI_type_node, + "__builtin_aarch64_simd_poly8"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyHI_type_node, + "__builtin_aarch64_simd_poly16"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyDI_type_node, + "__builtin_aarch64_simd_poly64"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyTI_type_node, + "__builtin_aarch64_simd_poly128"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intTI_type_node, + "__builtin_aarch64_simd_ti"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intEI_type_node, + "__builtin_aarch64_simd_ei"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intOI_type_node, + "__builtin_aarch64_simd_oi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intCI_type_node, + "__builtin_aarch64_simd_ci"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intXI_type_node, + "__builtin_aarch64_simd_xi"); + + /* Unsigned integer types for various mode sizes. */ + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUQI_type_node, + "__builtin_aarch64_simd_uqi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUHI_type_node, + "__builtin_aarch64_simd_uhi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUSI_type_node, + "__builtin_aarch64_simd_usi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUDI_type_node, + "__builtin_aarch64_simd_udi"); + + for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) + { + bool print_type_signature_p = false; + char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; + aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i]; + const char *const modenames[] = + { + "v8qi", "v4hi", "v2si", "v2sf", "di", "df", + "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", + "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" + }; + const enum machine_mode modes[] = + { + V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode, + V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode, + V2DFmode, TImode, EImode, OImode, XImode, SImode, + SFmode, HImode, QImode + }; + char namebuf[60]; + tree ftype = NULL; + tree fndecl = NULL; + + gcc_assert (ARRAY_SIZE (modenames) == T_MAX); + + d->fcode = fcode; + + /* We must track two variables here. op_num is + the operand number as in the RTL pattern. This is + required to access the mode (e.g. V4SF mode) of the + argument, from which the base type can be derived. + arg_num is an index in to the qualifiers data, which + gives qualifiers to the type (e.g. const unsigned). + The reason these two variables may differ by one is the + void return type. While all return types take the 0th entry + in the qualifiers array, there is no operand for them in the + RTL pattern. */ + int op_num = insn_data[d->code].n_operands - 1; + int arg_num = d->qualifiers[0] & qualifier_void + ? 
op_num + 1 + : op_num; + tree return_type = void_type_node, args = void_list_node; + tree eltype; + + /* Build a function type directly from the insn_data for this + builtin. The build_function_type () function takes care of + removing duplicates for us. */ + for (; op_num >= 0; arg_num--, op_num--) + { + enum machine_mode op_mode = insn_data[d->code].operand[op_num].mode; + enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num]; + + if (qualifiers & qualifier_unsigned) + { + type_signature[arg_num] = 'u'; + print_type_signature_p = true; + } + else if (qualifiers & qualifier_poly) + { + type_signature[arg_num] = 'p'; + print_type_signature_p = true; + } + else + type_signature[arg_num] = 's'; + + /* Skip an internal operand for vget_{low, high}. */ + if (qualifiers & qualifier_internal) + continue; + + /* Some builtins have different user-facing types + for certain arguments, encoded in d->mode. */ + if (qualifiers & qualifier_map_mode) + op_mode = modes[d->mode]; + + /* For pointers, we want a pointer to the basic type + of the vector. */ + if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) + op_mode = GET_MODE_INNER (op_mode); + + eltype = aarch64_build_type (op_mode, + qualifiers & qualifier_unsigned, + qualifiers & qualifier_poly); + + /* Add qualifiers. */ + if (qualifiers & qualifier_const) + eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); + + if (qualifiers & qualifier_pointer) + eltype = build_pointer_type (eltype); + + /* If we have reached arg_num == 0, we are at a non-void + return type. Otherwise, we are still processing + arguments. */ + if (arg_num == 0) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } + + ftype = build_function_type (return_type, args); + + gcc_assert (ftype != NULL); + + if (print_type_signature_p) + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s", + d->name, modenames[d->mode], type_signature); + else + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", + d->name, modenames[d->mode]); + + fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, + NULL, NULL_TREE); + aarch64_builtin_decls[fcode] = fndecl; + } +} + +void +aarch64_init_builtins (void) +{ + if (TARGET_SIMD) + aarch64_init_simd_builtins (); +} + +tree +aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= AARCH64_BUILTIN_MAX) + return error_mark_node; + + return aarch64_builtin_decls[code]; +} + +typedef enum +{ + SIMD_ARG_COPY_TO_REG, + SIMD_ARG_CONSTANT, + SIMD_ARG_STOP +} builtin_simd_arg; + +static rtx +aarch64_simd_expand_args (rtx target, int icode, int have_retval, + tree exp, ...) 
+{ + va_list ap; + rtx pat; + tree arg[SIMD_MAX_BUILTIN_ARGS]; + rtx op[SIMD_MAX_BUILTIN_ARGS]; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode[SIMD_MAX_BUILTIN_ARGS]; + int argc = 0; + + if (have_retval + && (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode))) + target = gen_reg_rtx (tmode); + + va_start (ap, exp); + + for (;;) + { + builtin_simd_arg thisarg = (builtin_simd_arg) va_arg (ap, int); + + if (thisarg == SIMD_ARG_STOP) + break; + else + { + arg[argc] = CALL_EXPR_ARG (exp, argc); + op[argc] = expand_normal (arg[argc]); + mode[argc] = insn_data[icode].operand[argc + have_retval].mode; + + switch (thisarg) + { + case SIMD_ARG_COPY_TO_REG: + if (POINTER_TYPE_P (TREE_TYPE (arg[argc]))) + op[argc] = convert_memory_address (Pmode, op[argc]); + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */ + if (!(*insn_data[icode].operand[argc + have_retval].predicate) + (op[argc], mode[argc])) + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + + case SIMD_ARG_CONSTANT: + if (!(*insn_data[icode].operand[argc + have_retval].predicate) + (op[argc], mode[argc])) + error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, " + "expected %<const int%>", argc + 1); + break; + + case SIMD_ARG_STOP: + gcc_unreachable (); + } + + argc++; + } + } + + va_end (ap); + + if (have_retval) + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (target, op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + else + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + + if (!pat) + return 0; + + emit_insn (pat); + + return target; +} + +/* Expand an AArch64 AdvSIMD builtin(intrinsic). */ +rtx +aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) +{ + aarch64_simd_builtin_datum *d = + &aarch64_simd_builtin_data[fcode - (AARCH64_SIMD_BUILTIN_BASE + 1)]; + enum insn_code icode = d->code; + builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS]; + int num_args = insn_data[d->code].n_operands; + int is_void = 0; + int k; + + is_void = !!(d->qualifiers[0] & qualifier_void); + + num_args += is_void; + + for (k = 1; k < num_args; k++) + { + /* We have four arrays of data, each indexed in a different fashion. + qualifiers - element 0 always describes the function return type. + operands - element 0 is either the operand for return value (if + the function has a non-void return type) or the operand for the + first argument. + expr_args - element 0 always holds the first argument. + args - element 0 is always used for the return type. 
*/ + int qualifiers_k = k; + int operands_k = k - is_void; + int expr_args_k = k - 1; + + if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = SIMD_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) + { + rtx arg + = expand_normal (CALL_EXPR_ARG (exp, + (expr_args_k))); + /* Handle constants only if the predicate allows it. */ + bool op_const_int_p = + (CONST_INT_P (arg) + && (*insn_data[icode].operand[operands_k].predicate) + (arg, insn_data[icode].operand[operands_k].mode)); + args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG; + } + else + args[k] = SIMD_ARG_COPY_TO_REG; + + } + args[k] = SIMD_ARG_STOP; + + /* The interface to aarch64_simd_expand_args expects a 0 if + the function is void, and a 1 if it is not. */ + return aarch64_simd_expand_args + (target, icode, !is_void, exp, + args[1], + args[2], + args[3], + args[4], + SIMD_ARG_STOP); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient. */ +rtx +aarch64_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + int fcode = DECL_FUNCTION_CODE (fndecl); + + if (fcode >= AARCH64_SIMD_BUILTIN_BASE) + return aarch64_simd_expand_builtin (fcode, exp, target); + + return NULL_RTX; +} + +tree +aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) +{ + enum machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 +#define AARCH64_FIND_FRINT_VARIANT(N) \ + (AARCH64_CHECK_BUILTIN_MODE (2, D) \ + ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \ + : (AARCH64_CHECK_BUILTIN_MODE (4, S) \ + ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \ + : (AARCH64_CHECK_BUILTIN_MODE (2, S) \ + ? 
aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \ + : NULL_TREE))) + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); + switch (fn) + { +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Fmode && out_n == C \ + && in_mode == N##Fmode && in_n == C) + case BUILT_IN_FLOOR: + case BUILT_IN_FLOORF: + return AARCH64_FIND_FRINT_VARIANT (floor); + case BUILT_IN_CEIL: + case BUILT_IN_CEILF: + return AARCH64_FIND_FRINT_VARIANT (ceil); + case BUILT_IN_TRUNC: + case BUILT_IN_TRUNCF: + return AARCH64_FIND_FRINT_VARIANT (btrunc); + case BUILT_IN_ROUND: + case BUILT_IN_ROUNDF: + return AARCH64_FIND_FRINT_VARIANT (round); + case BUILT_IN_NEARBYINT: + case BUILT_IN_NEARBYINTF: + return AARCH64_FIND_FRINT_VARIANT (nearbyint); + case BUILT_IN_SQRT: + case BUILT_IN_SQRTF: + return AARCH64_FIND_FRINT_VARIANT (sqrt); +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == SImode && out_n == C \ + && in_mode == N##Imode && in_n == C) + case BUILT_IN_CLZ: + { + if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; + return NULL_TREE; + } +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Imode && out_n == C \ + && in_mode == N##Fmode && in_n == C) + case BUILT_IN_LFLOOR: + case BUILT_IN_LFLOORF: + case BUILT_IN_LLFLOOR: + case BUILT_IN_IFLOORF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + case BUILT_IN_LCEIL: + case BUILT_IN_LCEILF: + case BUILT_IN_LLCEIL: + case BUILT_IN_ICEILF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + case BUILT_IN_LROUND: + case BUILT_IN_IROUNDF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + + default: + return NULL_TREE; + } + } + + return NULL_TREE; +} + +#undef VAR1 +#define VAR1(T, N, MAP, A) \ + case AARCH64_SIMD_BUILTIN_##T##_##N##A: + +tree +aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, + bool ignore ATTRIBUTE_UNUSED) +{ + int fcode = DECL_FUNCTION_CODE (fndecl); + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + + switch (fcode) + { + BUILTIN_VALLDI (UNOP, abs, 2) + return fold_build1 (ABS_EXPR, type, args[0]); + break; + BUILTIN_VALLDI (BINOP, cmge, 0) + return fold_build2 (GE_EXPR, type, args[0], args[1]); + break; + BUILTIN_VALLDI (BINOP, cmgt, 0) + return fold_build2 (GT_EXPR, type, args[0], args[1]); + break; + BUILTIN_VALLDI 
(BINOP, cmeq, 0) + return fold_build2 (EQ_EXPR, type, args[0], args[1]); + break; + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + { + tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]); + tree vec_zero_node = build_zero_cst (type); + return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); + break; + } + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) + return fold_build1 (FLOAT_EXPR, type, args[0]); + default: + break; + } + + return NULL_TREE; +} + +bool +aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + bool changed = false; + gimple stmt = gsi_stmt (*gsi); + tree call = gimple_call_fn (stmt); + tree fndecl; + gimple new_stmt = NULL; + if (call) + { + fndecl = gimple_call_fndecl (stmt); + if (fndecl) + { + int fcode = DECL_FUNCTION_CODE (fndecl); + int nargs = gimple_call_num_args (stmt); + tree *args = (nargs > 0 + ? gimple_call_arg_ptr (stmt, 0) + : &error_mark_node); + + switch (fcode) + { + BUILTIN_VALL (UNOP, reduc_splus_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_PLUS_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + BUILTIN_VDQIF (UNOP, reduc_smax_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_MAX_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + BUILTIN_VDQIF (UNOP, reduc_smin_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_MIN_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + + default: + break; + } + } + } + + if (new_stmt) + { + gsi_replace (gsi, new_stmt, true); + changed = true; + } + + return changed; +} + +#undef AARCH64_CHECK_BUILTIN_MODE +#undef AARCH64_FIND_FRINT_VARIANT +#undef BUILTIN_DX +#undef BUILTIN_SDQ_I +#undef BUILTIN_SD_HSI +#undef BUILTIN_V2F +#undef BUILTIN_VALL +#undef BUILTIN_VB +#undef BUILTIN_VD +#undef BUILTIN_VDC +#undef BUILTIN_VDIC +#undef BUILTIN_VDN +#undef BUILTIN_VDQ +#undef BUILTIN_VDQF +#undef BUILTIN_VDQH +#undef BUILTIN_VDQHS +#undef BUILTIN_VDQIF +#undef BUILTIN_VDQM +#undef BUILTIN_VDQV +#undef BUILTIN_VDQ_BHSI +#undef BUILTIN_VDQ_I +#undef BUILTIN_VDW +#undef BUILTIN_VD_BHSI +#undef BUILTIN_VD_HSI +#undef BUILTIN_VD_RE +#undef BUILTIN_VQ +#undef BUILTIN_VQN +#undef BUILTIN_VQW +#undef BUILTIN_VQ_HSI +#undef BUILTIN_VQ_S +#undef BUILTIN_VSDQ_HSI +#undef BUILTIN_VSDQ_I +#undef BUILTIN_VSDQ_I_BHSI +#undef BUILTIN_VSDQ_I_DI +#undef BUILTIN_VSD_HSI +#undef BUILTIN_VSQN_HSDI +#undef BUILTIN_VSTRUCT +#undef CF0 +#undef CF1 +#undef CF2 +#undef CF3 +#undef CF4 +#undef CF10 +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 +#undef VAR11 + diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-cores.def b/gcc-4.9/gcc/config/aarch64/aarch64-cores.def new file mode 100644 index 000000000..9319249e6 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-cores.def @@ -0,0 +1,42 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* This is a list of cores that implement AArch64. + + Before using #include to read this file, define a macro: + + AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS) + + The CORE_NAME is the name of the core, represented as a string constant. + The CORE_IDENT is the name of the core, represented as an identifier. + The SCHEDULER_IDENT is the name of the core for which scheduling decisions + will be made, represented as an identifier. + ARCH is the architecture revision implemented by the chip. + FLAGS are the bitwise-or of the traits that apply to that core. + This need not include flags implied by the architecture. + COSTS is the name of the rtx_costs routine to use. */ + +/* V8 Architecture Processors. */ + +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa53) +AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) + +/* V8 big.LITTLE implementations. */ + +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h new file mode 100644 index 000000000..adec7e7ba --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h @@ -0,0 +1,33 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Support for bare-metal builds. */ +#ifndef GCC_AARCH64_ELF_RAW_H +#define GCC_AARCH64_ELF_RAW_H + +#define STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" +#define ENDFILE_SPEC " crtend%O%s crtn%O%s" + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ + -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" +#endif + +#endif /* GCC_AARCH64_ELF_RAW_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf.h new file mode 100644 index 000000000..15ab630de --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf.h @@ -0,0 +1,161 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_AARCH64_ELF_H +#define GCC_AARCH64_ELF_H + + +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + aarch64_asm_output_labelref (FILE, NAME) + +#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \ + do \ + { \ + assemble_name (FILE, NAME1); \ + fputs (" = ", FILE); \ + assemble_name (FILE, NAME2); \ + fputc ('\n', FILE); \ + } while (0) + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.bss" + +#define CTORS_SECTION_ASM_OP "\t.section\t.init_array,\"aw\",%init_array" +#define DTORS_SECTION_ASM_OP "\t.section\t.fini_array,\"aw\",%fini_array" + +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#define INIT_ARRAY_SECTION_ASM_OP CTORS_SECTION_ASM_OP +#define FINI_ARRAY_SECTION_ASM_OP DTORS_SECTION_ASM_OP + +/* Since we use .init_array/.fini_array we don't need the markers at + the start and end of the ctors/dtors arrays. */ +#define CTOR_LIST_BEGIN asm (CTORS_SECTION_ASM_OP) +#define CTOR_LIST_END /* empty */ +#define DTOR_LIST_BEGIN asm (DTORS_SECTION_ASM_OP) +#define DTOR_LIST_END /* empty */ + +#undef TARGET_ASM_CONSTRUCTOR +#define TARGET_ASM_CONSTRUCTOR aarch64_elf_asm_constructor + +#undef TARGET_ASM_DESTRUCTOR +#define TARGET_ASM_DESTRUCTOR aarch64_elf_asm_destructor + +#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN +/* Support for -falign-* switches. Use .p2align to ensure that code + sections are padded with NOP instructions, rather than zeros. */ +#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \ + do \ + { \ + if ((LOG) != 0) \ + { \ + if ((MAX_SKIP) == 0) \ + fprintf ((FILE), "\t.p2align %d\n", (int) (LOG)); \ + else \ + fprintf ((FILE), "\t.p2align %d,,%d\n", \ + (int) (LOG), (int) (MAX_SKIP)); \ + } \ + } while (0) + +#endif /* HAVE_GAS_MAX_SKIP_P2ALIGN */ + +#define JUMP_TABLES_IN_TEXT_SECTION 0 + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + do { \ + switch (GET_MODE (BODY)) \ + { \ + case QImode: \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + case HImode: \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + case DImode: /* See comment in aarch64_output_casesi. */ \ + asm_fprintf (STREAM, "\t.word\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + default: \ + gcc_unreachable (); \ + } \ + } while (0) + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf(STREAM, "\t.align\t%d\n", (int)POWER) + +#define ASM_COMMENT_START "//" + +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" + +#define GLOBAL_ASM_OP "\t.global\t" + +#ifdef TARGET_BIG_ENDIAN_DEFAULT +#define ENDIAN_SPEC "-mbig-endian" +#else +#define ENDIAN_SPEC "-mlittle-endian" +#endif + +#if TARGET_DATA_MODEL == 1 +#define ABI_SPEC "-mabi=lp64" +#define MULTILIB_DEFAULTS { "mabi=lp64" } +#elif TARGET_DATA_MODEL == 2 +#define ABI_SPEC "-mabi=ilp32" +#define MULTILIB_DEFAULTS { "mabi=ilp32" } +#else +#error "Unknown or undefined TARGET_DATA_MODEL!" +#endif + +/* Force the default endianness and ABI flags onto the command line + in order to make the other specs easier to write. 
*/ +#undef DRIVER_SELF_SPECS +#define DRIVER_SELF_SPECS \ + " %{!mbig-endian:%{!mlittle-endian:" ENDIAN_SPEC "}}" \ + " %{!mabi=*:" ABI_SPEC "}" + +#ifdef HAVE_AS_MABI_OPTION +#define ASM_MABI_SPEC "%{mabi=*:-mabi=%*}" +#else +#define ASM_MABI_SPEC "%{mabi=lp64:}" +#endif + +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%{march=*:-march=%*} \ +%(asm_cpu_spec)" \ +ASM_MABI_SPEC +#endif + +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "%%%s" + +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION aarch64_elf_asm_named_section + +/* Stabs debug not required. */ +#undef DBX_DEBUGGING_INFO + +#endif /* GCC_AARCH64_ELF_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h new file mode 100644 index 000000000..a8f077156 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h @@ -0,0 +1,47 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_AARCH64_LINUX_H +#define GCC_AARCH64_LINUX_H + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" + +#define CPP_SPEC "%{pthread:-D_REENTRANT}" + +#define LINUX_TARGET_LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ + -X \ + %{mbig-endian:-EB} %{mlittle-endian:-EL} \ + -maarch64linux%{mbig-endian:b}" + +#define LINK_SPEC LINUX_TARGET_LINK_SPEC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +#endif /* GCC_AARCH64_LINUX_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def new file mode 100644 index 000000000..1d2cc7679 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def @@ -0,0 +1,55 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +CC_MODE (CCFP); +CC_MODE (CCFPE); +CC_MODE (CC_SWP); +CC_MODE (CC_ZESWP); /* zero-extend LHS (but swap to make it RHS). 
*/ +CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS). */ +CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ +CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ +VECTOR_MODES (FLOAT, 8); /* V2SF. */ +VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ + +/* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */ +INT_MODE (OI, 32); + +/* Opaque integer modes for 3, 6 or 8 Neon double registers (2 is + TImode). */ +INT_MODE (EI, 24); +INT_MODE (CI, 48); +INT_MODE (XI, 64); + +/* Vector modes for register lists. */ +VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 32); /* V8SF V4DF. */ + +VECTOR_MODES (INT, 48); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 48); /* V8SF V4DF. */ + +VECTOR_MODES (INT, 64); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 64); /* V8SF V4DF. */ + +/* Quad float: 128-bit floating mode for long doubles. */ +FLOAT_MODE (TF, 16, ieee_quad_format); diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def b/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def new file mode 100644 index 000000000..1aa65d32a --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def @@ -0,0 +1,38 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* This is a list of ISA extentsions in AArch64. + + Before using #include to read this file, define a macro: + + AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF) + + EXT_NAME is the name of the extension, represented as a string constant. + FLAGS_ON are the bitwise-or of the features that the extension adds. + FLAGS_OFF are the bitwise-or of the features that the extension removes. */ + +/* V8 Architecture Extensions. + This list currently contains example extensions for CPUs that implement + AArch64, and therefore serves as a template for adding more CPUs in the + future. */ + +AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-opts.h b/gcc-4.9/gcc/config/aarch64/aarch64-opts.h new file mode 100644 index 000000000..370931536 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-opts.h @@ -0,0 +1,64 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Definitions for option handling for AArch64. */ + +#ifndef GCC_AARCH64_OPTS_H +#define GCC_AARCH64_OPTS_H + +/* The various cores that implement AArch64. */ +enum aarch64_processor +{ +#define AARCH64_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + INTERNAL_IDENT, +#include "aarch64-cores.def" +#undef AARCH64_CORE + /* Used to indicate that no processor has been specified. */ + generic, + /* Used to mark the end of the processor table. */ + aarch64_none +}; + +/* TLS types. */ +enum aarch64_tls_type { + TLS_TRADITIONAL, + TLS_DESCRIPTORS +}; + +/* The code model defines the address generation strategy. + Most have a PIC and non-PIC variant. */ +enum aarch64_code_model { + /* Static code and data fit within a 1MB region. + Not fully implemented, mostly treated as SMALL. */ + AARCH64_CMODEL_TINY, + /* Static code, data and GOT/PLT fit within a 1MB region. + Not fully implemented, mostly treated as SMALL_PIC. */ + AARCH64_CMODEL_TINY_PIC, + /* Static code and data fit within a 4GB region. + The default non-PIC code model. */ + AARCH64_CMODEL_SMALL, + /* Static code, data and GOT/PLT fit within a 4GB region. + The default PIC code model. */ + AARCH64_CMODEL_SMALL_PIC, + /* No assumptions about addresses of code and data. + The PIC variant is not yet implemented. */ + AARCH64_CMODEL_LARGE +}; + +#endif diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h new file mode 100644 index 000000000..5542f023b --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h @@ -0,0 +1,292 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + +#ifndef GCC_AARCH64_PROTOS_H +#define GCC_AARCH64_PROTOS_H + +/* + SYMBOL_CONTEXT_ADR + The symbol is used in a load-address operation. + SYMBOL_CONTEXT_MEM + The symbol is used as the address in a MEM. + */ +enum aarch64_symbol_context +{ + SYMBOL_CONTEXT_MEM, + SYMBOL_CONTEXT_ADR +}; + +/* SYMBOL_SMALL_ABSOLUTE: Generate symbol accesses through + high and lo relocs that calculate the base address using a PC + relative reloc. 
+ So to get the address of foo, we generate + adrp x0, foo + add x0, x0, :lo12:foo + + To load or store something to foo, we could use the corresponding + load store variants that generate an + ldr x0, [x0,:lo12:foo] + or + str x1, [x0, :lo12:foo] + + This corresponds to the small code model of the compiler. + + SYMBOL_SMALL_GOT: Similar to the one above but this + gives us the GOT entry of the symbol being referred to : + Thus calculating the GOT entry for foo is done using the + following sequence of instructions. The ADRP instruction + gets us to the page containing the GOT entry of the symbol + and the got_lo12 gets us the actual offset in it. + + adrp x0, :got:foo + ldr x0, [x0, :gotoff_lo12:foo] + + This corresponds to the small PIC model of the compiler. + + SYMBOL_SMALL_TLSGD + SYMBOL_SMALL_TLSDESC + SYMBOL_SMALL_GOTTPREL + SYMBOL_SMALL_TPREL + Each of of these represents a thread-local symbol, and corresponds to the + thread local storage relocation operator for the symbol being referred to. + + SYMBOL_TINY_ABSOLUTE + + Generate symbol accesses as a PC relative address using a single + instruction. To compute the address of symbol foo, we generate: + + ADR x0, foo + + SYMBOL_TINY_GOT + + Generate symbol accesses via the GOT using a single PC relative + instruction. To compute the address of symbol foo, we generate: + + ldr t0, :got:foo + + The value of foo can subsequently read using: + + ldrb t0, [t0] + + SYMBOL_FORCE_TO_MEM : Global variables are addressed using + constant pool. All variable addresses are spilled into constant + pools. The constant pools themselves are addressed using PC + relative accesses. This only works for the large code model. + */ +enum aarch64_symbol_type +{ + SYMBOL_SMALL_ABSOLUTE, + SYMBOL_SMALL_GOT, + SYMBOL_SMALL_TLSGD, + SYMBOL_SMALL_TLSDESC, + SYMBOL_SMALL_GOTTPREL, + SYMBOL_SMALL_TPREL, + SYMBOL_TINY_ABSOLUTE, + SYMBOL_TINY_GOT, + SYMBOL_FORCE_TO_MEM +}; + +/* A set of tuning parameters contains references to size and time + cost models and vectors for address cost calculations, register + move costs and memory move costs. */ + +/* Additional cost for addresses. */ +struct cpu_addrcost_table +{ + const int pre_modify; + const int post_modify; + const int register_offset; + const int register_extend; + const int imm_offset; +}; + +/* Additional costs for register copies. Cost is for one register. */ +struct cpu_regmove_cost +{ + const int GP2GP; + const int GP2FP; + const int FP2GP; + const int FP2FP; +}; + +/* Cost for vector insn classes. */ +struct cpu_vector_cost +{ + const int scalar_stmt_cost; /* Cost of any scalar operation, + excluding load and store. */ + const int scalar_load_cost; /* Cost of scalar load. */ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, + excluding load, store, + vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector + operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ + const int vec_unalign_store_cost; /* Cost of unaligned vector store. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch. */ + const int cond_not_taken_branch_cost; /* Cost of not taken branch. 
*/ +}; + +struct tune_params +{ + const struct cpu_cost_table *const insn_extra_cost; + const struct cpu_addrcost_table *const addr_cost; + const struct cpu_regmove_cost *const regmove_cost; + const struct cpu_vector_cost *const vec_costs; + const int memmov_cost; + const int issue_rate; +}; + +HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); +bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); +bool aarch64_cannot_change_mode_class (enum machine_mode, + enum machine_mode, + enum reg_class); +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); +bool aarch64_constant_address_p (rtx); +bool aarch64_float_const_zero_rtx_p (rtx); +bool aarch64_function_arg_regno_p (unsigned); +bool aarch64_gen_movmemqi (rtx *); +bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *); +bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx); +bool aarch64_is_long_call_p (rtx); +bool aarch64_label_mentioned_p (rtx); +bool aarch64_legitimate_pic_operand_p (rtx); +bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); +bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, + enum machine_mode); +char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); +char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); +bool aarch64_pad_arg_upward (enum machine_mode, const_tree); +bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); +bool aarch64_regno_ok_for_base_p (int, bool); +bool aarch64_regno_ok_for_index_p (int, bool); +bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); +bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); +bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); +bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool); +bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool, + struct simd_immediate_info *); +bool aarch64_symbolic_address_p (rtx); +bool aarch64_uimm12_shift (HOST_WIDE_INT); +const char *aarch64_output_casesi (rtx *); +const char *aarch64_rewrite_selected_cpu (const char *name); + +enum aarch64_symbol_type aarch64_classify_symbol (rtx, + enum aarch64_symbol_context); +enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); +enum reg_class aarch64_regno_regclass (unsigned); +int aarch64_asm_preferred_eh_data_format (int, int); +int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode); +int aarch64_hard_regno_nregs (unsigned, enum machine_mode); +int aarch64_simd_attr_length_move (rtx); +int aarch64_uxt_size (int, HOST_WIDE_INT); +rtx aarch64_final_eh_return_addr (void); +rtx aarch64_legitimize_reload_address (rtx *, enum machine_mode, int, int, int); +const char *aarch64_output_move_struct (rtx *operands); +rtx aarch64_return_addr (int, rtx); +rtx aarch64_simd_gen_const_vector_dup (enum machine_mode, int); +bool aarch64_simd_mem_operand_p (rtx); +rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool); +rtx aarch64_tls_get_addr (void); +tree aarch64_fold_builtin (tree, int, tree *, bool); +unsigned aarch64_dbx_register_number (unsigned); +unsigned aarch64_trampoline_size (void); +void aarch64_asm_output_labelref (FILE *, const char *); +void aarch64_elf_asm_named_section (const char *, unsigned, tree); +void aarch64_expand_epilogue (bool); +void aarch64_expand_mov_immediate (rtx, rtx); +void aarch64_expand_prologue (void); +void aarch64_expand_vector_init (rtx, rtx); +void aarch64_function_profiler (FILE *, int); +void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, 
const_tree, rtx, + const_tree, unsigned); +void aarch64_init_expanders (void); +void aarch64_print_operand (FILE *, rtx, char); +void aarch64_print_operand_address (FILE *, rtx); + +/* Initialize builtins for SIMD intrinsics. */ +void init_aarch64_simd_builtins (void); + +void aarch64_simd_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +void aarch64_simd_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int); + +/* Emit code to place a AdvSIMD pair result in memory locations (with equal + registers). */ +void aarch64_simd_emit_pair_result_insn (enum machine_mode, + rtx (*intfn) (rtx, rtx, rtx), rtx, + rtx); + +/* Expand builtins for SIMD intrinsics. */ +rtx aarch64_simd_expand_builtin (int, tree, rtx); + +void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + +/* Emit code for reinterprets. */ +void aarch64_simd_reinterpret (rtx, rtx); + +void aarch64_split_128bit_move (rtx, rtx); + +bool aarch64_split_128bit_move_p (rtx, rtx); + +void aarch64_split_simd_combine (rtx, rtx, rtx); + +void aarch64_split_simd_move (rtx, rtx); + +/* Check for a legitimate floating point constant for FMOV. */ +bool aarch64_float_const_representable_p (rtx); + +#if defined (RTX_CODE) + +bool aarch64_legitimate_address_p (enum machine_mode, rtx, RTX_CODE, bool); +enum machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); +rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); +rtx aarch64_load_tp (rtx); + +void aarch64_expand_compare_and_swap (rtx op[]); +void aarch64_split_compare_and_swap (rtx op[]); +void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); + +#endif /* RTX_CODE */ + +void aarch64_init_builtins (void); +rtx aarch64_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED); +tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED); + +tree +aarch64_builtin_vectorized_function (tree fndecl, + tree type_out, + tree type_in); + +extern void aarch64_split_combinev16qi (rtx operands[3]); +extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); +extern bool +aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); +#endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def new file mode 100644 index 000000000..c9b7570e5 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def @@ -0,0 +1,395 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* In the list below, the BUILTIN_<ITERATOR> macros expand to create + builtins for each of the modes described by <ITERATOR>. When adding + new builtins to this list, a helpful idiom to follow is to add + a line for each pattern in the md file. 
Thus, ADDP, which has one + pattern defined for the VD_BHSI iterator, and one for DImode, has two + entries below. + + Parameter 1 is the 'type' of the intrinsic. This is used to + describe the type modifiers (for example; unsigned) applied to + each of the parameters to the intrinsic function. + + Parameter 2 is the name of the intrinsic. This is appended + to `__builtin_aarch64_<name><mode>` to give the intrinsic name + as exported to the front-ends. + + Parameter 3 describes how to map from the name to the CODE_FOR_ + macro holding the RTL pattern for the intrinsic. This mapping is: + 0 - CODE_FOR_aarch64_<name><mode> + 1-9 - CODE_FOR_<name><mode><1-9> + 10 - CODE_FOR_<name><mode>. */ + + BUILTIN_VD_RE (CREATE, create, 0) + BUILTIN_VDC (COMBINE, combine, 0) + BUILTIN_VB (BINOP, pmul, 0) + BUILTIN_VDQF (UNOP, sqrt, 2) + BUILTIN_VD_BHSI (BINOP, addp, 0) + VAR1 (UNOP, addp, 0, di) + BUILTIN_VDQ_BHSI (UNOP, clz, 2) + + BUILTIN_VALL (GETLANE, get_lane, 0) + VAR1 (GETLANE, get_lane, 0, di) + BUILTIN_VALL (GETLANE, be_checked_get_lane, 0) + + BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) + BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) + BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) + BUILTIN_VDC (REINTERP, reinterpretv2si, 0) + BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) + BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) + BUILTIN_VQ (REINTERP, reinterpretv4si, 0) + BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv2di, 0) + BUILTIN_VQ (REINTERP, reinterpretv2df, 0) + + BUILTIN_VDQ_I (BINOP, dup_lane, 0) + /* Implemented by aarch64_<sur>q<r>shl<mode>. */ + BUILTIN_VSDQ_I (BINOP, sqshl, 0) + BUILTIN_VSDQ_I (BINOP, uqshl, 0) + BUILTIN_VSDQ_I (BINOP, sqrshl, 0) + BUILTIN_VSDQ_I (BINOP, uqrshl, 0) + /* Implemented by aarch64_<su_optab><optab><mode>. */ + BUILTIN_VSDQ_I (BINOP, sqadd, 0) + BUILTIN_VSDQ_I (BINOP, uqadd, 0) + BUILTIN_VSDQ_I (BINOP, sqsub, 0) + BUILTIN_VSDQ_I (BINOP, uqsub, 0) + /* Implemented by aarch64_<sur>qadd<mode>. */ + BUILTIN_VSDQ_I (BINOP, suqadd, 0) + BUILTIN_VSDQ_I (BINOP, usqadd, 0) + + /* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */ + BUILTIN_VDC (GETLANE, get_dregoi, 0) + BUILTIN_VDC (GETLANE, get_dregci, 0) + BUILTIN_VDC (GETLANE, get_dregxi, 0) + /* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */ + BUILTIN_VQ (GETLANE, get_qregoi, 0) + BUILTIN_VQ (GETLANE, get_qregci, 0) + BUILTIN_VQ (GETLANE, get_qregxi, 0) + /* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */ + BUILTIN_VQ (SETLANE, set_qregoi, 0) + BUILTIN_VQ (SETLANE, set_qregci, 0) + BUILTIN_VQ (SETLANE, set_qregxi, 0) + /* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */ + BUILTIN_VDC (LOADSTRUCT, ld2, 0) + BUILTIN_VDC (LOADSTRUCT, ld3, 0) + BUILTIN_VDC (LOADSTRUCT, ld4, 0) + /* Implemented by aarch64_ld<VSTRUCT:nregs><VQ:mode>. */ + BUILTIN_VQ (LOADSTRUCT, ld2, 0) + BUILTIN_VQ (LOADSTRUCT, ld3, 0) + BUILTIN_VQ (LOADSTRUCT, ld4, 0) + /* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */ + BUILTIN_VDC (STORESTRUCT, st2, 0) + BUILTIN_VDC (STORESTRUCT, st3, 0) + BUILTIN_VDC (STORESTRUCT, st4, 0) + /* Implemented by aarch64_st<VSTRUCT:nregs><VQ:mode>. 
*/ + BUILTIN_VQ (STORESTRUCT, st2, 0) + BUILTIN_VQ (STORESTRUCT, st3, 0) + BUILTIN_VQ (STORESTRUCT, st4, 0) + + BUILTIN_VQW (BINOP, saddl2, 0) + BUILTIN_VQW (BINOP, uaddl2, 0) + BUILTIN_VQW (BINOP, ssubl2, 0) + BUILTIN_VQW (BINOP, usubl2, 0) + BUILTIN_VQW (BINOP, saddw2, 0) + BUILTIN_VQW (BINOP, uaddw2, 0) + BUILTIN_VQW (BINOP, ssubw2, 0) + BUILTIN_VQW (BINOP, usubw2, 0) + /* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>. */ + BUILTIN_VDW (BINOP, saddl, 0) + BUILTIN_VDW (BINOP, uaddl, 0) + BUILTIN_VDW (BINOP, ssubl, 0) + BUILTIN_VDW (BINOP, usubl, 0) + /* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>. */ + BUILTIN_VDW (BINOP, saddw, 0) + BUILTIN_VDW (BINOP, uaddw, 0) + BUILTIN_VDW (BINOP, ssubw, 0) + BUILTIN_VDW (BINOP, usubw, 0) + /* Implemented by aarch64_<sur>h<addsub><mode>. */ + BUILTIN_VQ_S (BINOP, shadd, 0) + BUILTIN_VQ_S (BINOP, uhadd, 0) + BUILTIN_VQ_S (BINOP, srhadd, 0) + BUILTIN_VQ_S (BINOP, urhadd, 0) + /* Implemented by aarch64_<sur><addsub>hn<mode>. */ + BUILTIN_VQN (BINOP, addhn, 0) + BUILTIN_VQN (BINOP, raddhn, 0) + /* Implemented by aarch64_<sur><addsub>hn2<mode>. */ + BUILTIN_VQN (TERNOP, addhn2, 0) + BUILTIN_VQN (TERNOP, raddhn2, 0) + + BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0) + /* Implemented by aarch64_<sur>qmovn<mode>. */ + BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) + BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) + /* Implemented by aarch64_s<optab><mode>. */ + BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) + BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) + + BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0) + /* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */ + BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0) + /* Implemented by aarch64_sqdml<SBINQOPS:as>l_n<mode>. */ + BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0) + BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0) + + BUILTIN_VSD_HSI (BINOP, sqdmull, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0) + BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0) + BUILTIN_VD_HSI (BINOP, sqdmull_n, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0) + /* Implemented by aarch64_sq<r>dmulh<mode>. */ + BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0) + BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0) + /* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */ + BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0) + BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0) + BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0) + + BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) + /* Implemented by aarch64_<sur>shl<mode>. */ + BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) + BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) + + BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) + VAR1 (SHIFTIMM, ashr_simd, 0, di) + BUILTIN_VDQ_I (SHIFTIMM, lshr, 3) + VAR1 (USHIFTIMM, lshr_simd, 0, di) + /* Implemented by aarch64_<sur>shr_n<mode>. 
*/ + BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) + /* Implemented by aarch64_<sur>sra_n<mode>. */ + BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) + /* Implemented by aarch64_<sur>shll_n<mode>. */ + BUILTIN_VDW (SHIFTIMM, sshll_n, 0) + BUILTIN_VDW (SHIFTIMM, ushll_n, 0) + /* Implemented by aarch64_<sur>shll2_n<mode>. */ + BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) + BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) + /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */ + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) + /* Implemented by aarch64_<sur>s<lr>i_n<mode>. */ + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) + /* Implemented by aarch64_<sur>qshl<u>_n<mode>. */ + BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) + + /* Implemented by aarch64_cm<cmp><mode>. */ + BUILTIN_VALLDI (BINOP, cmeq, 0) + BUILTIN_VALLDI (BINOP, cmge, 0) + BUILTIN_VALLDI (BINOP, cmgt, 0) + BUILTIN_VALLDI (BINOP, cmle, 0) + BUILTIN_VALLDI (BINOP, cmlt, 0) + /* Implemented by aarch64_cm<cmp><mode>. */ + BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + + /* Implemented by reduc_<sur>plus_<mode>. */ + BUILTIN_VALL (UNOP, reduc_splus_, 10) + BUILTIN_VDQ (UNOP, reduc_uplus_, 10) + + /* Implemented by reduc_<maxmin_uns>_<mode>. */ + BUILTIN_VDQIF (UNOP, reduc_smax_, 10) + BUILTIN_VDQIF (UNOP, reduc_smin_, 10) + BUILTIN_VDQ_BHSI (UNOP, reduc_umax_, 10) + BUILTIN_VDQ_BHSI (UNOP, reduc_umin_, 10) + BUILTIN_VDQF (UNOP, reduc_smax_nan_, 10) + BUILTIN_VDQF (UNOP, reduc_smin_nan_, 10) + + /* Implemented by <maxmin><mode>3. + smax variants map to fmaxnm, + smax_nan variants map to fmax. */ + BUILTIN_VDQIF (BINOP, smax, 3) + BUILTIN_VDQIF (BINOP, smin, 3) + BUILTIN_VDQ_BHSI (BINOP, umax, 3) + BUILTIN_VDQ_BHSI (BINOP, umin, 3) + BUILTIN_VDQF (BINOP, smax_nan, 3) + BUILTIN_VDQF (BINOP, smin_nan, 3) + + /* Implemented by <frint_pattern><mode>2. */ + BUILTIN_VDQF (UNOP, btrunc, 2) + BUILTIN_VDQF (UNOP, ceil, 2) + BUILTIN_VDQF (UNOP, floor, 2) + BUILTIN_VDQF (UNOP, nearbyint, 2) + BUILTIN_VDQF (UNOP, rint, 2) + BUILTIN_VDQF (UNOP, round, 2) + BUILTIN_VDQF (UNOP, frintn, 2) + + /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */ + VAR1 (UNOP, lbtruncv2sf, 2, v2si) + VAR1 (UNOP, lbtruncv4sf, 2, v4si) + VAR1 (UNOP, lbtruncv2df, 2, v2di) + + VAR1 (UNOP, lbtruncuv2sf, 2, v2si) + VAR1 (UNOP, lbtruncuv4sf, 2, v4si) + VAR1 (UNOP, lbtruncuv2df, 2, v2di) + + VAR1 (UNOP, lroundv2sf, 2, v2si) + VAR1 (UNOP, lroundv4sf, 2, v4si) + VAR1 (UNOP, lroundv2df, 2, v2di) + /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2. 
*/ + VAR1 (UNOP, lroundsf, 2, si) + VAR1 (UNOP, lrounddf, 2, di) + + VAR1 (UNOP, lrounduv2sf, 2, v2si) + VAR1 (UNOP, lrounduv4sf, 2, v4si) + VAR1 (UNOP, lrounduv2df, 2, v2di) + VAR1 (UNOP, lroundusf, 2, si) + VAR1 (UNOP, lroundudf, 2, di) + + VAR1 (UNOP, lceilv2sf, 2, v2si) + VAR1 (UNOP, lceilv4sf, 2, v4si) + VAR1 (UNOP, lceilv2df, 2, v2di) + + VAR1 (UNOP, lceiluv2sf, 2, v2si) + VAR1 (UNOP, lceiluv4sf, 2, v4si) + VAR1 (UNOP, lceiluv2df, 2, v2di) + VAR1 (UNOP, lceilusf, 2, si) + VAR1 (UNOP, lceiludf, 2, di) + + VAR1 (UNOP, lfloorv2sf, 2, v2si) + VAR1 (UNOP, lfloorv4sf, 2, v4si) + VAR1 (UNOP, lfloorv2df, 2, v2di) + + VAR1 (UNOP, lflooruv2sf, 2, v2si) + VAR1 (UNOP, lflooruv4sf, 2, v4si) + VAR1 (UNOP, lflooruv2df, 2, v2di) + VAR1 (UNOP, lfloorusf, 2, si) + VAR1 (UNOP, lfloorudf, 2, di) + + VAR1 (UNOP, lfrintnv2sf, 2, v2si) + VAR1 (UNOP, lfrintnv4sf, 2, v4si) + VAR1 (UNOP, lfrintnv2df, 2, v2di) + VAR1 (UNOP, lfrintnsf, 2, si) + VAR1 (UNOP, lfrintndf, 2, di) + + VAR1 (UNOP, lfrintnuv2sf, 2, v2si) + VAR1 (UNOP, lfrintnuv4sf, 2, v4si) + VAR1 (UNOP, lfrintnuv2df, 2, v2di) + VAR1 (UNOP, lfrintnusf, 2, si) + VAR1 (UNOP, lfrintnudf, 2, di) + + /* Implemented by <optab><fcvt_target><VDQF:mode>2. */ + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) + + VAR1 (UNOP, floatunsv2si, 2, v2sf) + VAR1 (UNOP, floatunsv4si, 2, v4sf) + VAR1 (UNOP, floatunsv2di, 2, v2df) + + /* Implemented by + aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */ + BUILTIN_VALL (BINOP, zip1, 0) + BUILTIN_VALL (BINOP, zip2, 0) + BUILTIN_VALL (BINOP, uzp1, 0) + BUILTIN_VALL (BINOP, uzp2, 0) + BUILTIN_VALL (BINOP, trn1, 0) + BUILTIN_VALL (BINOP, trn2, 0) + + /* Implemented by + aarch64_frecp<FRECP:frecp_suffix><mode>. */ + BUILTIN_GPF (UNOP, frecpe, 0) + BUILTIN_GPF (BINOP, frecps, 0) + BUILTIN_GPF (UNOP, frecpx, 0) + + BUILTIN_VDQF (UNOP, frecpe, 0) + BUILTIN_VDQF (BINOP, frecps, 0) + + BUILTIN_VALLDI (UNOP, abs, 2) + + VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf) + VAR1 (BINOP, float_truncate_hi_, 0, v4sf) + + VAR1 (UNOP, float_extend_lo_, 0, v2df) + VAR1 (UNOP, float_truncate_lo_, 0, v2sf) + + /* Implemented by aarch64_ld1<VALL:mode>. */ + BUILTIN_VALL (LOAD1, ld1, 0) + + /* Implemented by aarch64_st1<VALL:mode>. */ + BUILTIN_VALL (STORE1, st1, 0) + + /* Implemented by fma<mode>4. */ + BUILTIN_VDQF (TERNOP, fma, 4) + + /* Implemented by aarch64_simd_bsl<mode>. */ + BUILTIN_VDQQH (BSL_P, simd_bsl, 0) + BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0) + BUILTIN_VALLDIF (BSL_S, simd_bsl, 0) + + /* Implemented by aarch64_crypto_aes<op><mode>. */ + VAR1 (BINOPU, crypto_aese, 0, v16qi) + VAR1 (BINOPU, crypto_aesd, 0, v16qi) + VAR1 (UNOPU, crypto_aesmc, 0, v16qi) + VAR1 (UNOPU, crypto_aesimc, 0, v16qi) + + /* Implemented by aarch64_crypto_sha1<op><mode>. */ + VAR1 (UNOPU, crypto_sha1h, 0, si) + VAR1 (BINOPU, crypto_sha1su1, 0, v4si) + VAR1 (TERNOPU, crypto_sha1c, 0, v4si) + VAR1 (TERNOPU, crypto_sha1m, 0, v4si) + VAR1 (TERNOPU, crypto_sha1p, 0, v4si) + VAR1 (TERNOPU, crypto_sha1su0, 0, v4si) + + /* Implemented by aarch64_crypto_sha256<op><mode>. */ + VAR1 (TERNOPU, crypto_sha256h, 0, v4si) + VAR1 (TERNOPU, crypto_sha256h2, 0, v4si) + VAR1 (BINOPU, crypto_sha256su0, 0, v4si) + VAR1 (TERNOPU, crypto_sha256su1, 0, v4si) + + /* Implemented by aarch64_crypto_pmull<mode>. 
*/ + VAR1 (BINOPP, crypto_pmull, 0, di) + VAR1 (BINOPP, crypto_pmull, 0, v2di) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md new file mode 100644 index 000000000..6048d605c --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -0,0 +1,4363 @@ +;; Machine description for AArch64 AdvSIMD architecture. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_expand "mov<mode>" + [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" + " + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (<MODE>mode, operands[1]); + " +) + +(define_expand "movmisalign<mode>" + [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" +{ + /* This pattern is not permitted to fail during expansion: if both arguments + are non-registers (e.g. memory := constant, which can be created by the + auto-vectorizer), force operand 1 into a register. 
*/ + if (!register_operand (operands[0], <MODE>mode) + && !register_operand (operands[1], <MODE>mode)) + operands[1] = force_reg (<MODE>mode, operands[1]); +}) + +(define_insn "aarch64_simd_dup<mode>" + [(set (match_operand:VDQ 0 "register_operand" "=w, w") + (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))] + "TARGET_SIMD" + "@ + dup\\t%0.<Vtype>, %<vw>1 + dup\\t%0.<Vtype>, %1.<Vetype>[0]" + [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")] +) + +(define_insn "aarch64_simd_dup<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))] + "TARGET_SIMD" + "dup\\t%0.<Vtype>, %1.<Vetype>[0]" + [(set_attr "type" "neon_dup<q>")] +) + +(define_insn "aarch64_dup_lane<mode>" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select:<VEL> + (match_operand:VALL 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; + } + [(set_attr "type" "neon_dup<q>")] +) + +(define_insn "aarch64_dup_lane_<vswap_width_name><mode>" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select:<VEL> + (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, + INTVAL (operands[2]))); + return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; + } + [(set_attr "type" "neon_dup<q>")] +) + +(define_insn "*aarch64_simd_mov<mode>" + [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" + "=w, m, w, ?r, ?w, ?r, w") + (match_operand:VD 1 "aarch64_simd_general_operand" + "m, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" +{ + switch (which_alternative) + { + case 0: return "ldr\\t%d0, %1"; + case 1: return "str\\t%d1, %0"; + case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>"; + case 3: return "umov\t%0, %1.d[0]"; + case 4: return "ins\t%0.d[0], %1"; + case 5: return "mov\t%0, %1"; + case 6: + return aarch64_output_simd_mov_immediate (operands[1], + <MODE>mode, 64); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ + neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\ + mov_reg, neon_move<q>")] +) + +(define_insn "*aarch64_simd_mov<mode>" + [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand" + "=w, m, w, ?r, ?w, ?r, w") + (match_operand:VQ 1 "aarch64_simd_general_operand" + "m, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" +{ + switch (which_alternative) + { + case 0: + return "ldr\\t%q0, %1"; + case 1: + return "str\\t%q1, %0"; + case 2: + return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>"; + case 3: + case 4: + case 5: + return "#"; + case 6: + return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ + neon_logic<q>, multiple, multiple, multiple,\ + neon_move<q>") + (set_attr "length" "4,4,4,8,8,8,4")] +) + +(define_split + [(set (match_operand:VQ 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && GP_REGNUM_P (REGNO (operands[0])) + && 
GP_REGNUM_P (REGNO (operands[1]))" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (DImode, rdest); + src[0] = gen_rtx_REG (DImode, rsrc); + dest[1] = gen_rtx_REG (DImode, rdest + 1); + src[1] = gen_rtx_REG (DImode, rsrc + 1); + + aarch64_simd_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:VQ 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) + || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" + [(const_int 0)] +{ + aarch64_split_simd_move (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_split_simd_mov<mode>" + [(set (match_operand:VQ 0) + (match_operand:VQ 1))] + "TARGET_SIMD" + { + rtx dst = operands[0]; + rtx src = operands[1]; + + if (GP_REGNUM_P (REGNO (src))) + { + rtx src_low_part = gen_lowpart (<VHALF>mode, src); + rtx src_high_part = gen_highpart (<VHALF>mode, src); + + emit_insn + (gen_move_lo_quad_<mode> (dst, src_low_part)); + emit_insn + (gen_move_hi_quad_<mode> (dst, src_high_part)); + } + + else + { + rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); + rtx dst_high_part = gen_highpart (<VHALF>mode, dst); + rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); + rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + + emit_insn + (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo)); + emit_insn + (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi)); + } + DONE; + } +) + +(define_insn "aarch64_simd_mov_from_<mode>low" + [(set (match_operand:<VHALF> 0 "register_operand" "=r") + (vec_select:<VHALF> + (match_operand:VQ 1 "register_operand" "w") + (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] + "TARGET_SIMD && reload_completed" + "umov\t%0, %1.d[0]" + [(set_attr "type" "neon_to_gp<q>") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_simd_mov_from_<mode>high" + [(set (match_operand:<VHALF> 0 "register_operand" "=r") + (vec_select:<VHALF> + (match_operand:VQ 1 "register_operand" "w") + (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] + "TARGET_SIMD && reload_completed" + "umov\t%0, %1.d[1]" + [(set_attr "type" "neon_to_gp<q>") + (set_attr "length" "4") + ]) + +(define_insn "orn<mode>3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w")) + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" + [(set_attr "type" "neon_logic<q>")] +) + +(define_insn "bic<mode>3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (and:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w")) + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" + [(set_attr "type" "neon_logic<q>")] +) + +(define_insn "add<mode>3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (plus:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_add<q>")] +) + +(define_insn "sub<mode>3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (minus:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "sub\t%0.<Vtype>, %1.<Vtype>, 
%2.<Vtype>" + [(set_attr "type" "neon_sub<q>")] +) + +(define_insn "mul<mode>3" + [(set (match_operand:VDQM 0 "register_operand" "=w") + (mult:VDQM (match_operand:VDQM 1 "register_operand" "w") + (match_operand:VDQM 2 "register_operand" "w")))] + "TARGET_SIMD" + "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_mul_<Vetype><q>")] +) + +(define_insn "*aarch64_mul3_elt<mode>" + [(set (match_operand:VMUL 0 "register_operand" "=w") + (mult:VMUL + (vec_duplicate:VMUL + (vec_select:<VEL> + (match_operand:VMUL 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VMUL 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; + } + [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] +) + +(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>" + [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w") + (mult:VMUL_CHANGE_NLANES + (vec_duplicate:VMUL_CHANGE_NLANES + (vec_select:<VEL> + (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, + INTVAL (operands[2]))); + return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; + } + [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] +) + +(define_insn "*aarch64_mul3_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (mult:V2DF + (vec_duplicate:V2DF + (match_operand:DF 2 "register_operand" "w")) + (match_operand:V2DF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fmul\\t%0.2d, %1.2d, %2.d[0]" + [(set_attr "type" "neon_fp_mul_d_scalar_q")] +) + +(define_insn "*aarch64_mul3_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (mult:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand")])) + (match_operand:DF 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; + } + [(set_attr "type" "neon_fp_mul_d_scalar_q")] +) + +(define_insn "neg<mode>2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "neg\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_neg<q>")] +) + +(define_insn "abs<mode>2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (abs:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "abs\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_abs<q>")] +) + +(define_insn "abd<mode>_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] + "TARGET_SIMD" + "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_abd<q>")] +) + +(define_insn "aba<mode>_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))) + (match_operand:VDQ_BHSI 3 "register_operand" "0")))] + "TARGET_SIMD" + "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_arith_acc<q>")] +) + +(define_insn "fabd<mode>_3" + [(set 
(match_operand:VDQF 0 "register_operand" "=w") + (abs:VDQF (minus:VDQF + (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_abd_<Vetype><q>")] +) + +(define_insn "*fabd_scalar<mode>3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (abs:GPF (minus:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%<s>0, %<s>1, %<s>2" + [(set_attr "type" "neon_fp_abd_<Vetype><q>")] +) + +(define_insn "and<mode>3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (and:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" + [(set_attr "type" "neon_logic<q>")] +) + +(define_insn "ior<mode>3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ior:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" + [(set_attr "type" "neon_logic<q>")] +) + +(define_insn "xor<mode>3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (xor:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" + [(set_attr "type" "neon_logic<q>")] +) + +(define_insn "one_cmpl<mode>2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (not:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "not\t%0.<Vbtype>, %1.<Vbtype>" + [(set_attr "type" "neon_logic<q>")] +) + +(define_insn "aarch64_simd_vec_set<mode>" + [(set (match_operand:VQ_S 0 "register_operand" "=w,w") + (vec_merge:VQ_S + (vec_duplicate:VQ_S + (match_operand:<VEL> 1 "register_operand" "r,w")) + (match_operand:VQ_S 3 "register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); + operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); + switch (which_alternative) + { + case 0: + return "ins\\t%0.<Vetype>[%p2], %w1"; + case 1: + return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_from_gp<q>, neon_ins<q>")] +) + +(define_insn "aarch64_simd_lshr<mode>" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))] + "TARGET_SIMD" + "ushr\t%0.<Vtype>, %1.<Vtype>, %2" + [(set_attr "type" "neon_shift_imm<q>")] +) + +(define_insn "aarch64_simd_ashr<mode>" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))] + "TARGET_SIMD" + "sshr\t%0.<Vtype>, %1.<Vtype>, %2" + [(set_attr "type" "neon_shift_imm<q>")] +) + +(define_insn "aarch64_simd_imm_shl<mode>" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_lshift_imm" "Dl")))] + "TARGET_SIMD" + "shl\t%0.<Vtype>, %1.<Vtype>, %2" + [(set_attr "type" "neon_shift_imm<q>")] +) + +(define_insn "aarch64_simd_reg_sshl<mode>" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + 
"TARGET_SIMD" + "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_shift_reg<q>")] +) + +(define_insn "aarch64_simd_reg_shl<mode>_unsigned" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_SIMD" + "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_shift_reg<q>")] +) + +(define_insn "aarch64_simd_reg_shl<mode>_signed" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_SIMD" + "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_shift_reg<q>")] +) + +(define_expand "ashl<mode>3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount >= 0 && shift_amount < bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, + shift_amount); + emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0], + operands[1], + tmp)); + DONE; + } + else + { + operands[2] = force_reg (SImode, operands[2]); + } + } + else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_aarch64_simd_dup<mode> (tmp, + convert_to_mode (<VEL>mode, + operands[2], + 0))); + emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], + tmp)); + DONE; + } + else + FAIL; +} +) + +(define_expand "lshr<mode>3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount > 0 && shift_amount <= bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, + shift_amount); + emit_insn (gen_aarch64_simd_lshr<mode> (operands[0], + operands[1], + tmp)); + DONE; + } + else + operands[2] = force_reg (SImode, operands[2]); + } + else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (<MODE>mode); + emit_insn (gen_negsi2 (tmp, operands[2])); + emit_insn (gen_aarch64_simd_dup<mode> (tmp1, + convert_to_mode (<VEL>mode, + tmp, 0))); + emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], + operands[1], + tmp1)); + DONE; + } + else + FAIL; +} +) + +(define_expand "ashr<mode>3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount > 0 && shift_amount <= bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, + shift_amount); + emit_insn (gen_aarch64_simd_ashr<mode> (operands[0], + operands[1], + tmp)); + DONE; + } + else + operands[2] = force_reg (SImode, operands[2]); + } + 
else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (<MODE>mode); + emit_insn (gen_negsi2 (tmp, operands[2])); + emit_insn (gen_aarch64_simd_dup<mode> (tmp1, + convert_to_mode (<VEL>mode, + tmp, 0))); + emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], + operands[1], + tmp1)); + DONE; + } + else + FAIL; +} +) + +(define_expand "vashl<mode>3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:VDQ 2 "register_operand" "")] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], + operands[2])); + DONE; +}) + +;; Using mode VQ_S as there is no V2DImode neg! +;; Negating individual lanes most certainly offsets the +;; gain from vectorization. +(define_expand "vashr<mode>3" + [(match_operand:VQ_S 0 "register_operand" "") + (match_operand:VQ_S 1 "register_operand" "") + (match_operand:VQ_S 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx neg = gen_reg_rtx (<MODE>mode); + emit (gen_neg<mode>2 (neg, operands[2])); + emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1], + neg)); + DONE; +}) + +;; DI vector shift +(define_expand "aarch64_ashr_simddi" + [(match_operand:DI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "w") + (match_operand:SI 2 "aarch64_shift_imm64_di" "")] + "TARGET_SIMD" + { + if (INTVAL (operands[2]) == 64) + emit_insn (gen_aarch64_sshr_simddi (operands[0], operands[1])); + else + emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2])); + DONE; + } +) + +;; SIMD shift by 64. This pattern is a special case as standard pattern does +;; not handle NEON shifts by 64. +(define_insn "aarch64_sshr_simddi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:DI 1 "register_operand" "w")] UNSPEC_SSHR64))] + "TARGET_SIMD" + "sshr\t%d0, %d1, 64" + [(set_attr "type" "neon_shift_imm")] +) + +(define_expand "vlshr<mode>3" + [(match_operand:VQ_S 0 "register_operand" "") + (match_operand:VQ_S 1 "register_operand" "") + (match_operand:VQ_S 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx neg = gen_reg_rtx (<MODE>mode); + emit (gen_neg<mode>2 (neg, operands[2])); + emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1], + neg)); + DONE; +}) + +(define_expand "aarch64_lshr_simddi" + [(match_operand:DI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "w") + (match_operand:SI 2 "aarch64_shift_imm64_di" "")] + "TARGET_SIMD" + { + if (INTVAL (operands[2]) == 64) + emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1])); + else + emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); + DONE; + } +) + +;; SIMD shift by 64. This pattern is a special case as standard pattern does +;; not handle NEON shifts by 64. 
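+;; As a rough illustration (the exact arm_neon.h mapping appears elsewhere in
+;; this commit, so the intrinsic shown here is only an assumed example), the
+;; shift-by-64 special case is the kind of request that comes from ACLE code
+;; such as:
+;;
+;;   #include <arm_neon.h>
+;;   uint64x1_t shift_all_out (uint64x1_t x)
+;;   {
+;;     return vshr_n_u64 (x, 64);   /* USHR by 64 yields zero.  */
+;;   }
+;;
+;; The aarch64_lshr_simddi and aarch64_ashr_simddi expanders above route a
+;; shift amount of exactly 64 to the unspec patterns here instead of the
+;; standard lshrdi3/ashrdi3 patterns, which cannot express a shift of 64.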
+(define_insn "aarch64_ushr_simddi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))] + "TARGET_SIMD" + "ushr\t%d0, %d1, 64" + [(set_attr "type" "neon_shift_imm")] +) + +(define_expand "vec_set<mode>" + [(match_operand:VQ_S 0 "register_operand") + (match_operand:<VEL> 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + +(define_insn "aarch64_simd_vec_setv2di" + [(set (match_operand:V2DI 0 "register_operand" "=w,w") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 1 "register_operand" "r,w")) + (match_operand:V2DI 3 "register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2]))); + operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); + switch (which_alternative) + { + case 0: + return "ins\\t%0.d[%p2], %1"; + case 1: + return "ins\\t%0.d[%p2], %1.d[0]"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_from_gp, neon_ins_q")] +) + +(define_expand "vec_setv2di" + [(match_operand:V2DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + +(define_insn "aarch64_simd_vec_set<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (vec_merge:VDQF + (vec_duplicate:VDQF + (match_operand:<VEL> 1 "register_operand" "w")) + (match_operand:VDQF 3 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); + + operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt); + return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; + } + [(set_attr "type" "neon_ins<q>")] +) + +(define_expand "vec_set<mode>" + [(match_operand:VDQF 0 "register_operand" "+w") + (match_operand:<VEL> 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + + +(define_insn "aarch64_mla<mode>" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (plus:VQ_S (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w") + (match_operand:VQ_S 3 "register_operand" "w")) + (match_operand:VQ_S 1 "register_operand" "0")))] + "TARGET_SIMD" + "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" + [(set_attr "type" "neon_mla_<Vetype><q>")] +) + +(define_insn "*aarch64_mla_elt<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select:<VEL> + (match_operand:VDQHS 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w")) + (match_operand:VDQHS 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; + } + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] +) + +(define_insn 
"*aarch64_mla_elt_<vswap_width_name><mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select:<VEL> + (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w")) + (match_operand:VDQHS 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, + INTVAL (operands[2]))); + return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; + } + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] +) + +(define_insn "aarch64_mls<mode>" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (minus:VQ_S (match_operand:VQ_S 1 "register_operand" "0") + (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w") + (match_operand:VQ_S 3 "register_operand" "w"))))] + "TARGET_SIMD" + "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" + [(set_attr "type" "neon_mla_<Vetype><q>")] +) + +(define_insn "*aarch64_mls_elt<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 4 "register_operand" "0") + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select:<VEL> + (match_operand:VDQHS 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; + } + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] +) + +(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 4 "register_operand" "0") + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select:<VEL> + (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, + INTVAL (operands[2]))); + return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; + } + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] +) + +;; Max/Min operations. +(define_insn "<su><maxmin><mode>3" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VQ_S 2 "register_operand" "w")))] + "TARGET_SIMD" + "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_minmax<q>")] +) + +;; Move into low-half clearing high half to 0. + +(define_insn "move_lo_quad_<mode>" + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") + (vec_concat:VQ + (match_operand:<VHALF> 1 "register_operand" "w,r,r") + (vec_duplicate:<VHALF> (const_int 0))))] + "TARGET_SIMD" + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +) + +;; Move into high-half. 
+ +(define_insn "aarch64_simd_move_hi_quad_<mode>" + [(set (match_operand:VQ 0 "register_operand" "+w,w") + (vec_concat:VQ + (vec_select:<VHALF> + (match_dup 0) + (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) + (match_operand:<VHALF> 1 "register_operand" "w,r")))] + "TARGET_SIMD" + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "type" "neon_ins") + (set_attr "length" "4")] +) + +(define_expand "move_hi_quad_<mode>" + [(match_operand:VQ 0 "register_operand" "") + (match_operand:<VHALF> 1 "register_operand" "")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); + emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], + operands[1], p)); + DONE; +}) + +;; Narrowing operations. + +;; For doubles. +(define_insn "aarch64_simd_vec_pack_trunc_<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") + (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] + "TARGET_SIMD" + "xtn\\t%0.<Vntype>, %1.<Vtype>" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "vec_pack_trunc_<mode>" + [(match_operand:<VNARROWD> 0 "register_operand" "") + (match_operand:VDN 1 "register_operand" "") + (match_operand:VDN 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx tempreg = gen_reg_rtx (<VDBL>mode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo])); + emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi])); + emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg)); + DONE; +}) + +;; For quads. + +(define_insn "vec_pack_trunc_<mode>" + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "+&w") + (vec_concat:<VNARROWQ2> + (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")) + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] + "TARGET_SIMD" + { + if (BYTES_BIG_ENDIAN) + return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>"; + else + return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>"; + } + [(set_attr "type" "multiple") + (set_attr "length" "8")] +) + +;; Widening operations. 
+ +(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_lo_half" "") + )))] + "TARGET_SIMD" + "<su>shll %0.<Vwtype>, %1.<Vhalftype>, 0" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_hi_half" "") + )))] + "TARGET_SIMD" + "<su>shll2 %0.<Vwtype>, %1.<Vtype>, 0" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_expand "vec_unpack<su>_hi_<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "") + (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0], + operands[1], p)); + DONE; + } +) + +(define_expand "vec_unpack<su>_lo_<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "") + (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); + emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0], + operands[1], p)); + DONE; + } +) + +;; Widening arithmetic. + +(define_insn "*aarch64_<su>mlal_lo<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" + [(set_attr "type" "neon_mla_<Vetype>_long")] +) + +(define_insn "*aarch64_<su>mlal_hi<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" + [(set_attr "type" "neon_mla_<Vetype>_long")] +) + +(define_insn "*aarch64_<su>mlsl_lo<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" + [(set_attr "type" "neon_mla_<Vetype>_long")] +) + +(define_insn "*aarch64_<su>mlsl_hi<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" 
"w") + (match_dup 3))))))] + "TARGET_SIMD" + "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" + [(set_attr "type" "neon_mla_<Vetype>_long")] +) + +(define_insn "*aarch64_<su>mlal<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w"))) + (match_operand:<VWIDE> 3 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_mla_<Vetype>_long")] +) + +(define_insn "*aarch64_<su>mlsl<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 3 "register_operand" "w")))))] + "TARGET_SIMD" + "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" + [(set_attr "type" "neon_mla_<Vetype>_long")] +) + +(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" + [(set_attr "type" "neon_mul_<Vetype>_long")] +) + +(define_expand "vec_widen_<su>mult_lo_<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "") + (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) + (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); + emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0], + operands[1], + operands[2], p)); + DONE; + } +) + +(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_mul_<Vetype>_long")] +) + +(define_expand "vec_widen_<su>mult_hi_<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "") + (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) + (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0], + operands[1], + operands[2], p)); + DONE; + + } +) + +;; FP vector operations. +;; AArch64 AdvSIMD supports single-precision (32-bit) and +;; double-precision (64-bit) floating-point data types and arithmetic as +;; defined by the IEEE 754-2008 standard. This makes them vectorizable +;; without the need for -ffast-math or -funsafe-math-optimizations. +;; +;; Floating-point operations can raise an exception. Vectorizing such +;; operations are safe because of reasons explained below. +;; +;; ARMv8 permits an extension to enable trapped floating-point +;; exception handling, however this is an optional feature. 
In the +;; event of a floating-point exception being raised by vectorised +;; code then: +;; 1. If trapped floating-point exceptions are available, then a trap +;; will be taken when any lane raises an enabled exception. A trap +;; handler may determine which lane raised the exception. +;; 2. Alternatively a sticky exception flag is set in the +;; floating-point status register (FPSR). Software may explicitly +;; test the exception flags, in which case the tests will either +;; prevent vectorisation, allowing precise identification of the +;; failing operation, or if tested outside of vectorisable regions +;; then the specific operation and lane are not of interest. + +;; FP arithmetic operations. + +(define_insn "add<mode>3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (plus:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_addsub_<Vetype><q>")] +) + +(define_insn "sub<mode>3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (minus:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_addsub_<Vetype><q>")] +) + +(define_insn "mul<mode>3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (mult:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_mul_<Vetype><q>")] +) + +(define_insn "div<mode>3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (div:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_div_<Vetype><q>")] +) + +(define_insn "neg<mode>2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fneg\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_fp_neg_<Vetype><q>")] +) + +(define_insn "abs<mode>2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fabs\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_fp_abs_<Vetype><q>")] +) + +(define_insn "fma<mode>4" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w") + (match_operand:VDQF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_mla_<Vetype><q>")] +) + +(define_insn "*aarch64_fma4_elt<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (vec_duplicate:VDQF + (vec_select:<VEL> + (match_operand:VDQF 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQF 3 "register_operand" "w") + (match_operand:VDQF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; + } + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] +) + +(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (fma:VDQSF + (vec_duplicate:VDQSF + (vec_select:<VEL> + 
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQSF 3 "register_operand" "w") + (match_operand:VDQSF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, + INTVAL (operands[2]))); + return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; + } + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] +) + +(define_insn "*aarch64_fma4_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (fma:V2DF + (vec_duplicate:V2DF + (match_operand:DF 1 "register_operand" "w")) + (match_operand:V2DF 2 "register_operand" "w") + (match_operand:V2DF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmla\\t%0.2d, %2.2d, %1.2d[0]" + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "*aarch64_fma4_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (fma:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand")])) + (match_operand:DF 3 "register_operand" "w") + (match_operand:DF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmla\\t%0.2d, %3.2d, %1.2d[%2]"; + } + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "fnma<mode>4" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (match_operand:VDQF 1 "register_operand" "w") + (neg:VDQF + (match_operand:VDQF 2 "register_operand" "w")) + (match_operand:VDQF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_mla_<Vetype><q>")] +) + +(define_insn "*aarch64_fnma4_elt<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (neg:VDQF + (match_operand:VDQF 3 "register_operand" "w")) + (vec_duplicate:VDQF + (vec_select:<VEL> + (match_operand:VDQF 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; + } + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] +) + +(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (fma:VDQSF + (neg:VDQSF + (match_operand:VDQSF 3 "register_operand" "w")) + (vec_duplicate:VDQSF + (vec_select:<VEL> + (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQSF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, + INTVAL (operands[2]))); + return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; + } + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] +) + +(define_insn "*aarch64_fnma4_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (fma:V2DF + (neg:V2DF + (match_operand:V2DF 2 "register_operand" "w")) + (vec_duplicate:V2DF + (match_operand:DF 1 "register_operand" "w")) + (match_operand:V2DF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmls\\t%0.2d, %2.2d, %1.2d[0]" + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "*aarch64_fnma4_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (fma:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 
"immediate_operand")])) + (neg:DF + (match_operand:DF 3 "register_operand" "w")) + (match_operand:DF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; + } + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +;; Vector versions of the floating-point frint patterns. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +(define_insn "<frint_pattern><mode>2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] + FRINT))] + "TARGET_SIMD" + "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_fp_round_<Vetype><q>")] +) + +;; Vector versions of the fcvt standard patterns. +;; Expands to lbtrunc, lround, lceil, lfloor +(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2" + [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") + (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> + [(match_operand:VDQF 1 "register_operand" "w")] + FCVT)))] + "TARGET_SIMD" + "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_fp_to_int_<Vetype><q>")] +) + +(define_expand "<optab><VDQF:mode><fcvt_target>2" + [(set (match_operand:<FCVT_TARGET> 0 "register_operand") + (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> + [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ)))] + "TARGET_SIMD" + {}) + +(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2" + [(set (match_operand:<FCVT_TARGET> 0 "register_operand") + (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> + [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ)))] + "TARGET_SIMD" + {}) + +(define_expand "ftrunc<VDQF:mode>2" + [(set (match_operand:VDQF 0 "register_operand") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ))] + "TARGET_SIMD" + {}) + +(define_insn "<optab><fcvt_target><VDQF:mode>2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (FLOATUORS:VDQF + (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))] + "TARGET_SIMD" + "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_int_to_fp_<Vetype><q>")] +) + +;; Conversions between vectors of floats and doubles. +;; Contains a mix of patterns to match standard pattern names +;; and those for intrinsics. + +;; Float widening operations. + +(define_insn "vec_unpacks_lo_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (parallel [(const_int 0) (const_int 1)]) + )))] + "TARGET_SIMD" + "fcvtl\\t%0.2d, %1.2s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +(define_insn "aarch64_float_extend_lo_v2df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (match_operand:V2SF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fcvtl\\t%0.2d, %1.2s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +(define_insn "vec_unpacks_hi_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (parallel [(const_int 2) (const_int 3)]) + )))] + "TARGET_SIMD" + "fcvtl2\\t%0.2d, %1.4s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +;; Float narrowing operations. 
+ +(define_insn "aarch64_float_truncate_lo_v2sf" + [(set (match_operand:V2SF 0 "register_operand" "=w") + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fcvtn\\t%0.2s, %1.2d" + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] +) + +(define_insn "aarch64_float_truncate_hi_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "0") + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fcvtn2\\t%0.4s, %2.2d" + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] +) + +(define_expand "vec_pack_trunc_v2df" + [(set (match_operand:V4SF 0 "register_operand") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand")) + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand")) + ))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2SFmode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo])); + emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0], + tmp, operands[hi])); + DONE; + } +) + +(define_expand "vec_pack_trunc_df" + [(set (match_operand:V2SF 0 "register_operand") + (vec_concat:V2SF + (float_truncate:SF + (match_operand:DF 1 "register_operand")) + (float_truncate:SF + (match_operand:DF 2 "register_operand")) + ))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2SFmode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo])); + emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi])); + emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp)); + DONE; + } +) + +(define_insn "aarch64_vmls<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (minus:VDQF (match_operand:VDQF 1 "register_operand" "0") + (mult:VDQF (match_operand:VDQF 2 "register_operand" "w") + (match_operand:VDQF 3 "register_operand" "w"))))] + "TARGET_SIMD" + "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" + [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] +) + +;; FP Max/Min +;; Max/Min are introduced by idiom recognition by GCC's mid-end. An +;; expression like: +;; a = (b < c) ? b : c; +;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled +;; either explicitly or indirectly via -ffast-math. +;; +;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. +;; The 'smax' and 'smin' RTL standard pattern names do not specify which +;; operand will be returned when both operands are zero (i.e. they may not +;; honour signed zeroes), or when either operand is NaN. Therefore GCC +;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring +;; NaNs. + +(define_insn "<su><maxmin><mode>3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_minmax_<Vetype><q>")] +) + +(define_insn "<maxmin_uns><mode>3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")] + FMAXMIN_UNS))] + "TARGET_SIMD" + "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_minmax_<Vetype><q>")] +) + +;; 'across lanes' add. 
+ +(define_insn "reduc_<sur>plus_<mode>" + [(set (match_operand:VDQV 0 "register_operand" "=w") + (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] + SUADDV))] + "TARGET_SIMD" + "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>" + [(set_attr "type" "neon_reduc_add<q>")] +) + +(define_insn "reduc_<sur>plus_v2si" + [(set (match_operand:V2SI 0 "register_operand" "=w") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] + SUADDV))] + "TARGET_SIMD" + "addp\\t%0.2s, %1.2s, %1.2s" + [(set_attr "type" "neon_reduc_add")] +) + +(define_insn "reduc_splus_<mode>" + [(set (match_operand:V2F 0 "register_operand" "=w") + (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] + UNSPEC_FADDV))] + "TARGET_SIMD" + "faddp\\t%<Vetype>0, %1.<Vtype>" + [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")] +) + +(define_insn "aarch64_addpv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] + UNSPEC_FADDV))] + "TARGET_SIMD" + "faddp\\t%0.4s, %1.4s, %1.4s" + [(set_attr "type" "neon_fp_reduc_add_s_q")] +) + +(define_expand "reduc_splus_v4sf" + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] + UNSPEC_FADDV))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1])); + emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0])); + DONE; +}) + +(define_insn "clz<mode>2" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "clz\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_cls<q>")] +) + +;; 'across lanes' max and min ops. + +(define_insn "reduc_<maxmin_uns>_<mode>" + [(set (match_operand:VDQV_S 0 "register_operand" "=w") + (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")] + MAXMINV))] + "TARGET_SIMD" + "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>" + [(set_attr "type" "neon_reduc_minmax<q>")] +) + +(define_insn "reduc_<maxmin_uns>_v2si" + [(set (match_operand:V2SI 0 "register_operand" "=w") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] + MAXMINV))] + "TARGET_SIMD" + "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s" + [(set_attr "type" "neon_reduc_minmax")] +) + +(define_insn "reduc_<maxmin_uns>_<mode>" + [(set (match_operand:V2F 0 "register_operand" "=w") + (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] + FMAXMINV))] + "TARGET_SIMD" + "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>" + [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")] +) + +(define_insn "reduc_<maxmin_uns>_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] + FMAXMINV))] + "TARGET_SIMD" + "<maxmin_uns_op>v\\t%s0, %1.4s" + [(set_attr "type" "neon_fp_reduc_minmax_s_q")] +) + +;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register +;; allocation. +;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which +;; to select. 
+;; +;; Thus our BSL is of the form: +;; op0 = bsl (mask, op2, op3) +;; We can use any of: +;; +;; if (op0 = mask) +;; bsl mask, op1, op2 +;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0) +;; bit op0, op2, mask +;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0) +;; bif op0, op1, mask + +(define_insn "aarch64_simd_bsl<mode>_internal" + [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w") + (ior:VALLDIF + (and:VALLDIF + (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w") + (match_operand:VALLDIF 2 "register_operand" " w,w,0")) + (and:VALLDIF + (not:<V_cmp_result> + (match_dup:<V_cmp_result> 1)) + (match_operand:VALLDIF 3 "register_operand" " w,0,w")) + ))] + "TARGET_SIMD" + "@ + bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> + bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> + bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>" + [(set_attr "type" "neon_bsl<q>")] +) + +(define_expand "aarch64_simd_bsl<mode>" + [(match_operand:VALLDIF 0 "register_operand") + (match_operand:<V_cmp_result> 1 "register_operand") + (match_operand:VALLDIF 2 "register_operand") + (match_operand:VALLDIF 3 "register_operand")] + "TARGET_SIMD" +{ + /* We can't alias operands together if they have different modes. */ + operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]); + emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "aarch64_vcond_internal<mode><mode>" + [(set (match_operand:VDQ 0 "register_operand") + (if_then_else:VDQ + (match_operator 3 "comparison_operator" + [(match_operand:VDQ 4 "register_operand") + (match_operand:VDQ 5 "nonmemory_operand")]) + (match_operand:VDQ 1 "nonmemory_operand") + (match_operand:VDQ 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + int inverse = 0, has_zero_imm_form = 0; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx mask = gen_reg_rtx (<MODE>mode); + + switch (GET_CODE (operands[3])) + { + case LE: + case LT: + case NE: + inverse = 1; + /* Fall through. */ + case GE: + case GT: + case EQ: + has_zero_imm_form = 1; + break; + case LEU: + case LTU: + inverse = 1; + break; + default: + break; + } + + if (!REG_P (operands[5]) + && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form)) + operands[5] = force_reg (<MODE>mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case LT: + case GE: + emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5])); + break; + + case LE: + case GT: + emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5])); + break; + + case LTU: + case GEU: + emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5])); + break; + + case LEU: + case GTU: + emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5])); + break; + + case NE: + case EQ: + emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5])); + break; + + default: + gcc_unreachable (); + } + + if (inverse) + { + op1 = operands[2]; + op2 = operands[1]; + } + + /* If we have (a = (b CMP c) ? -1 : 0); + Then we can simply move the generated mask. 
*/ + + if (op1 == CONSTM1_RTX (<V_cmp_result>mode) + && op2 == CONST0_RTX (<V_cmp_result>mode)) + emit_move_insn (operands[0], mask); + else + { + if (!REG_P (op1)) + op1 = force_reg (<MODE>mode, op1); + if (!REG_P (op2)) + op2 = force_reg (<MODE>mode, op2); + emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, + op1, op2)); + } + + DONE; +}) + +(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>" + [(set (match_operand:VDQF_COND 0 "register_operand") + (if_then_else:VDQF + (match_operator 3 "comparison_operator" + [(match_operand:VDQF 4 "register_operand") + (match_operand:VDQF 5 "nonmemory_operand")]) + (match_operand:VDQF_COND 1 "nonmemory_operand") + (match_operand:VDQF_COND 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + int inverse = 0; + int use_zero_form = 0; + int swap_bsl_operands = 0; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode); + rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode); + + rtx (*base_comparison) (rtx, rtx, rtx); + rtx (*complimentary_comparison) (rtx, rtx, rtx); + + switch (GET_CODE (operands[3])) + { + case GE: + case GT: + case LE: + case LT: + case EQ: + if (operands[5] == CONST0_RTX (<MODE>mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. */ + default: + if (!REG_P (operands[5])) + operands[5] = force_reg (<VDQF:MODE>mode, operands[5]); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case UNLT: + inverse = 1; + /* Fall through. */ + case GE: + case UNGE: + case ORDERED: + case UNORDERED: + base_comparison = gen_aarch64_cmge<VDQF:mode>; + complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>; + break; + case LE: + case UNLE: + inverse = 1; + /* Fall through. */ + case GT: + case UNGT: + base_comparison = gen_aarch64_cmgt<VDQF:mode>; + complimentary_comparison = gen_aarch64_cmge<VDQF:mode>; + break; + case EQ: + case NE: + case UNEQ: + base_comparison = gen_aarch64_cmeq<VDQF:mode>; + complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>; + break; + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case LE: + case GT: + case GE: + case EQ: + /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. + As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: + a GE b -> a GE b + a GT b -> a GT b + a LE b -> b GE a + a LT b -> b GT a + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. */ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_aarch64_cmlt<VDQF:mode>; + break; + case LE: + base_comparison = gen_aarch64_cmle<VDQF:mode>; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. */ + break; + } + } + + if (!inverse) + emit_insn (base_comparison (mask, operands[4], operands[5])); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4])); + break; + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case NE: + /* FCM returns false for lanes which are unordered, so if we use + the inverse of the comparison we actually want to emit, then + swap the operands to BSL, we will end up with the correct result. + Note that a NE NaN and NaN NE b are true for all a, b. 
+ + Our transformations are: + a GE b -> !(b GT a) + a GT b -> !(b GE a) + a LE b -> !(a GT b) + a LT b -> !(a GE b) + a NE b -> !(a EQ b) */ + + if (inverse) + emit_insn (base_comparison (mask, operands[4], operands[5])); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4])); + + swap_bsl_operands = 1; + break; + case UNEQ: + /* We check (a > b || b > a). combining these comparisons give us + true iff !(a != b && a ORDERED b), swapping the operands to BSL + will then give us (a == b || a UNORDERED b) as intended. */ + + emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5])); + emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4])); + emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp)); + swap_bsl_operands = 1; + break; + case UNORDERED: + /* Operands are ORDERED iff (a > b || b >= a). + Swapping the operands to BSL will give the UNORDERED case. */ + swap_bsl_operands = 1; + /* Fall through. */ + case ORDERED: + emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5])); + emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4])); + emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp)); + break; + default: + gcc_unreachable (); + } + + if (swap_bsl_operands) + { + op1 = operands[2]; + op2 = operands[1]; + } + + /* If we have (a = (b CMP c) ? -1 : 0); + Then we can simply move the generated mask. */ + + if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode) + && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode)) + emit_move_insn (operands[0], mask); + else + { + if (!REG_P (op1)) + op1 = force_reg (<VDQF_COND:MODE>mode, op1); + if (!REG_P (op2)) + op2 = force_reg (<VDQF_COND:MODE>mode, op2); + emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask, + op1, op2)); + } + + DONE; +}) + +(define_expand "vcond<mode><mode>" + [(set (match_operand:VALL 0 "register_operand") + (if_then_else:VALL + (match_operator 3 "comparison_operator" + [(match_operand:VALL 4 "register_operand") + (match_operand:VALL 5 "nonmemory_operand")]) + (match_operand:VALL 1 "nonmemory_operand") + (match_operand:VALL 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +(define_expand "vcond<v_cmp_result><mode>" + [(set (match_operand:<V_cmp_result> 0 "register_operand") + (if_then_else:<V_cmp_result> + (match_operator 3 "comparison_operator" + [(match_operand:VDQF 4 "register_operand") + (match_operand:VDQF 5 "nonmemory_operand")]) + (match_operand:<V_cmp_result> 1 "nonmemory_operand") + (match_operand:<V_cmp_result> 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> ( + operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +(define_expand "vcondu<mode><mode>" + [(set (match_operand:VDQ 0 "register_operand") + (if_then_else:VDQ + (match_operator 3 "comparison_operator" + [(match_operand:VDQ 4 "register_operand") + (match_operand:VDQ 5 "nonmemory_operand")]) + (match_operand:VDQ 1 "nonmemory_operand") + (match_operand:VDQ 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +;; Patterns for AArch64 SIMD Intrinsics. 
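+;;
+;; Illustrative sketch (function name is arbitrary): lane reads written with
+;; the arm_neon.h intrinsics are matched by the get_lane patterns below;
+;; when the extracted element feeds a widening use, the sign- or
+;; zero-extending SMOV/UMOV forms can be selected instead.
+;;
+;;   #include <arm_neon.h>
+;;   long widened_lane (int16x8_t v)
+;;   {
+;;     /* Lane 3, sign-extended to 64 bits by the caller's wider use.  */
+;;     return vgetq_lane_s16 (v, 3);
+;;   }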
+ +(define_expand "aarch64_create<mode>" + [(match_operand:VD_RE 0 "register_operand" "") + (match_operand:DI 1 "general_operand" "")] + "TARGET_SIMD" +{ + rtx src = gen_lowpart (<MODE>mode, operands[1]); + emit_move_insn (operands[0], src); + DONE; +}) + +;; Lane extraction with sign extension to general purpose register. +(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI + (vec_select:<VEL> + (match_operand:VDQQH 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"; + } + [(set_attr "type" "neon_to_gp<q>")] +) + +(define_insn "*aarch64_get_lane_zero_extendsi<mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select:<VEL> + (match_operand:VDQQH 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + return "umov\\t%w0, %1.<Vetype>[%2]"; + } + [(set_attr "type" "neon_to_gp<q>")] +) + +(define_expand "aarch64_be_checked_get_lane<mode>" + [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand") + (match_operand:VALL 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + emit_insn (gen_aarch64_get_lane<mode> (operands[0], + operands[1], + operands[2])); + DONE; + } +) + +;; Lane extraction of a value, neither sign nor zero extension +;; is guaranteed so upper bits should be considered undefined. +(define_insn "aarch64_get_lane<mode>" + [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") + (vec_select:<VEL> + (match_operand:VALL 1 "register_operand" "w, w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); + switch (which_alternative) + { + case 0: + return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; + case 1: + return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; + case 2: + return "st1\\t{%1.<Vetype>}[%2], %0"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")] +) + +(define_expand "aarch64_get_lanedi" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv8qi<mode>" + [(match_operand:V8QI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4hi<mode>" + [(match_operand:V4HI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2si<mode>" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2sf<mode>" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:VDC 1 
"register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretdi<mode>" + [(match_operand:DI 0 "register_operand" "") + (match_operand:VD_RE 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv16qi<mode>" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv8hi<mode>" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4si<mode>" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4sf<mode>" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2di<mode>" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2df<mode>" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +;; In this insn, operand 1 should be low, and operand 2 the high part of the +;; dest vector. + +(define_insn "*aarch64_combinez<mode>" + [(set (match_operand:<VDBL> 0 "register_operand" "=&w") + (vec_concat:<VDBL> + (match_operand:VDIC 1 "register_operand" "w") + (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] + "TARGET_SIMD" + "mov\\t%0.8b, %1.8b" + [(set_attr "type" "neon_move<q>")] +) + +(define_insn_and_split "aarch64_combine<mode>" + [(set (match_operand:<VDBL> 0 "register_operand" "=&w") + (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_expand "aarch64_simd_combine<mode>" + [(set (match_operand:<VDBL> 0 "register_operand" "=&w") + (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + { + emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); + emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2])); + DONE; + } +[(set_attr "type" "multiple")] +) + +;; <su><addsub>l<q>. 
+ +(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_<ADDSUB:optab>_long")] +) + +(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" + [(set_attr "type" "neon_<ADDSUB:optab>_long")] +) + + +(define_expand "aarch64_saddl2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_uaddl2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_ssubl2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_usubl2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w"))))] + "TARGET_SIMD" + "<ANY_EXTEND:su><ADDSUB:optab>l %0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_<ADDSUB:optab>_long")] +) + +;; <su><addsub>w<q>. 
+ +(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w"))))] + "TARGET_SIMD" + "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" + [(set_attr "type" "neon_<ADDSUB:optab>_widen")] +) + +(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] + "TARGET_SIMD" + "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" + [(set_attr "type" "neon_<ADDSUB:optab>_widen")] +) + +(define_expand "aarch64_saddw2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_uaddw2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + + +(define_expand "aarch64_ssubw2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_usubw2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; <su><r>h<addsub>. + +(define_insn "aarch64_<sur>h<addsub><mode>" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (unspec:VQ_S [(match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VQ_S 2 "register_operand" "w")] + HADDSUB))] + "TARGET_SIMD" + "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_<addsub>_halve<q>")] +) + +;; <r><addsub>hn<q>. 
+ +(define_insn "aarch64_<sur><addsub>hn<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") + (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 "register_operand" "w")] + ADDSUBHN))] + "TARGET_SIMD" + "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_<addsub>_halve_narrow_q")] +) + +(define_insn "aarch64_<sur><addsub>hn2<mode>" + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") + (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0") + (match_operand:VQN 2 "register_operand" "w") + (match_operand:VQN 3 "register_operand" "w")] + ADDSUBHN2))] + "TARGET_SIMD" + "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>" + [(set_attr "type" "neon_<addsub>_halve_narrow_q")] +) + +;; pmul. + +(define_insn "aarch64_pmul<mode>" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:VB 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] + UNSPEC_PMUL))] + "TARGET_SIMD" + "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_mul_<Vetype><q>")] +) + +;; <su>q<addsub> + +(define_insn "aarch64_<su_optab><optab><mode>" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:VSDQ_I 2 "register_operand" "w")))] + "TARGET_SIMD" + "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" + [(set_attr "type" "neon_<optab><q>")] +) + +;; suqadd and usqadd + +(define_insn "aarch64_<sur>qadd<mode>" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0") + (match_operand:VSDQ_I 2 "register_operand" "w")] + USSUQADD))] + "TARGET_SIMD" + "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>" + [(set_attr "type" "neon_qadd<q>")] +) + +;; sqmovun + +(define_insn "aarch64_sqmovun<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") + (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")] + UNSPEC_SQXTUN))] + "TARGET_SIMD" + "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] + ) + +;; sqmovn and uqmovn + +(define_insn "aarch64_<sur>qmovn<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") + (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")] + SUQMOVN))] + "TARGET_SIMD" + "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] + ) + +;; <su>q<absneg> + +(define_insn "aarch64_s<optab><mode>" + [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w") + (UNQOPS:VSDQ_I_BHSI + (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>" + [(set_attr "type" "neon_<optab><q>")] +) + +;; sq<r>dmulh. 
+ +(define_insn "aarch64_sq<r>dmulh<mode>" + [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") + (unspec:VSDQ_HSI + [(match_operand:VSDQ_HSI 1 "register_operand" "w") + (match_operand:VSDQ_HSI 2 "register_operand" "w")] + VQDMULH))] + "TARGET_SIMD" + "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" + [(set_attr "type" "neon_sat_mul_<Vetype><q>")] +) + +;; sq<r>dmulh_lane + +(define_insn "aarch64_sq<r>dmulh_lane<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] +) + +(define_insn "aarch64_sq<r>dmulh_laneq<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] +) + +(define_insn "aarch64_sq<r>dmulh_lane<mode>" + [(set (match_operand:SD_HSI 0 "register_operand" "=w") + (unspec:SD_HSI + [(match_operand:SD_HSI 1 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] +) + +;; vqdml[sa]l + +(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VSD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (match_operand:VSD_HSI 3 "register_operand" "w"))) + (const_int 1))))] + "TARGET_SIMD" + "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" + [(set_attr "type" "neon_sat_mla_<Vetype>_long")] +) + +;; vqdml[sa]l_lane + +(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" 
"neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_select:<VEL> + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_sqdmlal_lane<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlal_laneq<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode)); + emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlsl_lane<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlsl_laneq<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode)); + emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +;; vqdml[sa]l_n + +(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (match_operand:<VEL> 3 "register_operand" "<vwx>")))) + (const_int 1))))] + "TARGET_SIMD" + "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +;; sqdml[as]l2 + +(define_insn 
"aarch64_sqdml<SBINQOPS:as>l2<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 3 "register_operand" "w") + (match_dup 4)))) + (const_int 1))))] + "TARGET_SIMD" + "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_sqdmlal2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1], + operands[2], operands[3], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1], + operands[2], operands[3], p)); + DONE; +}) + +;; vqdml[sa]l2_lane + +(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_sqdmlal2_lane<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlal2_laneq<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); + emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], 
operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_lane<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_laneq<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); + emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (match_operand:<VEL> 3 "register_operand" "<vwx>")))) + (const_int 1))))] + "TARGET_SIMD" + "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_sqdmlal2_n<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:<VEL> 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_n<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:<VWIDE> 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:<VEL> 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +;; vqdmull + +(define_insn "aarch64_sqdmull<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VSD_HSI 1 "register_operand" "w")) + (sign_extend:<VWIDE> + (match_operand:VSD_HSI 2 "register_operand" "w"))) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" + [(set_attr "type" "neon_sat_mul_<Vetype>_long")] +) + +;; vqdmull_lane + +(define_insn "aarch64_sqdmull_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 1 "register_operand" "w")) + 
(sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCON> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmull_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:SD_HSI 1 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_select:<VEL> + (match_operand:<VCON> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")])) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_sqdmull_lane<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VSD_HSI 1 "register_operand" "w") + (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "aarch64_sqdmull_laneq<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VD_HSI 1 "register_operand" "w") + (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode)); + emit_insn (gen_aarch64_sqdmull_lane<mode>_internal + (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +;; vqdmull_n + +(define_insn "aarch64_sqdmull_n<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (match_operand:<VEL> 2 "register_operand" "<vwx>"))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +;; vqdmull2 + + + +(define_insn "aarch64_sqdmull2<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_dup 3))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_sqdmull2<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:<VCON> 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; vqdmull2_lane + +(define_insn 
"aarch64_sqdmull2_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCON> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_sqdmull2_lane<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_sqdmull2_laneq<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); + emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +;; vqdmull2_n + +(define_insn "aarch64_sqdmull2_n<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (match_operand:<VEL> 2 "register_operand" "<vwx>"))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_sqdmull2_n<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:<VEL> 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; vshl + +(define_insn "aarch64_<sur>shl<mode>" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI + [(match_operand:VSDQ_I_DI 1 "register_operand" "w") + (match_operand:VSDQ_I_DI 2 "register_operand" "w")] + VSHL))] + "TARGET_SIMD" + "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"; + [(set_attr "type" "neon_shift_reg<q>")] +) + + +;; vqshl + +(define_insn "aarch64_<sur>q<r>shl<mode>" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I + [(match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:VSDQ_I 2 "register_operand" "w")] + VQSHL))] + "TARGET_SIMD" + "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"; + [(set_attr "type" "neon_sat_shift_reg<q>")] +) + +;; vshll_n + +(define_insn 
"aarch64_<sur>shll_n<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (unspec:<VWIDE> [(match_operand:VDW 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VSHLL))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width + 1); + if (INTVAL (operands[2]) == bit_width) + { + return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\"; + } + else { + return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\"; + }" + [(set_attr "type" "neon_shift_imm_long")] +) + +;; vshll_high_n + +(define_insn "aarch64_<sur>shll2_n<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VSHLL))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width + 1); + if (INTVAL (operands[2]) == bit_width) + { + return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\"; + } + else { + return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\"; + }" + [(set_attr "type" "neon_shift_imm_long")] +) + +;; vrshr_n + +(define_insn "aarch64_<sur>shr_n<mode>" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VRSHR_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 1, bit_width + 1); + return \"<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";" + [(set_attr "type" "neon_sat_shift_imm<q>")] +) + +;; v(r)sra_n + +(define_insn "aarch64_<sur>sra_n<mode>" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") + (match_operand:VSDQ_I_DI 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + VSRA))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[3], 1, bit_width + 1); + return \"<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";" + [(set_attr "type" "neon_shift_acc<q>")] +) + +;; vs<lr>i_n + +(define_insn "aarch64_<sur>s<lr>i_n<mode>" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") + (match_operand:VSDQ_I_DI 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + VSLRI))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[3], 1 - <VSLRI:offsetlr>, + bit_width - <VSLRI:offsetlr> + 1); + return \"s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";" + [(set_attr "type" "neon_shift_imm<q>")] +) + +;; vqshl(u) + +(define_insn "aarch64_<sur>qshl<u>_n<mode>" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VQSHL_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width); + return \"<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";" + [(set_attr "type" "neon_sat_shift_imm<q>")] +) + + +;; vq(r)shr(u)n_n + +(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") + (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w") + 
(match_operand:SI 2 "immediate_operand" "i")] + VQSHRN_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 1, bit_width + 1); + return \"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2\";" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + + +;; cm(eq|ge|gt|lt|le) +;; Note, we have constraints for Dz and Z as different expanders +;; have different ideas of what should be passed to this pattern. + +(define_insn "aarch64_cm<optab><mode>" + [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w") + (neg:<V_cmp_result> + (COMPARISONS:<V_cmp_result> + (match_operand:VDQ 1 "register_operand" "w,w") + (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz") + )))] + "TARGET_SIMD" + "@ + cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> + cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0" + [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")] +) + +(define_insn_and_split "aarch64_cm<optab>di" + [(set (match_operand:DI 0 "register_operand" "=w,w,r") + (neg:DI + (COMPARISONS:DI + (match_operand:DI 1 "register_operand" "w,w,r") + (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") + ))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> + cm<optab>\t%d0, %d1, #0 + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. */ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); + rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); + rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] +) + +;; cm(hs|hi) + +(define_insn "aarch64_cm<optab><mode>" + [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") + (neg:<V_cmp_result> + (UCOMPARISONS:<V_cmp_result> + (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w") + )))] + "TARGET_SIMD" + "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" + [(set_attr "type" "neon_compare<q>")] +) + +(define_insn_and_split "aarch64_cm<optab>di" + [(set (match_operand:DI 0 "register_operand" "=w,r") + (neg:DI + (UCOMPARISONS:DI + (match_operand:DI 1 "register_operand" "w,r") + (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") + ))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. 
*/ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + enum machine_mode mode = CCmode; + rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); + rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_compare, neon_compare_zero")] +) + +;; cmtst + +(define_insn "aarch64_cmtst<mode>" + [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") + (neg:<V_cmp_result> + (ne:<V_cmp_result> + (and:VDQ + (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")) + (vec_duplicate:<V_cmp_result> (const_int 0)))))] + "TARGET_SIMD" + "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" + [(set_attr "type" "neon_tst<q>")] +) + +(define_insn_and_split "aarch64_cmtstdi" + [(set (match_operand:DI 0 "register_operand" "=w,r") + (neg:DI + (ne:DI + (and:DI + (match_operand:DI 1 "register_operand" "w,r") + (match_operand:DI 2 "register_operand" "w,r")) + (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cmtst\t%d0, %d1, %d2 + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. */ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); + enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); + rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); + rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_tst")] +) + +;; fcm(eq|ge|gt|le|lt) + +(define_insn "aarch64_cm<optab><mode>" + [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w") + (neg:<V_cmp_result> + (COMPARISONS:<V_cmp_result> + (match_operand:VALLF 1 "register_operand" "w,w") + (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz") + )))] + "TARGET_SIMD" + "@ + fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> + fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" + [(set_attr "type" "neon_fp_compare_<Vetype><q>")] +) + +;; fac(ge|gt) +;; Note we can also handle what would be fac(le|lt) by +;; generating fac(ge|gt). 
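+;; For example, |a| <= |b| (what would be "facle") can be emitted as
+;; "facge" with the two source operands swapped; the <cmp_1> and <cmp_2>
+;; attributes in the output template below select the operand order for
+;; each comparison code, so one pattern serves all of these comparisons.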
+ +(define_insn "*aarch64_fac<optab><mode>" + [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") + (neg:<V_cmp_result> + (FAC_COMPARISONS:<V_cmp_result> + (abs:VALLF (match_operand:VALLF 1 "register_operand" "w")) + (abs:VALLF (match_operand:VALLF 2 "register_operand" "w")) + )))] + "TARGET_SIMD" + "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" + [(set_attr "type" "neon_fp_compare_<Vetype><q>")] +) + +;; addp + +(define_insn "aarch64_addp<mode>" + [(set (match_operand:VD_BHSI 0 "register_operand" "=w") + (unspec:VD_BHSI + [(match_operand:VD_BHSI 1 "register_operand" "w") + (match_operand:VD_BHSI 2 "register_operand" "w")] + UNSPEC_ADDP))] + "TARGET_SIMD" + "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" + [(set_attr "type" "neon_reduc_add<q>")] +) + +(define_insn "aarch64_addpdi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:V2DI 1 "register_operand" "w")] + UNSPEC_ADDP))] + "TARGET_SIMD" + "addp\t%d0, %1.2d" + [(set_attr "type" "neon_reduc_add")] +) + +;; sqrt + +(define_insn "sqrt<mode>2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fsqrt\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")] +) + +;; Patterns for vector struct loads and stores. + +(define_insn "vec_load_lanesoi<mode>" + [(set (match_operand:OI 0 "register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD2))] + "TARGET_SIMD" + "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" + [(set_attr "type" "neon_load2_2reg<q>")] +) + +(define_insn "vec_store_lanesoi<mode>" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:OI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" + [(set_attr "type" "neon_store2_2reg<q>")] +) + +(define_insn "vec_load_lanesci<mode>" + [(set (match_operand:CI 0 "register_operand" "=w") + (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD3))] + "TARGET_SIMD" + "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" + [(set_attr "type" "neon_load3_3reg<q>")] +) + +(define_insn "vec_store_lanesci<mode>" + [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:CI [(match_operand:CI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" + [(set_attr "type" "neon_store3_3reg<q>")] +) + +(define_insn "vec_load_lanesxi<mode>" + [(set (match_operand:XI 0 "register_operand" "=w") + (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD4))] + "TARGET_SIMD" + "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" + [(set_attr "type" "neon_load4_4reg<q>")] +) + +(define_insn "vec_store_lanesxi<mode>" + [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:XI [(match_operand:XI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" + [(set_attr "type" "neon_store4_4reg<q>")] +) + +;; Reload patterns for AdvSIMD register list operands. 
+ +(define_expand "mov<mode>" + [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" +{ + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (<MODE>mode, operands[1]); + } +}) + +(define_insn "*aarch64_mov<mode>" + [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") + (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] + "TARGET_SIMD + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" + +{ + switch (which_alternative) + { + case 0: return "#"; + case 1: return "st1\\t{%S1.16b - %<Vendreg>1.16b}, %0"; + case 2: return "ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\ + neon_load<nregs>_<nregs>reg_q") + (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] +) + +(define_insn "aarch64_be_ld1<mode>" + [(set (match_operand:VALLDI 0 "register_operand" "=w") + (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD1))] + "TARGET_SIMD" + "ld1\\t{%0<Vmtype>}, %1" + [(set_attr "type" "neon_load1_1reg<q>")] +) + +(define_insn "aarch64_be_st1<mode>" + [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")] + UNSPEC_ST1))] + "TARGET_SIMD" + "st1\\t{%1<Vmtype>}, %0" + [(set_attr "type" "neon_store1_1reg<q>")] +) + +(define_split + [(set (match_operand:OI 0 "register_operand" "") + (match_operand:OI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + + aarch64_simd_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:CI 0 "register_operand" "") + (match_operand:CI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[3], src[3]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + dest[2] = gen_rtx_REG (TFmode, rdest + 2); + src[2] = gen_rtx_REG (TFmode, rsrc + 2); + + aarch64_simd_disambiguate_copy (operands, dest, src, 3); +}) + +(define_split + [(set (match_operand:XI 0 "register_operand" "") + (match_operand:XI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 6) (match_dup 7))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[4], src[4]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + dest[2] = gen_rtx_REG (TFmode, rdest + 2); + src[2] = gen_rtx_REG (TFmode, rsrc + 2); + dest[3] = gen_rtx_REG (TFmode, rdest + 3); + src[3] = gen_rtx_REG (TFmode, rsrc + 3); + + 
aarch64_simd_disambiguate_copy (operands, dest, src, 4); +}) + +(define_insn "aarch64_ld2<mode>_dreg" + [(set (match_operand:OI 0 "register_operand" "=w") + (subreg:OI + (vec_concat:<VRL2> + (vec_concat:<VDBL> + (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD2) + (vec_duplicate:VD (const_int 0))) + (vec_concat:<VDBL> + (unspec:VD [(match_dup 1)] + UNSPEC_LD2) + (vec_duplicate:VD (const_int 0)))) 0))] + "TARGET_SIMD" + "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" + [(set_attr "type" "neon_load2_2reg<q>")] +) + +(define_insn "aarch64_ld2<mode>_dreg" + [(set (match_operand:OI 0 "register_operand" "=w") + (subreg:OI + (vec_concat:<VRL2> + (vec_concat:<VDBL> + (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD2) + (const_int 0)) + (vec_concat:<VDBL> + (unspec:DX [(match_dup 1)] + UNSPEC_LD2) + (const_int 0))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %T0.1d}, %1" + [(set_attr "type" "neon_load1_2reg<q>")] +) + +(define_insn "aarch64_ld3<mode>_dreg" + [(set (match_operand:CI 0 "register_operand" "=w") + (subreg:CI + (vec_concat:<VRL3> + (vec_concat:<VRL2> + (vec_concat:<VDBL> + (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0))) + (vec_concat:<VDBL> + (unspec:VD [(match_dup 1)] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0)))) + (vec_concat:<VDBL> + (unspec:VD [(match_dup 1)] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0)))) 0))] + "TARGET_SIMD" + "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" + [(set_attr "type" "neon_load3_3reg<q>")] +) + +(define_insn "aarch64_ld3<mode>_dreg" + [(set (match_operand:CI 0 "register_operand" "=w") + (subreg:CI + (vec_concat:<VRL3> + (vec_concat:<VRL2> + (vec_concat:<VDBL> + (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD3) + (const_int 0)) + (vec_concat:<VDBL> + (unspec:DX [(match_dup 1)] + UNSPEC_LD3) + (const_int 0))) + (vec_concat:<VDBL> + (unspec:DX [(match_dup 1)] + UNSPEC_LD3) + (const_int 0))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %U0.1d}, %1" + [(set_attr "type" "neon_load1_3reg<q>")] +) + +(define_insn "aarch64_ld4<mode>_dreg" + [(set (match_operand:XI 0 "register_operand" "=w") + (subreg:XI + (vec_concat:<VRL4> + (vec_concat:<VRL2> + (vec_concat:<VDBL> + (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))) + (vec_concat:<VDBL> + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0)))) + (vec_concat:<VRL2> + (vec_concat:<VDBL> + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))) + (vec_concat:<VDBL> + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))))) 0))] + "TARGET_SIMD" + "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" + [(set_attr "type" "neon_load4_4reg<q>")] +) + +(define_insn "aarch64_ld4<mode>_dreg" + [(set (match_operand:XI 0 "register_operand" "=w") + (subreg:XI + (vec_concat:<VRL4> + (vec_concat:<VRL2> + (vec_concat:<VDBL> + (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD4) + (const_int 0)) + (vec_concat:<VDBL> + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0))) + (vec_concat:<VRL2> + (vec_concat:<VDBL> + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0)) + (vec_concat:<VDBL> + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0)))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %V0.1d}, %1" + [(set_attr "type" "neon_load1_4reg<q>")] +) + +(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>" + 
[(match_operand:VSTRUCT 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem)); + DONE; +}) + +(define_expand "aarch64_ld1<VALL:mode>" + [(match_operand:VALL 0 "register_operand") + (match_operand:DI 1 "register_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = <VALL:MODE>mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem)); + else + emit_move_insn (operands[0], mem); + DONE; +}) + +(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>" + [(match_operand:VSTRUCT 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = <VSTRUCT:MODE>mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + emit_insn (gen_vec_load_lanes<VSTRUCT:mode><VQ:mode> (operands[0], mem)); + DONE; +}) + +;; Expanders for builtins to extract vector registers from large +;; opaque integer modes. + +;; D-register list. + +(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>" + [(match_operand:VDC 0 "register_operand" "=w") + (match_operand:VSTRUCT 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[2]); + rtx temp = gen_reg_rtx (<VDC:VDBL>mode); + int offset = part * 16; + + emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset)); + emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp)); + DONE; +}) + +;; Q-register list. + +(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>" + [(match_operand:VQ 0 "register_operand" "=w") + (match_operand:VSTRUCT 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[2]); + int offset = part * 16; + + emit_move_insn (operands[0], + gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset)); + DONE; +}) + +;; Permuted-store expanders for neon intrinsics. + +;; Permute instructions + +;; vec_perm support + +(define_expand "vec_perm_const<mode>" + [(match_operand:VALL 0 "register_operand") + (match_operand:VALL 1 "register_operand") + (match_operand:VALL 2 "register_operand") + (match_operand:<V_cmp_result> 3)] + "TARGET_SIMD" +{ + if (aarch64_expand_vec_perm_const (operands[0], operands[1], + operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "vec_perm<mode>" + [(match_operand:VB 0 "register_operand") + (match_operand:VB 1 "register_operand") + (match_operand:VB 2 "register_operand") + (match_operand:VB 3 "register_operand")] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" +{ + aarch64_expand_vec_perm (operands[0], operands[1], + operands[2], operands[3]); + DONE; +}) + +(define_insn "aarch64_tbl1<mode>" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:V16QI 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SIMD" + "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>" + [(set_attr "type" "neon_tbl1<q>")] +) + +;; Two source registers. 
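+;; The OImode operand names a pair of consecutive Q registers that form a
+;; 32-byte table, e.g. "tbl v0.16b, {v1.16b - v2.16b}, v3.16b" writes, for
+;; each lane of the result, the table byte selected by the corresponding
+;; index byte in v3 (indices outside the table produce zero).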
+ +(define_insn "aarch64_tbl2v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:OI 1 "register_operand" "w") + (match_operand:V16QI 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SIMD" + "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b" + [(set_attr "type" "neon_tbl2_q")] +) + +(define_insn_and_split "aarch64_combinev16qi" + [(set (match_operand:OI 0 "register_operand" "=w") + (unspec:OI [(match_operand:V16QI 1 "register_operand" "w") + (match_operand:V16QI 2 "register_operand" "w")] + UNSPEC_CONCAT))] + "TARGET_SIMD" + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_combinev16qi (operands); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>" + [(set (match_operand:VALL 0 "register_operand" "=w") + (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") + (match_operand:VALL 2 "register_operand" "w")] + PERMUTE))] + "TARGET_SIMD" + "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_permute<q>")] +) + +(define_insn "aarch64_st2<mode>_dreg" + [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:TI [(match_operand:OI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" + [(set_attr "type" "neon_store2_2reg")] +) + +(define_insn "aarch64_st2<mode>_dreg" + [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:TI [(match_operand:OI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %T1.1d}, %0" + [(set_attr "type" "neon_store1_2reg")] +) + +(define_insn "aarch64_st3<mode>_dreg" + [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:EI [(match_operand:CI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "aarch64_st3<mode>_dreg" + [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:EI [(match_operand:CI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %U1.1d}, %0" + [(set_attr "type" "neon_store1_3reg")] +) + +(define_insn "aarch64_st4<mode>_dreg" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:XI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" + [(set_attr "type" "neon_store4_4reg")] +) + +(define_insn "aarch64_st4<mode>_dreg" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:XI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %V1.1d}, %0" + [(set_attr "type" "neon_store1_4reg")] +) + +(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:VSTRUCT 1 "register_operand" "w") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1])); + DONE; +}) + +(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>" + 
[(match_operand:DI 0 "register_operand" "r") + (match_operand:VSTRUCT 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = <VSTRUCT:MODE>mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + emit_insn (gen_vec_store_lanes<VSTRUCT:mode><VQ:mode> (mem, operands[1])); + DONE; +}) + +(define_expand "aarch64_st1<VALL:mode>" + [(match_operand:DI 0 "register_operand") + (match_operand:VALL 1 "register_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = <VALL:MODE>mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1])); + else + emit_move_insn (mem, operands[1]); + DONE; +}) + +;; Expander for builtins to insert vector registers into large +;; opaque integer modes. + +;; Q-register list. We don't need a D-reg inserter as we zero +;; extend them in arm_neon.h and insert the resulting Q-regs. + +(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>" + [(match_operand:VSTRUCT 0 "register_operand" "+w") + (match_operand:VSTRUCT 1 "register_operand" "0") + (match_operand:VQ 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[3]); + int offset = part * 16; + + emit_move_insn (operands[0], operands[1]); + emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset), + operands[2]); + DONE; +}) + +;; Standard pattern name vec_init<mode>. + +(define_expand "vec_init<mode>" + [(match_operand:VALL 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SIMD" +{ + aarch64_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +(define_insn "*aarch64_simd_ld1r<mode>" + [(set (match_operand:VALLDI 0 "register_operand" "=w") + (vec_duplicate:VALLDI + (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))] + "TARGET_SIMD" + "ld1r\\t{%0.<Vtype>}, %1" + [(set_attr "type" "neon_load1_all_lanes")] +) + +(define_insn "aarch64_frecpe<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] + UNSPEC_FRECPE))] + "TARGET_SIMD" + "frecpe\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_fp_recpe_<Vetype><q>")] +) + +(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRECP))] + "TARGET_SIMD" + "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" + [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")] +) + +(define_insn "aarch64_frecps<mode>" + [(set (match_operand:VALLF 0 "register_operand" "=w") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") + (match_operand:VALLF 2 "register_operand" "w")] + UNSPEC_FRECPS))] + "TARGET_SIMD" + "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" + [(set_attr "type" "neon_fp_recps_<Vetype><q>")] +) + +;; Standard pattern name vec_extract<mode>. 
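+;; The expander just defers to aarch64_get_lane<mode>, which emits a
+;; single instruction chosen by where the result ends up: typically a
+;; umov to a general register, a dup to a SIMD register, or a one-lane
+;; st1 when the destination is memory.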
+ +(define_expand "vec_extract<mode>" + [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_SIMD" +{ + emit_insn + (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2])); + DONE; +}) + +;; aes + +(define_insn "aarch64_crypto_aes<aes_op>v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "register_operand" "w")] + CRYPTO_AES))] + "TARGET_SIMD && TARGET_CRYPTO" + "aes<aes_op>\\t%0.16b, %2.16b" + [(set_attr "type" "crypto_aes")] +) + +(define_insn "aarch64_crypto_aes<aesmc_op>v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] + CRYPTO_AESMC))] + "TARGET_SIMD && TARGET_CRYPTO" + "aes<aesmc_op>\\t%0.16b, %1.16b" + [(set_attr "type" "crypto_aes")] +) + +;; sha1 + +(define_insn "aarch64_crypto_sha1hsi" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 + "register_operand" "w")] + UNSPEC_SHA1H))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1h\\t%s0, %s1" + [(set_attr "type" "crypto_sha1_fast")] +) + +(define_insn "aarch64_crypto_sha1su1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w")] + UNSPEC_SHA1SU1))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1su1\\t%0.4s, %2.4s" + [(set_attr "type" "crypto_sha1_fast")] +) + +(define_insn "aarch64_crypto_sha1<sha1_op>v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + CRYPTO_SHA1))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1<sha1_op>\\t%q0, %s2, %3.4s" + [(set_attr "type" "crypto_sha1_slow")] +) + +(define_insn "aarch64_crypto_sha1su0v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + UNSPEC_SHA1SU0))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1su0\\t%0.4s, %2.4s, %3.4s" + [(set_attr "type" "crypto_sha1_xor")] +) + +;; sha256 + +(define_insn "aarch64_crypto_sha256h<sha256_op>v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + CRYPTO_SHA256))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha256h<sha256_op>\\t%q0, %q2, %3.4s" + [(set_attr "type" "crypto_sha256_slow")] +) + +(define_insn "aarch64_crypto_sha256su0v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w")] + UNSPEC_SHA256SU0))] + "TARGET_SIMD &&TARGET_CRYPTO" + "sha256su0\\t%0.4s, %2.4s" + [(set_attr "type" "crypto_sha256_fast")] +) + +(define_insn "aarch64_crypto_sha256su1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + UNSPEC_SHA256SU1))] + "TARGET_SIMD &&TARGET_CRYPTO" + "sha256su1\\t%0.4s, %2.4s, %3.4s" + [(set_attr "type" "crypto_sha256_slow")] +) + +;; pmull + +(define_insn 
"aarch64_crypto_pmulldi" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w")] + UNSPEC_PMULL))] + "TARGET_SIMD && TARGET_CRYPTO" + "pmull\\t%0.1q, %1.1d, %2.1d" + [(set_attr "type" "neon_mul_d_long")] +) + +(define_insn "aarch64_crypto_pmullv2di" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") + (match_operand:V2DI 2 "register_operand" "w")] + UNSPEC_PMULL2))] + "TARGET_SIMD && TARGET_CRYPTO" + "pmull2\\t%0.1q, %1.2d, %2.2d" + [(set_attr "type" "neon_mul_d_long")] +) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-tune.md b/gcc-4.9/gcc/config/aarch64/aarch64-tune.md new file mode 100644 index 000000000..b7e40e0b5 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-tune.md @@ -0,0 +1,5 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically by gentune.sh from aarch64-cores.def +(define_attr "tune" + "cortexa53,cortexa15,cortexa57cortexa53" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c new file mode 100644 index 000000000..ebd58c007 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -0,0 +1,8544 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "insn-codes.h" +#include "rtl.h" +#include "insn-attr.h" +#include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "regs.h" +#include "df.h" +#include "hard-reg-set.h" +#include "output.h" +#include "expr.h" +#include "reload.h" +#include "toplev.h" +#include "target.h" +#include "target-def.h" +#include "targhooks.h" +#include "ggc.h" +#include "function.h" +#include "tm_p.h" +#include "recog.h" +#include "langhooks.h" +#include "diagnostic-core.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "optabs.h" +#include "dwarf2.h" +#include "cfgloop.h" +#include "tree-vectorizer.h" +#include "config/arm/aarch-cost-tables.h" + +/* Defined for convenience. */ +#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) + +/* Classifies an address. + + ADDRESS_REG_IMM + A simple base register plus immediate offset. + + ADDRESS_REG_WB + A base register indexed by immediate offset with writeback. + + ADDRESS_REG_REG + A base register indexed by (optionally scaled) register. 
+ + ADDRESS_REG_UXTW + A base register indexed by (optionally scaled) zero-extended register. + + ADDRESS_REG_SXTW + A base register indexed by (optionally scaled) sign-extended register. + + ADDRESS_LO_SUM + A LO_SUM rtx with a base register and "LO12" symbol relocation. + + ADDRESS_SYMBOLIC: + A constant symbolic address, in pc-relative literal pool. */ + +enum aarch64_address_type { + ADDRESS_REG_IMM, + ADDRESS_REG_WB, + ADDRESS_REG_REG, + ADDRESS_REG_UXTW, + ADDRESS_REG_SXTW, + ADDRESS_LO_SUM, + ADDRESS_SYMBOLIC +}; + +struct aarch64_address_info { + enum aarch64_address_type type; + rtx base; + rtx offset; + int shift; + enum aarch64_symbol_type symbol_type; +}; + +struct simd_immediate_info +{ + rtx value; + int shift; + int element_width; + bool mvn; + bool msl; +}; + +/* The current code model. */ +enum aarch64_code_model aarch64_cmodel; + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS 1 +#endif + +static bool aarch64_lra_p (void); +static bool aarch64_composite_type_p (const_tree, enum machine_mode); +static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode, + const_tree, + enum machine_mode *, int *, + bool *); +static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; +static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; +static void aarch64_override_options_after_change (void); +static bool aarch64_vector_mode_supported_p (enum machine_mode); +static unsigned bit_count (unsigned HOST_WIDE_INT); +static bool aarch64_const_vec_all_same_int_p (rtx, + HOST_WIDE_INT, HOST_WIDE_INT); + +static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel); + +/* The processor for which instructions should be scheduled. */ +enum aarch64_processor aarch64_tune = cortexa53; + +/* The current tuning set. */ +const struct tune_params *aarch64_tune_params; + +/* Mask to specify which instructions we are allowed to generate. */ +unsigned long aarch64_isa_flags = 0; + +/* Mask to specify which instruction scheduling options should be used. */ +unsigned long aarch64_tune_flags = 0; + +/* Tuning parameters. */ + +#if HAVE_DESIGNATED_INITIALIZERS +#define NAMED_PARAM(NAME, VAL) .NAME = (VAL) +#else +#define NAMED_PARAM(NAME, VAL) (VAL) +#endif + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_addrcost_table generic_addrcost_table = +{ + NAMED_PARAM (pre_modify, 0), + NAMED_PARAM (post_modify, 0), + NAMED_PARAM (register_offset, 0), + NAMED_PARAM (register_extend, 0), + NAMED_PARAM (imm_offset, 0) +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_regmove_cost generic_regmove_cost = +{ + NAMED_PARAM (GP2GP, 1), + NAMED_PARAM (GP2FP, 2), + NAMED_PARAM (FP2GP, 2), + /* We currently do not provide direct support for TFmode Q->Q move. + Therefore we need to raise the cost above 2 in order to have + reload handle the situation. */ + NAMED_PARAM (FP2FP, 4) +}; + +/* Generic costs for vector insn classes. 
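+   Every scalar and vector statement kind is costed at 1, with a taken
+   conditional branch costed at 3; the aarch64 vectorizer cost hooks use
+   these weights when deciding whether vectorising a loop is profitable.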
*/ +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_vector_cost generic_vector_cost = +{ + NAMED_PARAM (scalar_stmt_cost, 1), + NAMED_PARAM (scalar_load_cost, 1), + NAMED_PARAM (scalar_store_cost, 1), + NAMED_PARAM (vec_stmt_cost, 1), + NAMED_PARAM (vec_to_scalar_cost, 1), + NAMED_PARAM (scalar_to_vec_cost, 1), + NAMED_PARAM (vec_align_load_cost, 1), + NAMED_PARAM (vec_unalign_load_cost, 1), + NAMED_PARAM (vec_unalign_store_cost, 1), + NAMED_PARAM (vec_store_cost, 1), + NAMED_PARAM (cond_taken_branch_cost, 3), + NAMED_PARAM (cond_not_taken_branch_cost, 1) +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct tune_params generic_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 2) +}; + +static const struct tune_params cortexa53_tunings = +{ + &cortexa53_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 2) +}; + +static const struct tune_params cortexa57_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 3) +}; + +/* A processor implementing AArch64. */ +struct processor +{ + const char *const name; + enum aarch64_processor core; + const char *arch; + const unsigned long flags; + const struct tune_params *const tune; +}; + +/* Processor cores implementing AArch64. */ +static const struct processor all_cores[] = +{ +#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ + {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings}, +#include "aarch64-cores.def" +#undef AARCH64_CORE + {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, + {NULL, aarch64_none, NULL, 0, NULL} +}; + +/* Architectures implementing AArch64. */ +static const struct processor all_architectures[] = +{ +#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ + {NAME, CORE, #ARCH, FLAGS, NULL}, +#include "aarch64-arches.def" +#undef AARCH64_ARCH + {NULL, aarch64_none, NULL, 0, NULL} +}; + +/* Target specification. These are populated as commandline arguments + are processed, or NULL if not specified. */ +static const struct processor *selected_arch; +static const struct processor *selected_cpu; +static const struct processor *selected_tune; + +#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) + +/* An ISA extension in the co-processor and main instruction set space. */ +struct aarch64_option_extension +{ + const char *const name; + const unsigned long flags_on; + const unsigned long flags_off; +}; + +/* ISA extensions in AArch64. */ +static const struct aarch64_option_extension all_extensions[] = +{ +#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \ + {NAME, FLAGS_ON, FLAGS_OFF}, +#include "aarch64-option-extensions.def" +#undef AARCH64_OPT_EXTENSION + {NULL, 0, 0} +}; + +/* Used to track the size of an address when generating a pre/post + increment address. */ +static enum machine_mode aarch64_memory_reference_mode; + +/* Used to force GTY into this file. */ +static GTY(()) int gty_dummy; + +/* A table of valid AArch64 "bitmask immediate" values for + logical instructions. 
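+   A bitmask immediate is a rotated run of contiguous set bits replicated
+   across the register at a power-of-two element size; 0x00ff00ff00ff00ff
+   is encodable, for instance, while 0x1234 is not.  Rather than decoding
+   candidates on the fly, every encodable value is enumerated once at
+   initialisation into the table below.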
*/ + +#define AARCH64_NUM_BITMASKS 5334 +static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS]; + +/* Did we set flag_omit_frame_pointer just so + aarch64_frame_pointer_required would be called? */ +static bool faked_omit_frame_pointer; + +typedef enum aarch64_cond_code +{ + AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL, + AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT, + AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV +} +aarch64_cc; + +#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1)) + +/* The condition codes of the processor, and the inverse function. */ +static const char * const aarch64_condition_codes[] = +{ + "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" +}; + +/* Provide a mapping from gcc register numbers to dwarf register numbers. */ +unsigned +aarch64_dbx_register_number (unsigned regno) +{ + if (GP_REGNUM_P (regno)) + return AARCH64_DWARF_R0 + regno - R0_REGNUM; + else if (regno == SP_REGNUM) + return AARCH64_DWARF_SP; + else if (FP_REGNUM_P (regno)) + return AARCH64_DWARF_V0 + regno - V0_REGNUM; + + /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no + equivalent DWARF register. */ + return DWARF_FRAME_REGISTERS; +} + +/* Return TRUE if MODE is any of the large INT modes. */ +static bool +aarch64_vect_struct_mode_p (enum machine_mode mode) +{ + return mode == OImode || mode == CImode || mode == XImode; +} + +/* Return TRUE if MODE is any of the vector modes. */ +static bool +aarch64_vector_mode_p (enum machine_mode mode) +{ + return aarch64_vector_mode_supported_p (mode) + || aarch64_vect_struct_mode_p (mode); +} + +/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */ +static bool +aarch64_array_mode_supported_p (enum machine_mode mode, + unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_SIMD + && AARCH64_VALID_SIMD_QREG_MODE (mode) + && (nelems >= 2 && nelems <= 4)) + return true; + + return false; +} + +/* Implement HARD_REGNO_NREGS. */ + +int +aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode) +{ + switch (aarch64_regno_regclass (regno)) + { + case FP_REGS: + case FP_LO_REGS: + return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG; + default: + return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + } + gcc_unreachable (); +} + +/* Implement HARD_REGNO_MODE_OK. */ + +int +aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_CC) + return regno == CC_REGNUM; + + if (regno == SP_REGNUM) + /* The purpose of comparing with ptr_mode is to support the + global register variable associated with the stack pointer + register via the syntax of asm ("wsp") in ILP32. */ + return mode == Pmode || mode == ptr_mode; + + if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM) + return mode == Pmode; + + if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode)) + return 1; + + if (FP_REGNUM_P (regno)) + { + if (aarch64_vect_struct_mode_p (mode)) + return + (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM; + else + return 1; + } + + return 0; +} + +/* Return true if calls to DECL should be treated as + long-calls (ie called via a register). */ +static bool +aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED) +{ + return false; +} + +/* Return true if calls to symbol-ref SYM should be treated as + long-calls (ie called via a register). 
*/ +bool +aarch64_is_long_call_p (rtx sym) +{ + return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym)); +} + +/* Return true if the offsets to a zero/sign-extract operation + represent an expression that matches an extend operation. The + operands represent the paramters from + + (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */ +bool +aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm, + rtx extract_imm) +{ + HOST_WIDE_INT mult_val, extract_val; + + if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm)) + return false; + + mult_val = INTVAL (mult_imm); + extract_val = INTVAL (extract_imm); + + if (extract_val > 8 + && extract_val < GET_MODE_BITSIZE (mode) + && exact_log2 (extract_val & ~7) > 0 + && (extract_val & 7) <= 4 + && mult_val == (1 << (extract_val & 7))) + return true; + + return false; +} + +/* Emit an insn that's a simple single-set. Both the operands must be + known to be valid. */ +inline static rtx +emit_set_insn (rtx x, rtx y) +{ + return emit_insn (gen_rtx_SET (VOIDmode, x, y)); +} + +/* X and Y are two things to compare using CODE. Emit the compare insn and + return the rtx for register 0 in the proper mode. */ +rtx +aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) +{ + enum machine_mode mode = SELECT_CC_MODE (code, x, y); + rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); + + emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + return cc_reg; +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +rtx +aarch64_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +/* Return the TLS model to use for ADDR. */ + +static enum tls_model +tls_symbolic_operand_type (rtx addr) +{ + enum tls_model tls_kind = TLS_MODEL_NONE; + rtx sym, addend; + + if (GET_CODE (addr) == CONST) + { + split_const (addr, &sym, &addend); + if (GET_CODE (sym) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (sym); + } + else if (GET_CODE (addr) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (addr); + + return tls_kind; +} + +/* We'll allow lo_sum's in addresses in our legitimate addresses + so that combine would take care of combining addresses where + necessary, but for generation purposes, we'll generate the address + as : + RTL Absolute + tmp = hi (symbol_ref); adrp x1, foo + dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo + nop + + PIC TLS + adrp x1, :got:foo adrp tmp, :tlsgd:foo + ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo + bl __tls_get_addr + nop + + Load TLS symbol, depending on TLS mechanism and TLS access model. + + Global Dynamic - Traditional TLS: + adrp tmp, :tlsgd:imm + add dest, tmp, #:tlsgd_lo12:imm + bl __tls_get_addr + + Global Dynamic - TLS Descriptors: + adrp dest, :tlsdesc:imm + ldr tmp, [dest, #:tlsdesc_lo12:imm] + add dest, dest, #:tlsdesc_lo12:imm + blr tmp + mrs tp, tpidr_el0 + add dest, dest, tp + + Initial Exec: + mrs tp, tpidr_el0 + adrp tmp, :gottprel:imm + ldr dest, [tmp, #:gottprel_lo12:imm] + add dest, dest, tp + + Local Exec: + mrs tp, tpidr_el0 + add t0, tp, #:tprel_hi12:imm + add t0, #:tprel_lo12_nc:imm +*/ + +static void +aarch64_load_symref_appropriately (rtx dest, rtx imm, + enum aarch64_symbol_type type) +{ + switch (type) + { + case SYMBOL_SMALL_ABSOLUTE: + { + /* In ILP32, the mode of dest can be either SImode or DImode. 
*/ + rtx tmp_reg = dest; + enum machine_mode mode = GET_MODE (dest); + + gcc_assert (mode == Pmode || mode == ptr_mode); + + if (can_create_pseudo_p ()) + tmp_reg = gen_reg_rtx (mode); + + emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm)); + emit_insn (gen_add_losym (dest, tmp_reg, imm)); + return; + } + + case SYMBOL_TINY_ABSOLUTE: + emit_insn (gen_rtx_SET (Pmode, dest, imm)); + return; + + case SYMBOL_SMALL_GOT: + { + /* In ILP32, the mode of dest can be either SImode or DImode, + while the got entry is always of SImode size. The mode of + dest depends on how dest is used: if dest is assigned to a + pointer (e.g. in the memory), it has SImode; it may have + DImode if dest is dereferenced to access the memeory. + This is why we have to handle three different ldr_got_small + patterns here (two patterns for ILP32). */ + rtx tmp_reg = dest; + enum machine_mode mode = GET_MODE (dest); + + if (can_create_pseudo_p ()) + tmp_reg = gen_reg_rtx (mode); + + emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm)); + if (mode == ptr_mode) + { + if (mode == DImode) + emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm)); + else + emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm)); + } + else + { + gcc_assert (mode == Pmode); + emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm)); + } + + return; + } + + case SYMBOL_SMALL_TLSGD: + { + rtx insns; + rtx result = gen_rtx_REG (Pmode, R0_REGNUM); + + start_sequence (); + emit_call_insn (gen_tlsgd_small (result, imm)); + insns = get_insns (); + end_sequence (); + + RTL_CONST_CALL_P (insns) = 1; + emit_libcall_block (insns, dest, result, imm); + return; + } + + case SYMBOL_SMALL_TLSDESC: + { + rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM); + rtx tp; + + emit_insn (gen_tlsdesc_small (imm)); + tp = aarch64_load_tp (NULL); + emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0))); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_SMALL_GOTTPREL: + { + rtx tmp_reg = gen_reg_rtx (Pmode); + rtx tp = aarch64_load_tp (NULL); + emit_insn (gen_tlsie_small (tmp_reg, imm)); + emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg))); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_SMALL_TPREL: + { + rtx tp = aarch64_load_tp (NULL); + emit_insn (gen_tlsle_small (dest, tp, imm)); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_TINY_GOT: + emit_insn (gen_ldr_got_tiny (dest, imm)); + return; + + default: + gcc_unreachable (); + } +} + +/* Emit a move from SRC to DEST. Assume that the move expanders can + handle all moves if !can_create_pseudo_p (). The distinction is + important because, unlike emit_move_insn, the move expanders know + how to force Pmode objects into the constant pool even when the + constant pool address is not itself legitimate. */ +static rtx +aarch64_emit_move (rtx dest, rtx src) +{ + return (can_create_pseudo_p () + ? emit_move_insn (dest, src) + : emit_move_insn_1 (dest, src)); +} + +/* Split a 128-bit move operation into two 64-bit move operations, + taking care to handle partial overlap of register to register + copies. Special cases are needed when moving between GP regs and + FP regs. SRC can be a register, constant or memory; DST a register + or memory. If either operand is memory it must not have any side + effects. 
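+   For instance, a TImode copy between general registers becomes two
+   DImode moves of the low and high halves, emitted low-then-high or
+   high-then-low so that a source half overlapping the destination is
+   read before it is overwritten.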
*/ +void +aarch64_split_128bit_move (rtx dst, rtx src) +{ + rtx dst_lo, dst_hi; + rtx src_lo, src_hi; + + enum machine_mode mode = GET_MODE (dst); + + gcc_assert (mode == TImode || mode == TFmode); + gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); + gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); + + if (REG_P (dst) && REG_P (src)) + { + int src_regno = REGNO (src); + int dst_regno = REGNO (dst); + + /* Handle FP <-> GP regs. */ + if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno)) + { + src_lo = gen_lowpart (word_mode, src); + src_hi = gen_highpart (word_mode, src); + + if (mode == TImode) + { + emit_insn (gen_aarch64_movtilow_di (dst, src_lo)); + emit_insn (gen_aarch64_movtihigh_di (dst, src_hi)); + } + else + { + emit_insn (gen_aarch64_movtflow_di (dst, src_lo)); + emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi)); + } + return; + } + else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno)) + { + dst_lo = gen_lowpart (word_mode, dst); + dst_hi = gen_highpart (word_mode, dst); + + if (mode == TImode) + { + emit_insn (gen_aarch64_movdi_tilow (dst_lo, src)); + emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src)); + } + else + { + emit_insn (gen_aarch64_movdi_tflow (dst_lo, src)); + emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src)); + } + return; + } + } + + dst_lo = gen_lowpart (word_mode, dst); + dst_hi = gen_highpart (word_mode, dst); + src_lo = gen_lowpart (word_mode, src); + src_hi = gen_highpart_mode (word_mode, mode, src); + + /* At most one pairing may overlap. */ + if (reg_overlap_mentioned_p (dst_lo, src_hi)) + { + aarch64_emit_move (dst_hi, src_hi); + aarch64_emit_move (dst_lo, src_lo); + } + else + { + aarch64_emit_move (dst_lo, src_lo); + aarch64_emit_move (dst_hi, src_hi); + } +} + +bool +aarch64_split_128bit_move_p (rtx dst, rtx src) +{ + return (! REG_P (src) + || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); +} + +/* Split a complex SIMD combine. */ + +void +aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) +{ + enum machine_mode src_mode = GET_MODE (src1); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src1) && REG_P (src2)) + { + rtx (*gen) (rtx, rtx, rtx); + + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src1, src2)); + return; + } +} + +/* Split a complex SIMD move. */ + +void +aarch64_split_simd_move (rtx dst, rtx src) +{ + enum machine_mode src_mode = GET_MODE (src); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src)) + { + rtx (*gen) (rtx, rtx); + + gcc_assert (VECTOR_MODE_P (src_mode)); + + switch (src_mode) + { + case V16QImode: + gen = gen_aarch64_split_simd_movv16qi; + break; + case V8HImode: + gen = gen_aarch64_split_simd_movv8hi; + break; + case V4SImode: |