author    synergydev <synergye@codefi.re>  2013-10-17 18:16:42 -0700
committer synergydev <synergye@codefi.re>  2013-10-17 18:16:42 -0700
commit    61c0330cc243abf13fdd01f377a7f80bd3989eb1 (patch)
tree      119b08ae76294f23e2b1b7e72ff9a06afa9e8509 /gcc-4.8/gcc/config/aarch64
parent    1c712bf7621f3859c33fd3afaa61fdcaf3fdfd76 (diff)
[4.8] Merge GCC 4.8.2
Change-Id: I0f1fcf69c5076d8534c5c45562745e1a37adb197
Diffstat (limited to 'gcc-4.8/gcc/config/aarch64')
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64-builtins.c        |   1
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64-linux.h           |   2
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def |   7
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64-simd.md           |  22
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64.c                 |   2
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64.md                |  17
-rw-r--r--  gcc-4.8/gcc/config/aarch64/arm_neon.h                | 809
7 files changed, 366 insertions, 494 deletions
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.8/gcc/config/aarch64/aarch64-builtins.c
index 1ea55a83e..b2901dbca 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc-4.8/gcc/config/aarch64/aarch64-builtins.c
@@ -1154,6 +1154,7 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
return aarch64_simd_expand_args (target, icode, 1, exp,
SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP);
+ case AARCH64_SIMD_STORE1:
case AARCH64_SIMD_STORESTRUCT:
return aarch64_simd_expand_args (target, icode, 0, exp,
SIMD_ARG_COPY_TO_REG,
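
Note: the new case routes the single-vector stores down the same path as
the structure stores. Stores take the destination address as operand 0
and return void, hence the 0 where the load cases pass 1 (there is no
result register to expand). A condensed sketch of the dispatch,
paraphrased rather than quoted from the function:

  case AARCH64_SIMD_STORE1:       /* new: vst1_* single-vector stores */
  case AARCH64_SIMD_STORESTRUCT:  /* existing: vst2/vst3/vst4 */
    return aarch64_simd_expand_args (target, icode, 0, exp,
                                     SIMD_ARG_COPY_TO_REG,
                                     SIMD_ARG_STOP);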
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64-linux.h b/gcc-4.8/gcc/config/aarch64/aarch64-linux.h
index e914ed27f..83efad447 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc-4.8/gcc/config/aarch64/aarch64-linux.h
@@ -23,6 +23,8 @@
#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64.so.1"
+#define CPP_SPEC "%{pthread:-D_REENTRANT}"
+
#define LINUX_TARGET_LINK_SPEC "%{h*} \
%{static:-Bstatic} \
%{shared:-shared} \
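
Note: the added CPP_SPEC makes -pthread imply -D_REENTRANT on
aarch64-linux, matching the other GNU/Linux targets. A quick check (a
sketch; assumes an aarch64-linux-gnu cross compiler, and the file name
is made up):

  /* reentrant-check.c
     Build: aarch64-linux-gnu-gcc -pthread reentrant-check.c  */
  #include <stdio.h>

  int
  main (void)
  {
  #ifdef _REENTRANT
    puts ("_REENTRANT defined: -pthread was passed");
  #else
    puts ("_REENTRANT not defined");
  #endif
    return 0;
  }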
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
index a6a5e12c7..955da265a 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -256,3 +256,10 @@
BUILTIN_VALL (BINOP, uzp2)
BUILTIN_VALL (BINOP, trn1)
BUILTIN_VALL (BINOP, trn2)
+
+ /* Implemented by aarch64_ld1<VALL:mode>. */
+ BUILTIN_VALL (LOAD1, ld1)
+
+ /* Implemented by aarch64_st1<VALL:mode>. */
+ BUILTIN_VALL (STORE1, st1)
+
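
Note: each BUILTIN_VALL line instantiates one builtin per mode in the
VALL iterator, named __builtin_aarch64_<name><mode> (for example
__builtin_aarch64_ld1v8qi for ld1 on V8QI). The arm_neon.h hunk below
wraps these; minimal use of the wrappers (a sketch for an aarch64
target):

  #include <arm_neon.h>

  /* vld1_s8 expands via __builtin_aarch64_ld1v8qi and vst1_s8 via
     __builtin_aarch64_st1v8qi, so the copy stays in one vector
     register.  */
  void
  copy8 (int8_t *dst, const int8_t *src)
  {
    int8x8_t v = vld1_s8 (src);
    vst1_s8 (dst, v);
  }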
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64-simd.md b/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
index 79c309331..00f3c3121 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
@@ -3457,6 +3457,17 @@
DONE;
})
+(define_expand "aarch64_ld1<VALL:mode>"
+ [(match_operand:VALL 0 "register_operand")
+ (match_operand:DI 1 "register_operand")]
+ "TARGET_SIMD"
+{
+ enum machine_mode mode = <VALL:MODE>mode;
+ rtx mem = gen_rtx_MEM (mode, operands[1]);
+ emit_move_insn (operands[0], mem);
+ DONE;
+})
+
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
[(match_operand:VSTRUCT 0 "register_operand" "=w")
(match_operand:DI 1 "register_operand" "r")
@@ -3673,6 +3684,17 @@
DONE;
})
+(define_expand "aarch64_st1<VALL:mode>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:VALL 1 "register_operand")]
+ "TARGET_SIMD"
+{
+ enum machine_mode mode = <VALL:MODE>mode;
+ rtx mem = gen_rtx_MEM (mode, operands[0]);
+ emit_move_insn (mem, operands[1]);
+ DONE;
+})
+
;; Expander for builtins to insert vector registers into large
;; opaque integer modes.
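
Note: the expanders emit no dedicated ld1/st1 pattern. They wrap the
address in a MEM of the vector mode and emit a plain move, so the
existing mov<mode> patterns supply the instruction and the access stays
visible to the RTL optimizers. In C terms (the equivalence is assumed
here, not stated by the patch):

  #include <arm_neon.h>

  /* Both functions should expand to the same vector-mode load; the
     intrinsic goes through aarch64_ld1v8qi, which emits exactly the
     move that the cast-and-dereference writes directly.  */
  int8x8_t
  via_intrinsic (const int8_t *p)
  {
    return vld1_s8 (p);
  }

  int8x8_t
  via_plain_load (const int8_t *p)
  {
    return *(const int8x8_t *) p;
  }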
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64.c b/gcc-4.8/gcc/config/aarch64/aarch64.c
index 4166782b8..fa53c71df 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64.c
+++ b/gcc-4.8/gcc/config/aarch64/aarch64.c
@@ -3373,7 +3373,7 @@ aarch64_print_operand (FILE *f, rtx x, char code)
output_operand_lossage ("invalid operand for '%%%c'", code);
return;
}
- asm_fprintf (f, "0x%x", UINTVAL (x));
+ asm_fprintf (f, "0x%wx", UINTVAL (x));
break;
case 'w':
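
Note: UINTVAL yields a HOST_WIDE_INT-sized value, and asm_fprintf's %w
is GCC's length modifier for printing one; plain %x truncates constants
wider than 32 bits. A host-side analogue of the bug using standard
printf (a sketch, not asm_fprintf itself):

  #include <stdio.h>

  int
  main (void)
  {
    unsigned long long val = 0x123456789abcdef0ULL;
    printf ("0x%x\n", (unsigned int) val); /* truncated: 0x9abcdef0 */
    printf ("0x%llx\n", val);              /* full 64-bit value */
    return 0;
  }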
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64.md b/gcc-4.8/gcc/config/aarch64/aarch64.md
index 37e9f05ce..04a5e01f9 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64.md
+++ b/gcc-4.8/gcc/config/aarch64/aarch64.md
@@ -834,7 +834,8 @@
movi\\t%d0, %1"
[(set_attr "v8type" "move,move,move,alu,load1,store1,adr,adr,fmov,fmov,fmov,fmov")
(set_attr "mode" "DI")
- (set_attr "fp" "*,*,*,*,*,*,*,*,yes,yes,yes,yes")]
+ (set_attr "fp" "*,*,*,*,*,*,*,*,yes,yes,yes,*")
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn "insv_imm<mode>"
@@ -1312,7 +1313,7 @@
)
(define_insn "*add_<shift>_<mode>"
- [(set (match_operand:GPI 0 "register_operand" "=rk")
+ [(set (match_operand:GPI 0 "register_operand" "=r")
(plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r")
(match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
(match_operand:GPI 3 "register_operand" "r")))]
@@ -1324,7 +1325,7 @@
;; zero_extend version of above
(define_insn "*add_<shift>_si_uxtw"
- [(set (match_operand:DI 0 "register_operand" "=rk")
+ [(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(plus:SI (ASHIFT:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:QI 2 "aarch64_shift_imm_si" "n"))
@@ -1336,7 +1337,7 @@
)
(define_insn "*add_mul_imm_<mode>"
- [(set (match_operand:GPI 0 "register_operand" "=rk")
+ [(set (match_operand:GPI 0 "register_operand" "=r")
(plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
(match_operand:QI 2 "aarch64_pwr_2_<mode>" "n"))
(match_operand:GPI 3 "register_operand" "r")))]
@@ -1659,7 +1660,7 @@
)
(define_insn "*sub_<shift>_<mode>"
- [(set (match_operand:GPI 0 "register_operand" "=rk")
+ [(set (match_operand:GPI 0 "register_operand" "=r")
(minus:GPI (match_operand:GPI 3 "register_operand" "r")
(ASHIFT:GPI
(match_operand:GPI 1 "register_operand" "r")
@@ -1672,7 +1673,7 @@
;; zero_extend version of above
(define_insn "*sub_<shift>_si_uxtw"
- [(set (match_operand:DI 0 "register_operand" "=rk")
+ [(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(minus:SI (match_operand:SI 3 "register_operand" "r")
(ASHIFT:SI
@@ -1685,7 +1686,7 @@
)
(define_insn "*sub_mul_imm_<mode>"
- [(set (match_operand:GPI 0 "register_operand" "=rk")
+ [(set (match_operand:GPI 0 "register_operand" "=r")
(minus:GPI (match_operand:GPI 3 "register_operand" "r")
(mult:GPI
(match_operand:GPI 1 "register_operand" "r")
@@ -1698,7 +1699,7 @@
;; zero_extend version of above
(define_insn "*sub_mul_imm_si_uxtw"
- [(set (match_operand:DI 0 "register_operand" "=rk")
+ [(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(minus:SI (match_operand:SI 3 "register_operand" "r")
(mult:SI
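
Note: these constraint changes all drop 'k', which allows the stack
pointer, from the destinations of the shifted and scaled add/sub
patterns: the A64 shifted-register forms cannot encode sp as the
destination, so "=rk" let the register allocator form instructions the
assembler rejects. An illustrative sketch of the encodings involved
(drawn from the ISA, not from the patch):

  add  x0, x1, x2, lsl #3   // shifted form: Rd must be a core register
  add  sp, sp, #32          // immediate form: sp is allowed, so the
                            //   plain add/sub patterns keep "=rk"
  add  sp, x1, x2, lsl #3   // not encodable; "=rk" made it reachable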
diff --git a/gcc-4.8/gcc/config/aarch64/arm_neon.h b/gcc-4.8/gcc/config/aarch64/arm_neon.h
index 669217e27..2bb42af72 100644
--- a/gcc-4.8/gcc/config/aarch64/arm_neon.h
+++ b/gcc-4.8/gcc/config/aarch64/arm_neon.h
@@ -8518,28 +8518,6 @@ vld1_dup_u64 (const uint64_t * a)
return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vld1_f32 (const float32_t * a)
-{
- float32x2_t result;
- __asm__ ("ld1 {%0.2s}, %1"
- : "=w"(result)
- : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-vld1_f64 (const float64_t * a)
-{
- float64x1_t result;
- __asm__ ("ld1 {%0.1d}, %1"
- : "=w"(result)
- : "Utv"(*a)
- : /* No clobbers */);
- return result;
-}
-
#define vld1_lane_f32(a, b, c) \
__extension__ \
({ \
@@ -8696,116 +8674,6 @@ vld1_f64 (const float64_t * a)
result; \
})
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vld1_p8 (const poly8_t * a)
-{
- poly8x8_t result;
- __asm__ ("ld1 {%0.8b}, %1"
- : "=w"(result)
- : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vld1_p16 (const poly16_t * a)
-{
- poly16x4_t result;
- __asm__ ("ld1 {%0.4h}, %1"
- : "=w"(result)
- : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vld1_s8 (const int8_t * a)
-{
- int8x8_t result;
- __asm__ ("ld1 {%0.8b}, %1"
- : "=w"(result)
- : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vld1_s16 (const int16_t * a)
-{
- int16x4_t result;
- __asm__ ("ld1 {%0.4h}, %1"
- : "=w"(result)
- : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vld1_s32 (const int32_t * a)
-{
- int32x2_t result;
- __asm__ ("ld1 {%0.2s}, %1"
- : "=w"(result)
- : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vld1_s64 (const int64_t * a)
-{
- int64x1_t result;
- __asm__ ("ld1 {%0.1d}, %1"
- : "=w"(result)
- : "Utv"(*a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vld1_u8 (const uint8_t * a)
-{
- uint8x8_t result;
- __asm__ ("ld1 {%0.8b}, %1"
- : "=w"(result)
- : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vld1_u16 (const uint16_t * a)
-{
- uint16x4_t result;
- __asm__ ("ld1 {%0.4h}, %1"
- : "=w"(result)
- : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vld1_u32 (const uint32_t * a)
-{
- uint32x2_t result;
- __asm__ ("ld1 {%0.2s}, %1"
- : "=w"(result)
- : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vld1_u64 (const uint64_t * a)
-{
- uint64x1_t result;
- __asm__ ("ld1 {%0.1d}, %1"
- : "=w"(result)
- : "Utv"(*a)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * a)
{
@@ -8938,28 +8806,6 @@ vld1q_dup_u64 (const uint64_t * a)
return result;
}
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vld1q_f32 (const float32_t * a)
-{
- float32x4_t result;
- __asm__ ("ld1 {%0.4s}, %1"
- : "=w"(result)
- : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vld1q_f64 (const float64_t * a)
-{
- float64x2_t result;
- __asm__ ("ld1 {%0.2d}, %1"
- : "=w"(result)
- : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
#define vld1q_lane_f32(a, b, c) \
__extension__ \
({ \
@@ -9116,116 +8962,6 @@ vld1q_f64 (const float64_t * a)
result; \
})
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vld1q_p8 (const poly8_t * a)
-{
- poly8x16_t result;
- __asm__ ("ld1 {%0.16b}, %1"
- : "=w"(result)
- : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vld1q_p16 (const poly16_t * a)
-{
- poly16x8_t result;
- __asm__ ("ld1 {%0.16b}, %1"
- : "=w"(result)
- : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vld1q_s8 (const int8_t * a)
-{
- int8x16_t result;
- __asm__ ("ld1 {%0.16b}, %1"
- : "=w"(result)
- : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vld1q_s16 (const int16_t * a)
-{
- int16x8_t result;
- __asm__ ("ld1 {%0.8h}, %1"
- : "=w"(result)
- : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vld1q_s32 (const int32_t * a)
-{
- int32x4_t result;
- __asm__ ("ld1 {%0.4s}, %1"
- : "=w"(result)
- : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vld1q_s64 (const int64_t * a)
-{
- int64x2_t result;
- __asm__ ("ld1 {%0.2d}, %1"
- : "=w"(result)
- : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vld1q_u8 (const uint8_t * a)
-{
- uint8x16_t result;
- __asm__ ("ld1 {%0.16b}, %1"
- : "=w"(result)
- : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vld1q_u16 (const uint16_t * a)
-{
- uint16x8_t result;
- __asm__ ("ld1 {%0.8h}, %1"
- : "=w"(result)
- : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vld1q_u32 (const uint32_t * a)
-{
- uint32x4_t result;
- __asm__ ("ld1 {%0.4s}, %1"
- : "=w"(result)
- : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vld1q_u64 (const uint64_t * a)
-{
- uint64x2_t result;
- __asm__ ("ld1 {%0.2d}, %1"
- : "=w"(result)
- : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t a, float32x2_t b)
{
@@ -16285,24 +16021,6 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
result; \
})
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_f32 (float32_t * a, float32x2_t b)
-{
- __asm__ ("st1 {%1.2s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_f64 (float64_t * a, float64x1_t b)
-{
- __asm__ ("st1 {%1.1d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
#define vst1_lane_f32(a, b, c) \
__extension__ \
({ \
@@ -16435,113 +16153,6 @@ vst1_f64 (float64_t * a, float64x1_t b)
: "memory"); \
})
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_p8 (poly8_t * a, poly8x8_t b)
-{
- __asm__ ("st1 {%1.8b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_p16 (poly16_t * a, poly16x4_t b)
-{
- __asm__ ("st1 {%1.4h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s8 (int8_t * a, int8x8_t b)
-{
- __asm__ ("st1 {%1.8b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s16 (int16_t * a, int16x4_t b)
-{
- __asm__ ("st1 {%1.4h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s32 (int32_t * a, int32x2_t b)
-{
- __asm__ ("st1 {%1.2s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s64 (int64_t * a, int64x1_t b)
-{
- __asm__ ("st1 {%1.1d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u8 (uint8_t * a, uint8x8_t b)
-{
- __asm__ ("st1 {%1.8b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u16 (uint16_t * a, uint16x4_t b)
-{
- __asm__ ("st1 {%1.4h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u32 (uint32_t * a, uint32x2_t b)
-{
- __asm__ ("st1 {%1.2s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u64 (uint64_t * a, uint64x1_t b)
-{
- __asm__ ("st1 {%1.1d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_f32 (float32_t * a, float32x4_t b)
-{
- __asm__ ("st1 {%1.4s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_f64 (float64_t * a, float64x2_t b)
-{
- __asm__ ("st1 {%1.2d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
#define vst1q_lane_f32(a, b, c) \
__extension__ \
@@ -16675,96 +16286,6 @@ vst1q_f64 (float64_t * a, float64x2_t b)
: "memory"); \
})
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_p8 (poly8_t * a, poly8x16_t b)
-{
- __asm__ ("st1 {%1.16b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_p16 (poly16_t * a, poly16x8_t b)
-{
- __asm__ ("st1 {%1.8h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s8 (int8_t * a, int8x16_t b)
-{
- __asm__ ("st1 {%1.16b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s16 (int16_t * a, int16x8_t b)
-{
- __asm__ ("st1 {%1.8h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s32 (int32_t * a, int32x4_t b)
-{
- __asm__ ("st1 {%1.4s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s64 (int64_t * a, int64x2_t b)
-{
- __asm__ ("st1 {%1.2d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u8 (uint8_t * a, uint8x16_t b)
-{
- __asm__ ("st1 {%1.16b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u16 (uint16_t * a, uint16x8_t b)
-{
- __asm__ ("st1 {%1.8h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u32 (uint32_t * a, uint32x4_t b)
-{
- __asm__ ("st1 {%1.4s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u64 (uint64_t * a, uint64x2_t b)
-{
- __asm__ ("st1 {%1.2d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
{
@@ -19669,7 +19190,7 @@ vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
return result;
@@ -19685,7 +19206,7 @@ vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
return result;
@@ -19701,7 +19222,7 @@ vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
return result;
@@ -19756,7 +19277,7 @@ vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
return result;
@@ -19775,7 +19296,7 @@ vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
return result;
@@ -19794,7 +19315,7 @@ vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
return result;
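
Note: the vtbx fixes add an earlyclobber. The templates write tmp1 (%1)
before the last read of their inputs, so without '&' the allocator may
assign tmp1 the same register as idx or r, and the intermediate tbl
result clobbers an input. A self-contained illustration of the rule in
the same style as these intrinsics (a sketch, not code from the patch):

  #include <arm_neon.h>

  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  earlyclobber_demo (int8x8_t a, int8x8_t b)
  {
    int8x8_t t;
    /* t is written by the movi before a and b are read by the add,
       so "=&w" tells the allocator t must not share their registers.  */
    __asm__ ("movi %0.8b, #0\n\t"
             "add %0.8b, %1.8b, %2.8b"
             : "=&w"(t)
             : "w"(a), "w"(b)
             : /* No clobbers */);
    return t;
  }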
@@ -20537,6 +20058,165 @@ vdupd_lane_u64 (uint64x2_t a, int const b)
return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b);
}
+/* vld1 */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vld1_f32 (const float32_t *a)
+{
+ return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vld1_f64 (const float64_t *a)
+{
+ return *a;
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vld1_p8 (const poly8_t *a)
+{
+ return (poly8x8_t)
+ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vld1_p16 (const poly16_t *a)
+{
+ return (poly16x4_t)
+ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vld1_s8 (const int8_t *a)
+{
+ return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vld1_s16 (const int16_t *a)
+{
+ return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vld1_s32 (const int32_t *a)
+{
+ return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vld1_s64 (const int64_t *a)
+{
+ return *a;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vld1_u8 (const uint8_t *a)
+{
+ return (uint8x8_t)
+ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vld1_u16 (const uint16_t *a)
+{
+ return (uint16x4_t)
+ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vld1_u32 (const uint32_t *a)
+{
+ return (uint32x2_t)
+ __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vld1_u64 (const uint64_t *a)
+{
+ return *a;
+}
+
+/* vld1q */
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vld1q_f32 (const float32_t *a)
+{
+ return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vld1q_f64 (const float64_t *a)
+{
+ return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vld1q_p8 (const poly8_t *a)
+{
+ return (poly8x16_t)
+ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vld1q_p16 (const poly16_t *a)
+{
+ return (poly16x8_t)
+ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vld1q_s8 (const int8_t *a)
+{
+ return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vld1q_s16 (const int16_t *a)
+{
+ return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vld1q_s32 (const int32_t *a)
+{
+ return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vld1q_s64 (const int64_t *a)
+{
+ return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vld1q_u8 (const uint8_t *a)
+{
+ return (uint8x16_t)
+ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vld1q_u16 (const uint16_t *a)
+{
+ return (uint16x8_t)
+ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vld1q_u32 (const uint32_t *a)
+{
+ return (uint32x4_t)
+ __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vld1q_u64 (const uint64_t *a)
+{
+ return (uint64x2_t)
+ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
+}
+
/* vldn */
__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
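
Note: the 64x1 variants need no builtin at all. In this GCC,
float64x1_t, int64x1_t and uint64x1_t are single-element types, so a
plain dereference (or assignment, for vst1) is already the one-element
access. Expected codegen, assumed rather than verified against this
build:

  #include <arm_neon.h>

  float64x1_t
  load_one (const float64_t *p)
  {
    return vld1_f64 (p);   /* assumed: ldr d0, [x0] -- a scalar load
                              equivalent to ld1 {v0.1d}, [x0] */
  }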
@@ -24307,6 +23987,165 @@ vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
}
+/* vst1 */
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f32 (float32_t *a, float32x2_t b)
+{
+ __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f64 (float64_t *a, float64x1_t b)
+{
+ *a = b;
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p8 (poly8_t *a, poly8x8_t b)
+{
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x8_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p16 (poly16_t *a, poly16x4_t b)
+{
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x4_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s8 (int8_t *a, int8x8_t b)
+{
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s16 (int16_t *a, int16x4_t b)
+{
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s32 (int32_t *a, int32x2_t b)
+{
+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s64 (int64_t *a, int64x1_t b)
+{
+ *a = b;
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u8 (uint8_t *a, uint8x8_t b)
+{
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x8_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u16 (uint16_t *a, uint16x4_t b)
+{
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x4_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u32 (uint32_t *a, uint32x2_t b)
+{
+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
+ (int32x2_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u64 (uint64_t *a, uint64x1_t b)
+{
+ *a = b;
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f32 (float32_t *a, float32x4_t b)
+{
+ __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f64 (float64_t *a, float64x2_t b)
+{
+ __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
+}
+
+/* vst1q */
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p8 (poly8_t *a, poly8x16_t b)
+{
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x16_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p16 (poly16_t *a, poly16x8_t b)
+{
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x8_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s8 (int8_t *a, int8x16_t b)
+{
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s16 (int16_t *a, int16x8_t b)
+{
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s32 (int32_t *a, int32x4_t b)
+{
+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s64 (int64_t *a, int64x2_t b)
+{
+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u8 (uint8_t *a, uint8x16_t b)
+{
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x16_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u16 (uint16_t *a, uint16x8_t b)
+{
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x8_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u32 (uint32_t *a, uint32x4_t b)
+{
+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
+ (int32x4_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u64 (uint64_t *a, uint64x2_t b)
+{
+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
+ (int64x2_t) b);
+}
+
/* vstn */
__extension__ static __inline void