/* ARM NEON intrinsics include file.
Copyright (C) 2011-2014 Free Software Foundation, Inc.
Contributed by ARM Ltd.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef _AARCH64_NEON_H_
#define _AARCH64_NEON_H_
#include <stdint.h>
/* Convert the expression __C to a 64-bit unsigned / signed integer.
   The argument is parenthesized so that the cast applies to the whole
   expression rather than only to its first operand; without that,
   __AARCH64_UINT64_C (a / b) would expand to ((uint64_t) a / b).  */
#define __AARCH64_UINT64_C(__C) ((uint64_t) (__C))
#define __AARCH64_INT64_C(__C) ((int64_t) (__C))
/* 64-bit vector types.  Every vector typedef in this group is declared
   with __vector_size__ (8), i.e. 8 bytes, over one of the compiler's
   __builtin_aarch64_simd_* element types.  The names follow the pattern
   <kind><element bits>x<lane count>_t.  */
typedef __builtin_aarch64_simd_qi int8x8_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_hi int16x4_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_si int32x2_t
__attribute__ ((__vector_size__ (8)));
/* The single-lane 64-bit types (int64x1_t, float64x1_t and uint64x1_t
   below) are plain scalar typedefs, not vector types, so they are
   interchangeable with int64_t, double and uint64_t respectively.  */
typedef int64_t int64x1_t;
typedef double float64x1_t;
typedef __builtin_aarch64_simd_sf float32x2_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_poly8 poly8x8_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_poly16 poly16x4_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uqi uint8x8_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_uhi uint16x4_t
__attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_usi uint32x2_t
__attribute__ ((__vector_size__ (8)));
typedef uint64_t uint64x1_t;
/* 128-bit vector types.  Every typedef in this group is declared with
   __vector_size__ (16), i.e. 16 bytes, over one of the compiler's
   __builtin_aarch64_simd_* element types.  The names follow the pattern
   <kind><element bits>x<lane count>_t.  */
typedef __builtin_aarch64_simd_qi int8x16_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_hi int16x8_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_si int32x4_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_di int64x2_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_sf float32x4_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_df float64x2_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly8 poly8x16_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly16 poly16x8_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_poly64 poly64x2_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_uqi uint8x16_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_uhi uint16x8_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_usi uint32x4_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_udi uint64x2_t
__attribute__ ((__vector_size__ (16)));
/* Scalar element types.  The floating-point ones alias the standard C
   types float and double; the polynomial ones alias compiler-provided
   __builtin_aarch64_simd_poly* types.  */
typedef float float32_t;
typedef double float64_t;
typedef __builtin_aarch64_simd_poly8 poly8_t;
typedef __builtin_aarch64_simd_poly16 poly16_t;
typedef __builtin_aarch64_simd_poly64 poly64_t;
typedef __builtin_aarch64_simd_poly128 poly128_t;
/* Two-vector aggregates.  Each <type>x2_t wraps an array val[2] of the
   corresponding vector (or, for the 64x1 forms, scalar-typedef) type.
   NOTE(review): presumably these are the operand/result types of the
   two-register structure load/store intrinsics; their users are not
   visible in this part of the file.  */
typedef struct int8x8x2_t
{
int8x8_t val[2];
} int8x8x2_t;
typedef struct int8x16x2_t
{
int8x16_t val[2];
} int8x16x2_t;
typedef struct int16x4x2_t
{
int16x4_t val[2];
} int16x4x2_t;
typedef struct int16x8x2_t
{
int16x8_t val[2];
} int16x8x2_t;
typedef struct int32x2x2_t
{
int32x2_t val[2];
} int32x2x2_t;
typedef struct int32x4x2_t
{
int32x4_t val[2];
} int32x4x2_t;
typedef struct int64x1x2_t
{
int64x1_t val[2];
} int64x1x2_t;
typedef struct int64x2x2_t
{
int64x2_t val[2];
} int64x2x2_t;
typedef struct uint8x8x2_t
{
uint8x8_t val[2];
} uint8x8x2_t;
typedef struct uint8x16x2_t
{
uint8x16_t val[2];
} uint8x16x2_t;
typedef struct uint16x4x2_t
{
uint16x4_t val[2];
} uint16x4x2_t;
typedef struct uint16x8x2_t
{
uint16x8_t val[2];
} uint16x8x2_t;
typedef struct uint32x2x2_t
{
uint32x2_t val[2];
} uint32x2x2_t;
typedef struct uint32x4x2_t
{
uint32x4_t val[2];
} uint32x4x2_t;
typedef struct uint64x1x2_t
{
uint64x1_t val[2];
} uint64x1x2_t;
typedef struct uint64x2x2_t
{
uint64x2_t val[2];
} uint64x2x2_t;
typedef struct float32x2x2_t
{
float32x2_t val[2];
} float32x2x2_t;
typedef struct float32x4x2_t
{
float32x4_t val[2];
} float32x4x2_t;
typedef struct float64x2x2_t
{
float64x2_t val[2];
} float64x2x2_t;
typedef struct float64x1x2_t
{
float64x1_t val[2];
} float64x1x2_t;
typedef struct poly8x8x2_t
{
poly8x8_t val[2];
} poly8x8x2_t;
typedef struct poly8x16x2_t
{
poly8x16_t val[2];
} poly8x16x2_t;
typedef struct poly16x4x2_t
{
poly16x4_t val[2];
} poly16x4x2_t;
typedef struct poly16x8x2_t
{
poly16x8_t val[2];
} poly16x8x2_t;
/* Three-vector aggregates.  Each <type>x3_t wraps an array val[3] of the
   corresponding vector (or, for the 64x1 forms, scalar-typedef) type.
   NOTE(review): presumably these are the operand/result types of the
   three-register structure load/store intrinsics; their users are not
   visible in this part of the file.  */
typedef struct int8x8x3_t
{
int8x8_t val[3];
} int8x8x3_t;
typedef struct int8x16x3_t
{
int8x16_t val[3];
} int8x16x3_t;
typedef struct int16x4x3_t
{
int16x4_t val[3];
} int16x4x3_t;
typedef struct int16x8x3_t
{
int16x8_t val[3];
} int16x8x3_t;
typedef struct int32x2x3_t
{
int32x2_t val[3];
} int32x2x3_t;
typedef struct int32x4x3_t
{
int32x4_t val[3];
} int32x4x3_t;
typedef struct int64x1x3_t
{
int64x1_t val[3];
} int64x1x3_t;
typedef struct int64x2x3_t
{
int64x2_t val[3];
} int64x2x3_t;
typedef struct uint8x8x3_t
{
uint8x8_t val[3];
} uint8x8x3_t;
typedef struct uint8x16x3_t
{
uint8x16_t val[3];
} uint8x16x3_t;
typedef struct uint16x4x3_t
{
uint16x4_t val[3];
} uint16x4x3_t;
typedef struct uint16x8x3_t
{
uint16x8_t val[3];
} uint16x8x3_t;
typedef struct uint32x2x3_t
{
uint32x2_t val[3];
} uint32x2x3_t;
typedef struct uint32x4x3_t
{
uint32x4_t val[3];
} uint32x4x3_t;
typedef struct uint64x1x3_t
{
uint64x1_t val[3];
} uint64x1x3_t;
typedef struct uint64x2x3_t
{
uint64x2_t val[3];
} uint64x2x3_t;
typedef struct float32x2x3_t
{
float32x2_t val[3];
} float32x2x3_t;
typedef struct float32x4x3_t
{
float32x4_t val[3];
} float32x4x3_t;
typedef struct float64x2x3_t
{
float64x2_t val[3];
} float64x2x3_t;
typedef struct float64x1x3_t
{
float64x1_t val[3];
} float64x1x3_t;
typedef struct poly8x8x3_t
{
poly8x8_t val[3];
} poly8x8x3_t;
typedef struct poly8x16x3_t
{
poly8x16_t val[3];
} poly8x16x3_t;
typedef struct poly16x4x3_t
{
poly16x4_t val[3];
} poly16x4x3_t;
typedef struct poly16x8x3_t
{
poly16x8_t val[3];
} poly16x8x3_t;
/* Four-vector aggregates.  Each <type>x4_t wraps an array val[4] of the
   corresponding vector (or, for the 64x1 forms, scalar-typedef) type.
   NOTE(review): presumably these are the operand/result types of the
   four-register structure load/store intrinsics; their users are not
   visible in this part of the file.  */
typedef struct int8x8x4_t
{
int8x8_t val[4];
} int8x8x4_t;
typedef struct int8x16x4_t
{
int8x16_t val[4];
} int8x16x4_t;
typedef struct int16x4x4_t
{
int16x4_t val[4];
} int16x4x4_t;
typedef struct int16x8x4_t
{
int16x8_t val[4];
} int16x8x4_t;
typedef struct int32x2x4_t
{
int32x2_t val[4];
} int32x2x4_t;
typedef struct int32x4x4_t
{
int32x4_t val[4];
} int32x4x4_t;
typedef struct int64x1x4_t
{
int64x1_t val[4];
} int64x1x4_t;
typedef struct int64x2x4_t
{
int64x2_t val[4];
} int64x2x4_t;
typedef struct uint8x8x4_t
{
uint8x8_t val[4];
} uint8x8x4_t;
typedef struct uint8x16x4_t
{
uint8x16_t val[4];
} uint8x16x4_t;
typedef struct uint16x4x4_t
{
uint16x4_t val[4];
} uint16x4x4_t;
typedef struct uint16x8x4_t
{
uint16x8_t val[4];
} uint16x8x4_t;
typedef struct uint32x2x4_t
{
uint32x2_t val[4];
} uint32x2x4_t;
typedef struct uint32x4x4_t
{
uint32x4_t val[4];
} uint32x4x4_t;
typedef struct uint64x1x4_t
{
uint64x1_t val[4];
} uint64x1x4_t;
typedef struct uint64x2x4_t
{
uint64x2_t val[4];
} uint64x2x4_t;
typedef struct float32x2x4_t
{
float32x2_t val[4];
} float32x2x4_t;
typedef struct float32x4x4_t
{
float32x4_t val[4];
} float32x4x4_t;
typedef struct float64x2x4_t
{
float64x2_t val[4];
} float64x2x4_t;
typedef struct float64x1x4_t
{
float64x1_t val[4];
} float64x1x4_t;
typedef struct poly8x8x4_t
{
poly8x8_t val[4];
} poly8x8x4_t;
typedef struct poly8x16x4_t
{
poly8x16_t val[4];
} poly8x16x4_t;
typedef struct poly16x4x4_t
{
poly16x4_t val[4];
} poly16x4x4_t;
typedef struct poly16x8x4_t
{
poly16x8_t val[4];
} poly16x8x4_t;
/* vget_lane internal macros. */
/* Generic lane extraction.  __size is pasted onto the
   __builtin_aarch64_be_checked_get_lane prefix to select the builtin.
   The builtin receives the vector __a, converted with __cast_a, and the
   lane index __b; its result is converted with __cast_ret.  Either cast
   may be left empty.  __a and __b are parenthesized so that __cast_a
   applies to the whole vector expression rather than only to its first
   operand.  */
#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret \
     __builtin_aarch64_be_checked_get_lane##__size (__cast_a (__a), (__b)))
/* Lane accessors for the 64-bit vector types.  Each one forwards to
   __aarch64_vget_lane_any with the builtin suffix for its vector mode.
   The polynomial and unsigned variants convert the vector to the signed
   vector type of the same layout on the way in and cast the extracted
   scalar back on the way out; the float and signed variants need no
   casts.  The f64/s64/u64 variants simply yield the operand, because
   the 64x1 types are scalar typedefs in this header; their lane index
   is ignored.  */
#define __aarch64_vget_lane_f32(__v, __lane) \
  __aarch64_vget_lane_any (v2sf, , , __v, __lane)
#define __aarch64_vget_lane_f64(__v, __lane) (__v)
#define __aarch64_vget_lane_p8(__v, __lane) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __v, __lane)
#define __aarch64_vget_lane_p16(__v, __lane) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __v, __lane)
#define __aarch64_vget_lane_s8(__v, __lane) \
  __aarch64_vget_lane_any (v8qi, , , __v, __lane)
#define __aarch64_vget_lane_s16(__v, __lane) \
  __aarch64_vget_lane_any (v4hi, , , __v, __lane)
#define __aarch64_vget_lane_s32(__v, __lane) \
  __aarch64_vget_lane_any (v2si, , , __v, __lane)
#define __aarch64_vget_lane_s64(__v, __lane) (__v)
#define __aarch64_vget_lane_u8(__v, __lane) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __v, __lane)
#define __aarch64_vget_lane_u16(__v, __lane) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __v, __lane)
#define __aarch64_vget_lane_u32(__v, __lane) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __v, __lane)
#define __aarch64_vget_lane_u64(__v, __lane) (__v)
/* Lane accessors for the 128-bit vector types.  Each one forwards to
   __aarch64_vget_lane_any with the builtin suffix for its vector mode.
   The polynomial and unsigned variants convert the vector to the signed
   vector type of the same layout on the way in and cast the extracted
   scalar back on the way out; the float and signed variants need no
   casts.  */
#define __aarch64_vgetq_lane_f32(__v, __lane) \
  __aarch64_vget_lane_any (v4sf, , , __v, __lane)
#define __aarch64_vgetq_lane_f64(__v, __lane) \
  __aarch64_vget_lane_any (v2df, , , __v, __lane)
#define __aarch64_vgetq_lane_p8(__v, __lane) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __v, __lane)
#define __aarch64_vgetq_lane_p16(__v, __lane) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __v, __lane)
#define __aarch64_vgetq_lane_s8(__v, __lane) \
  __aarch64_vget_lane_any (v16qi, , , __v, __lane)
#define __aarch64_vgetq_lane_s16(__v, __lane) \
  __aarch64_vget_lane_any (v8hi, , , __v, __lane)
#define __aarch64_vgetq_lane_s32(__v, __lane) \
  __aarch64_vget_lane_any (v4si, , , __v, __lane)
#define __aarch64_vgetq_lane_s64(__v, __lane) \
  __aarch64_vget_lane_any (v2di, , , __v, __lane)
#define __aarch64_vgetq_lane_u8(__v, __lane) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __v, __lane)
#define __aarch64_vgetq_lane_u16(__v, __lane) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __v, __lane)
#define __aarch64_vgetq_lane_u32(__v, __lane) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __v, __lane)
#define __aarch64_vgetq_lane_u64(__v, __lane) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __v, __lane)
/* __aarch64_vdup_lane internal macros. */
/* Generic lane broadcast.  Reads lane __lane of __v through
   __aarch64_vget<__q2>_lane_<__size> and passes the scalar to
   vdup<__q1>_n_<__size>.  __q1 and __q2 are each either empty or `q';
   they are pasted into the two names to choose the result and source
   variants respectively.  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __v, __lane) \
  vdup##__q1##_n_##__size \
    (__aarch64_vget##__q2##_lane_##__size (__v, __lane))
/* Broadcast one lane of a 64-bit vector into a 64-bit result: both
   selector arguments of __aarch64_vdup_lane_any are left empty, so the
   non-q lane accessor and the non-q vdup_n are used.  The f64/s64/u64
   variants yield the operand itself, because the 64x1 types are scalar
   typedefs in this header; their lane index is ignored.  */
#define __aarch64_vdup_lane_f32(__v, __lane) \
  __aarch64_vdup_lane_any (f32, , , __v, __lane)
#define __aarch64_vdup_lane_f64(__v, __lane) (__v)
#define __aarch64_vdup_lane_p8(__v, __lane) \
  __aarch64_vdup_lane_any (p8, , , __v, __lane)
#define __aarch64_vdup_lane_p16(__v, __lane) \
  __aarch64_vdup_lane_any (p16, , , __v, __lane)
#define __aarch64_vdup_lane_s8(__v, __lane) \
  __aarch64_vdup_lane_any (s8, , , __v, __lane)
#define __aarch64_vdup_lane_s16(__v, __lane) \
  __aarch64_vdup_lane_any (s16, , , __v, __lane)
#define __aarch64_vdup_lane_s32(__v, __lane) \
  __aarch64_vdup_lane_any (s32, , , __v, __lane)
#define __aarch64_vdup_lane_s64(__v, __lane) (__v)
#define __aarch64_vdup_lane_u8(__v, __lane) \
  __aarch64_vdup_lane_any (u8, , , __v, __lane)
#define __aarch64_vdup_lane_u16(__v, __lane) \
  __aarch64_vdup_lane_any (u16, , , __v, __lane)
#define __aarch64_vdup_lane_u32(__v, __lane) \
  __aarch64_vdup_lane_any (u32, , , __v, __lane)
#define __aarch64_vdup_lane_u64(__v, __lane) (__v)
/* __aarch64_vdup_laneq internal macros. */
/* Broadcast one lane of a 128-bit vector into a 64-bit result: the
   source selector passed to __aarch64_vdup_lane_any is `q' (so the
   vgetq lane accessor is used) while the result selector is empty (so
   the non-q vdup_n is used).  */
#define __aarch64_vdup_laneq_f32(__v, __lane) \
  __aarch64_vdup_lane_any (f32, , q, __v, __lane)
#define __aarch64_vdup_laneq_f64(__v, __lane) \
  __aarch64_vdup_lane_any (f64, , q, __v, __lane)
#define __aarch64_vdup_laneq_p8(__v, __lane) \
  __aarch64_vdup_lane_any (p8, , q, __v, __lane)
#define __aarch64_vdup_laneq_p16(__v, __lane) \
  __aarch64_vdup_lane_any (p16, , q, __v, __lane)
#define __aarch64_vdup_laneq_s8(__v, __lane) \
  __aarch64_vdup_lane_any (s8, , q, __v, __lane)
#define __aarch64_vdup_laneq_s16(__v, __lane) \
  __aarch64_vdup_lane_any (s16, , q, __v, __lane)
#define __aarch64_vdup_laneq_s32(__v, __lane) \
  __aarch64_vdup_lane_any (s32, , q, __v, __lane)
#define __aarch64_vdup_laneq_s64(__v, __lane) \
  __aarch64_vdup_lane_any (s64, , q, __v, __lane)
#define __aarch64_vdup_laneq_u8(__v, __lane) \
  __aarch64_vdup_lane_any (u8, , q, __v, __lane)
#define __aarch64_vdup_laneq_u16(__v, __lane) \
  __aarch64_vdup_lane_any (u16, , q, __v, __lane)
#define __aarch64_vdup_laneq_u32(__v, __lane) \
  __aarch64_vdup_lane_any (u32, , q, __v, __lane)
#define __aarch64_vdup_laneq_u64(__v, __lane) \
  __aarch64_vdup_lane_any (u64, , q, __v, __lane)
/* __aarch64_vdupq_lane internal macros. */
/* Broadcast one lane of a 64-bit vector into a 128-bit result: the
   result selector passed to __aarch64_vdup_lane_any is `q' (so
   vdupq_n is used) while the source selector is empty (so the non-q
   lane accessor is used).  The f64/s64/u64 variants hand the operand
   straight to vdupq_n_*, because the 64x1 source types are scalar
   typedefs in this header; their lane index is ignored.  */
#define __aarch64_vdupq_lane_f32(__v, __lane) \
  __aarch64_vdup_lane_any (f32, q, , __v, __lane)
#define __aarch64_vdupq_lane_f64(__v, __lane) (vdupq_n_f64 (__v))
#define __aarch64_vdupq_lane_p8(__v, __lane) \
  __aarch64_vdup_lane_any (p8, q, , __v, __lane)
#define __aarch64_vdupq_lane_p16(__v, __lane) \
  __aarch64_vdup_lane_any (p16, q, , __v, __lane)
#define __aarch64_vdupq_lane_s8(__v, __lane) \
  __aarch64_vdup_lane_any (s8, q, , __v, __lane)
#define __aarch64_vdupq_lane_s16(__v, __lane) \
  __aarch64_vdup_lane_any (s16, q, , __v, __lane)
#define __aarch64_vdupq_lane_s32(__v, __lane) \
  __aarch64_vdup_lane_any (s32, q, , __v, __lane)
#define __aarch64_vdupq_lane_s64(__v, __lane) (vdupq_n_s64 (__v))
#define __aarch64_vdupq_lane_u8(__v, __lane) \
  __aarch64_vdup_lane_any (u8, q, , __v, __lane)
#define __aarch64_vdupq_lane_u16(__v, __lane) \
  __aarch64_vdup_lane_any (u16, q, , __v, __lane)
#define __aarch64_vdupq_lane_u32(__v, __lane) \
  __aarch64_vdup_lane_any (u32, q, , __v, __lane)
#define __aarch64_vdupq_lane_u64(__v, __lane) (vdupq_n_u64 (__v))
/* __aarch64_vdupq_laneq internal macros. */
/* Broadcast one lane of a 128-bit vector into a 128-bit result: both
   selectors passed to __aarch64_vdup_lane_any are `q', so the vgetq
   lane accessor and vdupq_n are used.  */
#define __aarch64_vdupq_laneq_f32(__v, __lane) \
  __aarch64_vdup_lane_any (f32, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_f64(__v, __lane) \
  __aarch64_vdup_lane_any (f64, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_p8(__v, __lane) \
  __aarch64_vdup_lane_any (p8, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_p16(__v, __lane) \
  __aarch64_vdup_lane_any (p16, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_s8(__v, __lane) \
  __aarch64_vdup_lane_any (s8, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_s16(__v, __lane) \
  __aarch64_vdup_lane_any (s16, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_s32(__v, __lane) \
  __aarch64_vdup_lane_any (s32, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_s64(__v, __lane) \
  __aarch64_vdup_lane_any (s64, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_u8(__v, __lane) \
  __aarch64_vdup_lane_any (u8, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_u16(__v, __lane) \
  __aarch64_vdup_lane_any (u16, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_u32(__v, __lane) \
  __aarch64_vdup_lane_any (u32, q, q, __v, __lane)
#define __aarch64_vdupq_laneq_u64(__v, __lane) \
  __aarch64_vdup_lane_any (u64, q, q, __v, __lane)
/* vadd */
/* Return the lane-by-lane sum of two int8x8_t vectors, formed with the
   compiler's vector `+' operator.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vadd_s8 (int8x8_t __a, int8x8_t __b)
{
  const int8x8_t __sum = __a + __b;
  return __sum;
}
/* Return the lane-by-lane sum of two int16x4_t vectors, formed with the
   compiler's vector `+' operator.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vadd_s16 (int16x4_t __a, int16x4_t __b)
{
  const int16x4_t __sum = __a + __b;
  return __sum;
}
/* Return the lane-by-lane sum of two int32x2_t vectors, formed with the
   compiler's vector `+' operator.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vadd_s32 (int32x2_t __a, int32x2_t __b)
{
  const int32x2_t __sum = __a + __b;
  return __sum;
}
/* Return the lane-by-lane sum of two float32x2_t vectors, formed with
   the compiler's vector `+' operator.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vadd_f32 (float32x2_t __a, float32x2_t __b)
{
  const float32x2_t __sum = __a + __b;
  return __sum;
}
/* Return __a + __b.  float64x1_t is a typedef of double in this header,
   so this is an ordinary scalar double addition.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vadd_f64 (float64x1_t __a, float64x1_t __b)
{
  const float64x1_t __sum = __a + __b;
  return __sum;
}
/* Return the lane-by-lane sum of two uint8x8_t vectors, formed with the
   compiler's vector `+' operator.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  const uint8x8_t __sum = __a + __b;
  return __sum;
}
/* Return the lane-by-lane sum of two uint16x4_t vectors, formed with
   the compiler's vector `+' operator.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  const uint16x4_t __sum = __a + __b;
  return __sum;
}
/* Return the lane-by-lane sum of two uint32x2_t vectors, formed with
   the compiler's vector `+' operator.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  const uint32x2_t __sum = __a + __b;
  return __sum;
}
/* Return __a + __b with wrap-around on overflow.  int64x1_t is a typedef
   of the scalar int64_t in this header, so a plain `__a + __b' would be
   signed arithmetic whose overflow is undefined in C.  The sum is
   therefore formed in uint64_t, which wraps modulo 2^64, and converted
   back; GCC defines that conversion as reduction modulo 2^64.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) ((uint64_t) __a + (uint64_t) __b);
}
/* Return __a + __b.  uint64x1_t is a typedef of uint64_t in this header,
   so this is an ordinary unsigned 64-bit addition.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  const uint64x1_t __sum = __a + __b;
  return __sum;
}
/* Return the lane-by-lane sum of two int8x16_t vectors.  __a is a
   by-value copy, so it is used as the accumulator.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two int16x8_t vectors.  __a is a
   by-value copy, so it is used as the accumulator.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two int32x4_t vectors.  __a is a
   by-value copy, so it is used as the accumulator.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two int64x2_t vectors.  __a is a
   by-value copy, so it is used as the accumulator.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two float32x4_t vectors.  __a is a
   by-value copy, so it is used as the accumulator; the operand order
   of the addition is unchanged.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vaddq_f32 (float32x4_t __a, float32x4_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two float64x2_t vectors.  __a is a
   by-value copy, so it is used as the accumulator; the operand order
   of the addition is unchanged.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vaddq_f64 (float64x2_t __a, float64x2_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two uint8x16_t vectors.  __a is a
   by-value copy, so it is used as the accumulator.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two uint16x8_t vectors.  __a is a
   by-value copy, so it is used as the accumulator.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two uint32x4_t vectors.  __a is a
   by-value copy, so it is used as the accumulator.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  __a += __b;
  return __a;
}
/* Return the lane-by-lane sum of two uint64x2_t vectors.  __a is a
   by-value copy, so it is used as the accumulator.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  __a += __b;
  return __a;
}
/* Return the int16x8_t result of __builtin_aarch64_saddlv8qi applied to
   two int8x8_t vectors.
   NOTE(review): the builtin is presumably the signed widening add
   (SADDL); its definition is not visible here -- confirm.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_s8 (int8x8_t __a, int8x8_t __b)
{
  const int16x8_t __wide = (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
  return __wide;
}
/* Return the int32x4_t result of __builtin_aarch64_saddlv4hi applied to
   two int16x4_t vectors.
   NOTE(review): the builtin is presumably the signed widening add
   (SADDL); its definition is not visible here -- confirm.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_s16 (int16x4_t __a, int16x4_t __b)
{
  const int32x4_t __wide = (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
  return __wide;
}
/* Return the int64x2_t result of __builtin_aarch64_saddlv2si applied to
   two int32x2_t vectors.
   NOTE(review): the builtin is presumably the signed widening add
   (SADDL); its definition is not visible here -- confirm.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_s32 (int32x2_t __a, int32x2_t __b)
{
  const int64x2_t __wide = (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
  return __wide;
}
/* Return the uint16x8_t result of __builtin_aarch64_uaddlv8qi applied to
   two uint8x8_t vectors.  The builtin is called with int8x8_t operands,
   so both inputs are cast to that type first and the result is cast
   back to the unsigned vector type.
   NOTE(review): the builtin is presumably the unsigned widening add
   (UADDL); its definition is not visible here -- confirm.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  const int8x8_t __lhs = (int8x8_t) __a;
  const int8x8_t __rhs = (int8x8_t) __b;
  return (uint16x8_t) __builtin_aarch64_uaddlv8qi (__lhs, __rhs);
}
/* Return the uint32x4_t result of __builtin_aarch64_uaddlv4hi applied to
   two uint16x4_t vectors.  The builtin is called with int16x4_t
   operands, so both inputs are cast to that type first and the result
   is cast back to the unsigned vector type.
   NOTE(review): the builtin is presumably the unsigned widening add
   (UADDL); its definition is not visible here -- confirm.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  const int16x4_t __lhs = (int16x4_t) __a;
  const int16x4_t __rhs = (int16x4_t) __b;
  return (uint32x4_t) __builtin_aarch64_uaddlv4hi (__lhs, __rhs);
}
/* Return the uint64x2_t result of __builtin_aarch64_uaddlv2si applied to
   two uint32x2_t vectors.  The builtin is called with int32x2_t
   operands, so both inputs are cast to that type first and the result
   is cast back to the unsigned vector type.
   NOTE(review): the builtin is presumably the unsigned widening add
   (UADDL); its definition is not visible here -- confirm.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  const int32x2_t __lhs = (int32x2_t) __a;
  const int32x2_t __rhs = (int32x2_t) __b;
  return (uint64x2_t) __builtin_aarch64_uaddlv2si (__lhs, __rhs);
}
/* Return the int16x8_t result of __builtin_aarch64_saddl2v16qi applied
   to two int8x16_t vectors.
   NOTE(review): the builtin is presumably the signed widening add of
   the upper halves (SADDL2); its definition is not visible here --
   confirm.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  const int16x8_t __wide
    = (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
  return __wide;
}
/* Return the int32x4_t result of __builtin_aarch64_saddl2v8hi applied
   to two int16x8_t vectors.
   NOTE(review): the builtin is presumably the signed widening add of
   the upper halves (SADDL2); its definition is not visible here --
   confirm.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  const int32x4_t __wide
    = (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
  return __wide;
}
/* Return the int64x2_t result of __builtin_aarch64_saddl2v4si applied
   to two int32x4_t vectors.
   NOTE(review): the builtin is presumably the signed widening add of
   the upper halves (SADDL2); its definition is not visible here --
   confirm.  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  const int64x2_t __wide
    = (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
  return __wide;
}
/* Return the uint16x8_t result of __builtin_aarch64_uaddl2v16qi applied
   to two uint8x16_t vectors.  The builtin is called with int8x16_t
   operands, so both inputs are cast to that type first and the result
   is cast back to the unsigned vector type.
   NOTE(review): the builtin is presumably the unsigned widening add of
   the upper halves (UADDL2); its definition is not visible here --
   confirm.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  const int8x16_t __lhs = (int8x16_t) __a;
  const int8x16_t __rhs = (int8x16_t) __b;
  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi (__lhs, __rhs);
}
/* Return the uint32x4_t result of __builtin_aarch64_uaddl2v8hi applied
   to two uint16x8_t vectors.  The builtin is called with int16x8_t
   operands, so both inputs are cast to that type first and the result
   is cast back to the unsigned vector type.
   NOTE(review): the builtin is presumably the unsigned widening add of
   the upper halves (UADDL2); its definition is not visible here --
   confirm.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  const int16x8_t __lhs = (int16x8_t) __a;
  const int16x8_t __rhs = (int16x8_t) __b;
  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi (__lhs, __rhs);
}
/* Return the uint64x2_t result of __builtin_aarch64_uaddl2v4si applied
   to two uint32x4_t vectors.  The builtin is called with int32x4_t
   operands, so both inputs are cast to that type first and the result
   is cast back to the unsigned vector type.
   NOTE(review): the builtin is presumably the unsigned widening add of
   the upper halves (UADDL2); its definition is not visible here --
   confirm.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  const int32x4_t __lhs = (int32x4_t) __a;
  const int32x4_t __rhs = (int32x4_t) __b;
  return (uint64x2_t) __builtin_aarch64_uaddl2v4si (__lhs, __rhs);
}
__extensi
|