aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/gcc/testsuite/gcc.target/aarch64/mla_intrinsic_1.c
blob: fce41387354e65dc8fb2896f4f3bbdbb6d3b54f2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/* { dg-do run } */
/* { dg-options "-O3 --save-temps" } */

#include <arm_neon.h>

/* Declared by hand; this file includes only <arm_neon.h>.  The generated
   test functions below call it when a result does not match.  */
extern void abort (void);

/* Build a type name by token pasting.  The letter after MAP picks the
   signedness prefix ("int" or "uint"), BITS is the element width and SHAPE
   is an optional suffix that may be left empty:
     MAPs (32, x4) -> int32x4_t
     MAPu (16, )   -> uint16_t  */
#define MAPs(bits, shape) int##bits##shape##_t
#define MAPu(bits, shape) uint##bits##shape##_t


/* TEST_VMLA (q, su, size, in1_lanes, in2_lanes) defines the static helper
     test_vmlaq_lane<q>_<su><size> (res, in1, in2)
   where Q is either empty or "q", SU is "s" or "u" and SIZE is the element
   width.  The helper:
     - loads the accumulator from RES and the multiplicand from IN1, both
       with vld1q and of type <su>int<size>x<in1_lanes>_t;
     - loads the lane source from IN2 with vld1<q>, of type
       <su>int<size>x<in2_lanes>_t;
     - calls vmlaq_lane<q>_<su><size> with lane index 1;
     - stores the result back to RES with vst1q.  */
#define TEST_VMLA(q, su, size, in1_lanes, in2_lanes)		\
static void							\
test_vmlaq_lane##q##_##su##size (MAP##su (size, ) * res,	\
				 const MAP##su(size, ) *in1,	\
				 const MAP##su(size, ) *in2)	\
{								\
  const MAP##su (size, x##in1_lanes) acc			\
    = vld1q_##su##size (res);					\
  const MAP##su (size, x##in1_lanes) mul			\
    = vld1q_##su##size (in1);					\
  const MAP##su (size, x##in2_lanes) lane_src			\
    = vld1##q##_##su##size (in2);				\
								\
  /* The lane index must be a constant; the callers in this	\
     file rely on it being 1.  */				\
  vst1q_##su##size (res,					\
		    vmlaq_lane##q##_##su##size (acc, mul,	\
						lane_src, 1));	\
}

/* For one element width, instantiate the four helpers produced by
   TEST_VMLA: signed and unsigned, each once with an empty Q (lane source
   has HALF elements) and once with Q = "q" (lane source has FULL elements,
   like the other two operands).  */
#define BUILD_VARS(bits, full, half)	\
TEST_VMLA ( , s, bits, full, half)	\
TEST_VMLA (q, s, bits, full, full)	\
TEST_VMLA ( , u, bits, full, half)	\
TEST_VMLA (q, u, bits, full, full)

/* 32-bit elements: 4 per full vector, 2 per half vector.  */
BUILD_VARS (32, 4, 2)
/* 16-bit elements: 8 per full vector, 4 per half vector.  */
BUILD_VARS (16, 8, 4)

/* Brace initializers for the 4- and 8-element arrays used by BUILD_TEST,
   which selects one by pasting its lane count onto POOL or EMPTY.
   POOL<n> counts up from 0 (so element 1 is 1); EMPTY<n> is all zeros.  */
#define POOL4 {0, 1, 2, 3}
#define POOL8 {0, 1, 2, 3, 4, 5, 6, 7}
#define EMPTY4 {0, 0, 0, 0}
#define EMPTY8 {0, 0, 0, 0, 0, 0, 0, 0}

/* BUILD_TEST (su, size, lanes) defines test_<su><size> (), which calls the
   test_vmlaq_lane_ and test_vmlaq_laneq_ helpers for that element type.
   Each call gets its own zero-filled result array and uses POOL<lanes> for
   both inputs; the function aborts unless every result element equals the
   matching pool element.  */
#define BUILD_TEST(su, size, lanes)				\
static void							\
test_##su##size (void)						\
{								\
  MAP##su (size,) input[lanes] = POOL##lanes;			\
  MAP##su (size,) out_lane[lanes] = EMPTY##lanes;		\
  MAP##su (size,) out_laneq[lanes] = EMPTY##lanes;		\
  int idx;							\
								\
  /* Forcefully avoid optimization: empty asm that clobbers	\
     memory, placed between initialization and the call.  */	\
  asm volatile ("" : : : "memory");				\
  test_vmlaq_lane_##su##size (out_lane, input, input);		\
  for (idx = 0; idx < lanes; idx++)				\
    {								\
      if (out_lane[idx] != input[idx])				\
	abort ();						\
    }								\
								\
  /* Same barrier again before the laneq variant.  */		\
  asm volatile ("" : : : "memory");				\
  test_vmlaq_laneq_##su##size (out_laneq, input, input);	\
  for (idx = 0; idx < lanes; idx++)				\
    {								\
      if (out_laneq[idx] != input[idx])				\
	abort ();						\
    }								\
}

/* Reuse the BUILD_VARS name: from here on it emits the signed and unsigned
   checker functions (see BUILD_TEST) for one element width and lane
   count.  */
#undef BUILD_VARS
#define BUILD_VARS(bits, count)		\
BUILD_TEST (s, bits, count)		\
BUILD_TEST (u, bits, count)

/* Defines test_s32 and test_u32.  */
BUILD_VARS (32, 4)
/* Defines test_s16 and test_u16.  */
BUILD_VARS (16, 8)

/* Run the check for each element type in turn.  A mismatch aborts inside
   the test function, so returning 0 means every check passed.  */
int
main (int argc, char **argv)
{
  /* The command line is not used.  */
  (void) argc;
  (void) argv;

  /* 32-bit elements, signed then unsigned.  */
  test_s32 ();
  test_u32 ();

  /* 16-bit elements, signed then unsigned.  */
  test_s16 ();
  test_u16 ();

  return 0;
}

/* { dg-final { scan-assembler-times "mla\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[\[0-9\]+\\\]" 4 } } */
/* { dg-final { scan-assembler-times "mla\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h\\\[\[0-9\]+\\\]" 4 } } */
/* { dg-final { cleanup-saved-temps } } */