about summary refs log tree commit diff stats
path: root/gcc-4.8/gcc/testsuite/gcc.target/i386/reload-1.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.8/gcc/testsuite/gcc.target/i386/reload-1.c')
-rw-r--r-- gcc-4.8/gcc/testsuite/gcc.target/i386/reload-1.c 115
1 files changed, 115 insertions, 0 deletions
diff --git a/gcc-4.8/gcc/testsuite/gcc.target/i386/reload-1.c b/gcc-4.8/gcc/testsuite/gcc.target/i386/reload-1.c
new file mode 100644
index 000000000..9c6cd3222
--- /dev/null
+++ b/gcc-4.8/gcc/testsuite/gcc.target/i386/reload-1.c
@@ -0,0 +1,115 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ia32 } */
+/* { dg-options "-O3 -msse2 -fdump-rtl-csa" } */
+/* { dg-skip-if "no stdint" { vxworks_kernel } } */
+
+#include <emmintrin.h>
+#include <stdint.h>
+
+typedef __SIZE_TYPE__ size_t; /* size_t taken from the compiler's predefined __SIZE_TYPE__ macro */
+typedef float vFloat __attribute__ ((__vector_size__ (16))); /* 16-byte vector of float */
+typedef double vDouble __attribute__ ((__vector_size__ (16))); /* 16-byte vector of double */
+typedef struct buf /* Buffer descriptor; bar() receives two of these (src and dest). */
+{
+ void *data; /* bar() casts this pointer to float* */
+ unsigned long h; /* not read by the code in this test */
+ unsigned long w; /* bar() reads dest->w as its loop bound */
+ size_t bytes; /* not read by the code in this test */
+} buf;
+
+typedef struct job /* Header record; embedded as the first member of fj. */
+{
+ struct Job *next; /* NOTE(review): tag is "Job" (capital J), a different type from struct job; it is never completed in this test and is only used through pointers */
+ void * info; /* not read by the code in this test */
+ long (*func)(struct Job *job); /* function pointer taking a struct Job pointer; never called in this test */
+ long error; /* not read by the code in this test */
+} job;
+
+typedef struct fj /* Job header plus the four values foo() forwards to bar(). */
+{
+ job hd; /* first member; foo() casts its job* argument to fj* */
+ buf src; /* passed by address as bar()'s src */
+ buf dest; /* passed by address as bar()'s dest */
+ float g; /* forwarded to bar(), which does not use it */
+ unsigned int flags; /* forwarded to bar(), which does not use it */
+} fj;
+
+static const double r[256], t[256]; /* 256-entry tables read by bar(); no initializers, so both are zero-filled */
+
+long bar (const buf *src, const buf *dest, float g, unsigned int flags)
+{ /* Kernel compiled by this test: each loop iteration loads 8 floats from src->data and stores 8 floats to dest->data; g and flags are unused; always returns 0. */
+ float *d0 = (float*) src->data;
+ float *d1 = (float*) dest->data;
+ uintptr_t w = dest->w; /* loop bound comes from dest, not src */
+ uintptr_t idx; /* loop counter only; the data is addressed through sr/dr */
+ vFloat p0; /* NOTE(review): never assigned, yet read in the _mm_and_ps calls below; presumably an artifact of test-case reduction - confirm before changing */
+ static const vFloat m0; /* no initializer, so all bits zero */
+ static const vDouble p[3], m, b; /* no initializers, so all zero */
+ float *sr = d0;
+ float *dr = d1;
+ for( idx = 0; idx + 8 <= w; idx += 8 ) /* 8 floats per iteration; a tail of fewer than 8 elements is not processed */
+ {
+ vFloat f0 = _mm_loadu_ps (sr); /* unaligned load of floats 0..3 */
+ vFloat f1 = _mm_loadu_ps (sr + 4); /* unaligned load of floats 4..7 */
+ sr += 8;
+ vFloat fa0 = _mm_andnot_ps (m0, f0); /* (~m0) & f0 */
+ vFloat fa1 = _mm_andnot_ps (m0, f1);
+ vDouble v0 = _mm_cvtps_pd (fa0); /* low two floats of fa0 widened to double */
+ vDouble v1 = _mm_cvtps_pd (_mm_movehl_ps (fa0, fa0)); /* high two floats of fa0 widened to double */
+ vDouble v2 = _mm_cvtps_pd (fa1);
+ vDouble v3 = _mm_cvtps_pd (_mm_movehl_ps (fa1, fa1));
+ vDouble vi0, vi1, vi2, vi3; /* NOTE(review): read below before any assignment; presumably intentional for the reduced test - confirm before changing */
+ __m128i b0, b1, b2, b3; /* NOTE(review): read on the next line before any assignment; same caveat as above */
+ b0 = _mm_packs_epi32 (_mm_packs_epi32 (b0, b1), _mm_packs_epi32 (b2, b3));
+ b1 = _mm_srli_epi64 (b0, 32); /* shift each 64-bit lane right by 32 bits */
+ unsigned int i0 = _mm_cvtsi128_si32 (b0); /* low 32 bits of b0 */
+ unsigned int i2 = _mm_cvtsi128_si32 (b1); /* low 32 bits of b1 */
+ v0 -= _mm_loadh_pd (_mm_load_sd (r + (i0 & 0xff)), r + (i0 >> 16)); /* low lane from r[i0 & 0xff], high lane from r[i0 >> 16]; NOTE(review): the >> 16 index is not masked to the 256-entry table - this test is compile-only (dg-do compile) */
+ v1 -= _mm_loadh_pd (_mm_load_sd (r + (i2 & 0xff)), r + (i2 >> 16));
+ b0 = _mm_unpackhi_epi64 (b0, b0); /* move the high 64 bits into the low half */
+ b1 = _mm_unpackhi_epi64 (b1, b1);
+ unsigned int i4 = _mm_cvtsi128_si32 (b0);
+ unsigned int i6 = _mm_cvtsi128_si32 (b1);
+ v2 -= _mm_loadh_pd (_mm_load_sd (r + (i4 & 0xff)), r + (i4 >> 16));
+ v3 -= _mm_loadh_pd (_mm_load_sd (r + (i6 & 0xff)), r + (i6 >> 16));
+ v0 = p[0] + (p[1] + p[2] * v0) * v0; /* quadratic in v0 with coefficients p[0..2], Horner form */
+ v1 = p[0] + (p[1] + p[2] * v1) * v1;
+ v2 = p[0] + (p[1] + p[2] * v2) * v2;
+ v3 = p[0] + (p[1] + p[2] * v3) * v3;
+ vi0 = (vDouble) _mm_slli_epi64 ((__m128i)((vi0 + b) + m), 52); /* reinterpret the sum as integers and shift each 64-bit lane left by 52 */
+ vi1 = (vDouble) _mm_slli_epi64 ((__m128i)((vi1 + b) + m), 52);
+ vi2 = (vDouble) _mm_slli_epi64 ((__m128i)((vi2 + b) + m), 52);
+ vi3 = (vDouble) _mm_slli_epi64 ((__m128i)((vi3 + b) + m), 52);
+ vi0 *= _mm_loadh_pd (_mm_load_sd (t + (i0 & 0xff)), t + (i0 >> 16)); /* same index pattern as the r lookups, applied to table t */
+ vi1 *= _mm_loadh_pd (_mm_load_sd (t + (i2 & 0xff)), t + (i2 >> 16));
+ vi2 *= _mm_loadh_pd (_mm_load_sd (t + (i4 & 0xff)), t + (i4 >> 16));
+ vi3 *= _mm_loadh_pd (_mm_load_sd (t + (i6 & 0xff)), t + (i6 >> 16));
+ v0 *= vi0;
+ v1 *= vi1;
+ v2 *= vi2;
+ v3 *= vi3;
+ vFloat r0 = _mm_movelh_ps (_mm_cvtpd_ps( v0 ), _mm_cvtpd_ps (v1)); /* narrow v0 and v1 to float and pack them into one 4-float vector */
+ vFloat r1 = _mm_movelh_ps (_mm_cvtpd_ps( v2 ), _mm_cvtpd_ps (v3));
+ vFloat z0 = _mm_cmpeq_ps (f0, _mm_setzero_ps()); /* per-lane mask: all ones where the input float equals zero */
+ vFloat z1 = _mm_cmpeq_ps (f1, _mm_setzero_ps());
+ r0 = _mm_andnot_ps (z0, r0); /* keep the computed value in lanes where the input was non-zero */
+ r1 = _mm_andnot_ps (z1, r1);
+ z0 = _mm_and_ps (z0, p0); /* take p0 in lanes where the input was zero */
+ z1 = _mm_and_ps (z1, p0);
+ r0 = _mm_or_ps (r0, z0); /* merge the two lane selections */
+ r1 = _mm_or_ps (r1, z1);
+ _mm_storeu_ps (dr, r0); /* unaligned store of results 0..3 */
+ _mm_storeu_ps (dr + 4, r1); /* unaligned store of results 4..7 */
+ dr += 8;
+ }
+ return 0;
+}
+
+long foo (job *j )
+{ /* Reinterprets j as the fj that contains it and forwards its src, dest, g and flags to bar(); returns bar()'s result. */
+ fj *jd = (fj*) j; /* job hd is the first member of fj */
+ return bar (&jd->src, &jd->dest, jd->g, jd->flags);
+}
+
+/* { dg-final { scan-rtl-dump-not "deleted 1 dead insns" "csa" } } */
+/* { dg-final { cleanup-rtl-dump "csa" } } */