From 3f968e4e06203565960e3cac932896be2d0a0a48 Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Thu, 10 Jan 2013 18:49:49 +0100 Subject: Fix output of 2nd chunk of data for vuzp, vzip and vtrn. Reported by Victoria Zhislina. --- compute_ref.axf | Bin 3209512 -> 3235052 bytes ref-rvct.txt | 84 ++++++++++++++++++++++++++++---------------------------- ref_vuzp.c | 2 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/compute_ref.axf b/compute_ref.axf index f6374e3..2a480d0 100644 Binary files a/compute_ref.axf and b/compute_ref.axf differ diff --git a/ref-rvct.txt b/ref-rvct.txt index 67f65a3..05128c4 100644 --- a/ref-rvct.txt +++ b/ref-rvct.txt @@ -2831,24 +2831,24 @@ VTRN/VTRNQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } VTRN/VTRNQ:17:result_float32x4 [] = { c1800000 -0x1.0000000p+4 -16, c1700000 -0x1.e000000p+3 -15, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, } VTRN/VTRNQ chunk 1 output: -VTRN/VTRNQ:18:result_int8x8 [] = { fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, } -VTRN/VTRNQ:19:result_int16x4 [] = { fffffff1, 22, 22, fffffff2, } -VTRN/VTRNQ:20:result_int32x2 [] = { fffffff1, 33, } +VTRN/VTRNQ:18:result_int8x8 [] = { fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } +VTRN/VTRNQ:19:result_int16x4 [] = { fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:20:result_int32x2 [] = { 33, 33, } VTRN/VTRNQ:21:result_int64x1 [] = { 3333333333333333, } -VTRN/VTRNQ:22:result_uint8x8 [] = { f1, 55, 55, f2, f3, 55, 55, f4, } -VTRN/VTRNQ:23:result_uint16x4 [] = { fff1, 66, 66, fff2, } -VTRN/VTRNQ:24:result_uint32x2 [] = { fffffff1, 77, } +VTRN/VTRNQ:22:result_uint8x8 [] = { f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:23:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VTRN/VTRNQ:24:result_uint32x2 [] = { 77, 77, } VTRN/VTRNQ:25:result_uint64x1 [] = { 3333333333333333, } -VTRN/VTRNQ:26:result_float32x2 [] = { c1700000 -0x1.e000000p+3 -15, 42066666 0x1.0ccccc0p+5 33.6, } -VTRN/VTRNQ:27:result_int8x16 [] = { fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, fffffff8, } -VTRN/VTRNQ:28:result_int16x8 [] = { fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, fffffff4, } -VTRN/VTRNQ:29:result_int32x4 [] = { fffffff1, 33, 33, fffffff2, } +VTRN/VTRNQ:26:result_float32x2 [] = { 42066666 0x1.0ccccc0p+5 33.6, 42066666 0x1.0ccccc0p+5 33.6, } +VTRN/VTRNQ:27:result_int8x16 [] = { fffffff8, fffffff9, 11, 11, fffffffa, fffffffb, 11, 11, fffffffc, fffffffd, 11, 11, fffffffe, ffffffff, 11, 11, } +VTRN/VTRNQ:28:result_int16x8 [] = { fffffff4, fffffff5, 22, 22, fffffff6, fffffff7, 22, 22, } +VTRN/VTRNQ:29:result_int32x4 [] = { fffffff2, fffffff3, 33, 33, } VTRN/VTRNQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } -VTRN/VTRNQ:31:result_uint8x16 [] = { f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, f8, } -VTRN/VTRNQ:32:result_uint16x8 [] = { fff1, 66, 66, fff2, fff3, 66, 66, fff4, } -VTRN/VTRNQ:33:result_uint32x4 [] = { fffffff1, 77, 77, fffffff2, } +VTRN/VTRNQ:31:result_uint8x16 [] = { f8, f9, 55, 55, fa, fb, 55, 55, fc, fd, 55, 55, fe, ff, 55, 55, } +VTRN/VTRNQ:32:result_uint16x8 [] = { fff4, fff5, 66, 66, fff6, fff7, 66, 66, } +VTRN/VTRNQ:33:result_uint32x4 [] = { fffffff2, fffffff3, 77, 77, } VTRN/VTRNQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } -VTRN/VTRNQ:35:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, c1600000 -0x1.c000000p+3 -14, } +VTRN/VTRNQ:35:result_float32x4 [] = { c1600000 -0x1.c000000p+3 -14, c1500000 -0x1.a000000p+3 -13, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, } VUZP/VUZPQ chunk 0 output: VUZP/VUZPQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } @@ -2871,24 +2871,24 @@ VUZP/VUZPQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } VUZP/VUZPQ:17:result_float32x4 [] = { c1800000 -0x1.0000000p+4 -16, c1700000 -0x1.e000000p+3 -15, c1600000 -0x1.c000000p+3 -14, c1500000 -0x1.a000000p+3 -13, } VUZP/VUZPQ chunk 1 output: -VUZP/VUZPQ:18:result_int8x8 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, } -VUZP/VUZPQ:19:result_int16x4 [] = { fffffff1, fffffff2, fffffff3, 22, } -VUZP/VUZPQ:20:result_int32x2 [] = { fffffff1, 33, } +VUZP/VUZPQ:18:result_int8x8 [] = { 11, 11, 11, 11, 11, 11, 11, 11, } +VUZP/VUZPQ:19:result_int16x4 [] = { 22, 22, 22, 22, } +VUZP/VUZPQ:20:result_int32x2 [] = { 33, 33, } VUZP/VUZPQ:21:result_int64x1 [] = { 3333333333333333, } -VUZP/VUZPQ:22:result_uint8x8 [] = { f1, f2, f3, f4, f5, f6, f7, 55, } -VUZP/VUZPQ:23:result_uint16x4 [] = { fff1, fff2, fff3, 66, } -VUZP/VUZPQ:24:result_uint32x2 [] = { fffffff1, 77, } +VUZP/VUZPQ:22:result_uint8x8 [] = { 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:23:result_uint16x4 [] = { 66, 66, 66, 66, } +VUZP/VUZPQ:24:result_uint32x2 [] = { 77, 77, } VUZP/VUZPQ:25:result_uint64x1 [] = { 3333333333333333, } -VUZP/VUZPQ:26:result_float32x2 [] = { c1700000 -0x1.e000000p+3 -15, 42066666 0x1.0ccccc0p+5 33.6, } -VUZP/VUZPQ:27:result_int8x16 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 11, } -VUZP/VUZPQ:28:result_int16x8 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 22, } -VUZP/VUZPQ:29:result_int32x4 [] = { fffffff1, fffffff2, fffffff3, 33, } +VUZP/VUZPQ:26:result_float32x2 [] = { 42066666 0x1.0ccccc0p+5 33.6, 42066666 0x1.0ccccc0p+5 33.6, } +VUZP/VUZPQ:27:result_int8x16 [] = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VUZP/VUZPQ:28:result_int16x8 [] = { 22, 22, 22, 22, 22, 22, 22, 22, } +VUZP/VUZPQ:29:result_int32x4 [] = { 33, 33, 33, 33, } VUZP/VUZPQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } -VUZP/VUZPQ:31:result_uint8x16 [] = { f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, 55, } -VUZP/VUZPQ:32:result_uint16x8 [] = { fff1, fff2, fff3, fff4, fff5, fff6, fff7, 66, } -VUZP/VUZPQ:33:result_uint32x4 [] = { fffffff1, fffffff2, fffffff3, 77, } +VUZP/VUZPQ:31:result_uint8x16 [] = { 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:32:result_uint16x8 [] = { 66, 66, 66, 66, 66, 66, 66, 66, } +VUZP/VUZPQ:33:result_uint32x4 [] = { 77, 77, 77, 77, } VUZP/VUZPQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } -VUZP/VUZPQ:35:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, c1600000 -0x1.c000000p+3 -14, c1500000 -0x1.a000000p+3 -13, 42073333 0x1.0e66660p+5 33.8, } +VUZP/VUZPQ:35:result_float32x4 [] = { 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, } VZIP/VZIPQ chunk 0 output: VZIP/VZIPQ:0:result_int8x8 [] = { fffffff0, fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, } @@ -2911,24 +2911,24 @@ VZIP/VZIPQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } VZIP/VZIPQ:17:result_float32x4 [] = { c1800000 -0x1.0000000p+4 -16, c1600000 -0x1.c000000p+3 -14, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, } VZIP/VZIPQ chunk 1 output: -VZIP/VZIPQ:18:result_int8x8 [] = { fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, fffffff2, } -VZIP/VZIPQ:19:result_int16x4 [] = { fffffff2, 22, 22, fffffff1, } -VZIP/VZIPQ:20:result_int32x2 [] = { fffffff1, 33, } +VZIP/VZIPQ:18:result_int8x8 [] = { fffffff2, fffffff6, 11, 11, fffffff3, fffffff7, 11, 11, } +VZIP/VZIPQ:19:result_int16x4 [] = { fffffff1, fffffff3, 22, 22, } +VZIP/VZIPQ:20:result_int32x2 [] = { 33, 33, } VZIP/VZIPQ:21:result_int64x1 [] = { 3333333333333333, } -VZIP/VZIPQ:22:result_uint8x8 [] = { f4, 55, 55, f1, f5, 55, 55, f2, } -VZIP/VZIPQ:23:result_uint16x4 [] = { fff2, 66, 66, fff1, } -VZIP/VZIPQ:24:result_uint32x2 [] = { fffffff1, 77, } +VZIP/VZIPQ:22:result_uint8x8 [] = { f2, f6, 55, 55, f3, f7, 55, 55, } +VZIP/VZIPQ:23:result_uint16x4 [] = { fff1, fff3, 66, 66, } +VZIP/VZIPQ:24:result_uint32x2 [] = { 77, 77, } VZIP/VZIPQ:25:result_uint64x1 [] = { 3333333333333333, } -VZIP/VZIPQ:26:result_float32x2 [] = { c1700000 -0x1.e000000p+3 -15, 42066666 0x1.0ccccc0p+5 33.6, } -VZIP/VZIPQ:27:result_int8x16 [] = { fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, fffffff4, } -VZIP/VZIPQ:28:result_int16x8 [] = { fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, fffffff2, } -VZIP/VZIPQ:29:result_int32x4 [] = { fffffff2, 33, 33, fffffff1, } +VZIP/VZIPQ:26:result_float32x2 [] = { 42066666 0x1.0ccccc0p+5 33.6, 42066666 0x1.0ccccc0p+5 33.6, } +VZIP/VZIPQ:27:result_int8x16 [] = { fffffff4, fffffffc, 11, 11, fffffff5, fffffffd, 11, 11, fffffff6, fffffffe, 11, 11, fffffff7, ffffffff, 11, 11, } +VZIP/VZIPQ:28:result_int16x8 [] = { fffffff2, fffffff6, 22, 22, fffffff3, fffffff7, 22, 22, } +VZIP/VZIPQ:29:result_int32x4 [] = { fffffff1, fffffff3, 33, 33, } VZIP/VZIPQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } -VZIP/VZIPQ:31:result_uint8x16 [] = { f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, f4, } -VZIP/VZIPQ:32:result_uint16x8 [] = { fff4, 66, 66, fff1, fff5, 66, 66, fff2, } -VZIP/VZIPQ:33:result_uint32x4 [] = { fffffff2, 77, 77, fffffff1, } +VZIP/VZIPQ:31:result_uint8x16 [] = { f4, fc, 55, 55, f5, fd, 55, 55, f6, fe, 55, 55, f7, ff, 55, 55, } +VZIP/VZIPQ:32:result_uint16x8 [] = { fff2, fff6, 66, 66, fff3, fff7, 66, 66, } +VZIP/VZIPQ:33:result_uint32x4 [] = { fffffff1, fffffff3, 77, 77, } VZIP/VZIPQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } -VZIP/VZIPQ:35:result_float32x4 [] = { c1600000 -0x1.c000000p+3 -14, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, c1700000 -0x1.e000000p+3 -15, } +VZIP/VZIPQ:35:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, c1500000 -0x1.a000000p+3 -13, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, } VREINTERPRET/VREINTERPRETQ output: VREINTERPRET/VREINTERPRETQ:0:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } diff --git a/ref_vuzp.c b/ref_vuzp.c index 2f66f66..01a4040 100644 --- a/ref_vuzp.c +++ b/ref_vuzp.c @@ -63,7 +63,7 @@ FNNAME (INSN_NAME) /* Overwrite "result" with the contents of "result_bis"[X] */ #define TEST_EXTRA_CHUNK(T1, W, N, X) \ - memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X]), \ + memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X*N]), \ sizeof(VECT_VAR(result, T1, W, N))); /* With ARM RVCT, we need to declare variables before any executable -- cgit v1.2.3