diff options
-rw-r--r-- | README | 6 | ||||
-rw-r--r-- | ref-rvct-all.txt | 1528 | ||||
-rw-r--r-- | ref-rvct-neon-nofp16.txt | 1528 | ||||
-rw-r--r-- | ref-rvct-neon.txt | 1528 | ||||
-rw-r--r-- | ref_v_binary_sat_op.c | 20 | ||||
-rw-r--r-- | ref_v_unary_sat_op.c | 18 | ||||
-rw-r--r-- | ref_vqabs.c | 2 | ||||
-rw-r--r-- | ref_vqadd.c | 12 | ||||
-rw-r--r-- | ref_vqdmlal.c | 28 | ||||
-rw-r--r-- | ref_vqdmlal_lane.c | 33 | ||||
-rw-r--r-- | ref_vqdmlal_n.c | 28 | ||||
-rw-r--r-- | ref_vqdmulh.c | 22 | ||||
-rw-r--r-- | ref_vqdmulh_lane.c | 28 | ||||
-rw-r--r-- | ref_vqdmulh_n.c | 14 | ||||
-rw-r--r-- | ref_vqdmull.c | 26 | ||||
-rw-r--r-- | ref_vqdmull_lane.c | 28 | ||||
-rw-r--r-- | ref_vqdmull_n.c | 27 | ||||
-rw-r--r-- | ref_vqmovn.c | 23 | ||||
-rw-r--r-- | ref_vqmovun.c | 21 | ||||
-rw-r--r-- | ref_vqneg.c | 2 | ||||
-rw-r--r-- | ref_vqrdmulh.c | 32 | ||||
-rw-r--r-- | ref_vqrdmulh_lane.c | 34 | ||||
-rw-r--r-- | ref_vqrdmulh_n.c | 20 | ||||
-rw-r--r-- | ref_vqrshl.c | 61 | ||||
-rw-r--r-- | ref_vqrshrn_n.c | 24 | ||||
-rw-r--r-- | ref_vqrshrun_n.c | 43 | ||||
-rw-r--r-- | ref_vqshl.c | 27 | ||||
-rw-r--r-- | ref_vqshl_n.c | 11 | ||||
-rw-r--r-- | ref_vqshlu_n.c | 23 | ||||
-rw-r--r-- | ref_vqshrn_n.c | 24 | ||||
-rw-r--r-- | ref_vqshrun_n.c | 29 | ||||
-rw-r--r-- | ref_vqsub.c | 15 | ||||
-rw-r--r-- | stm-arm-neon-ref.h | 41 |
33 files changed, 2669 insertions, 2637 deletions
@@ -31,9 +31,9 @@ Known issues: Some tests currently fail to build with GCC/ARM: - missing include files: dspfns.h, armdsp.h -As GCC/ARM provides no support for the Neon_Overflow/fpsrc register, -auxiliary accessor functions have been implemented in -stm-arm-neon-ref.h. +As GCC/ARM provides no support for the +Neon_Cumulative_Saturation/fpsrc register, auxiliary accessor +functions have been implemented in stm-arm-neon-ref.h. Engineering: ------------ diff --git a/ref-rvct-all.txt b/ref-rvct-all.txt index 2309744..c7b6028 100644 --- a/ref-rvct-all.txt +++ b/ref-rvct-all.txt @@ -367,9 +367,9 @@ VGET_LOW:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VGET_LOW:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VGET_LOW:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_LANE overflow output: -VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon overflow 0 -VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon overflow 0 +VQDMLAL_LANE cumulative saturation output: +VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon cumulative saturation 0 VQDMLAL_LANE output: VQDMLAL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -397,9 +397,9 @@ VQDMLAL_LANE:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLAL_LANE:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_LANE:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_LANE (mul with input=0) overflow output: -VQDMLAL_LANE:26:vqdmlal_lane_s16 Neon overflow 0 -VQDMLAL_LANE:27:vqdmlal_lane_s32 Neon overflow 0 +VQDMLAL_LANE (mul with input=0) cumulative saturation output: +VQDMLAL_LANE:26:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:27:vqdmlal_lane_s32 Neon cumulative saturation 0 VQDMLAL_LANE (mul with input=0) output: VQDMLAL_LANE:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -427,11 +427,11 @@ VQDMLAL_LANE:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLAL_LANE:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_LANE:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_LANE (check mul overflow) overflow output: -VQDMLAL_LANE:52:vqdmlal_lane_s16 Neon overflow 1 -VQDMLAL_LANE:53:vqdmlal_lane_s32 Neon overflow 1 +VQDMLAL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_LANE:52:vqdmlal_lane_s16 Neon cumulative saturation 1 +VQDMLAL_LANE:53:vqdmlal_lane_s32 Neon cumulative saturation 1 -VQDMLAL_LANE (check mul overflow) output: +VQDMLAL_LANE (check mul cumulative saturation) output: VQDMLAL_LANE:54:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL_LANE:55:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL_LANE:56:result_int32x2 [] = { 33333333, 33333333, } @@ -457,9 +457,9 @@ VQDMLAL_LANE:75:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLAL_LANE:76:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_LANE:77:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_LANE overflow output: -VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon overflow 0 -VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon overflow 0 +VQDMLSL_LANE cumulative saturation output: +VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon cumulative saturation 0 VQDMLSL_LANE output: VQDMLSL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -487,9 +487,9 @@ VQDMLSL_LANE:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLSL_LANE:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL_LANE:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_LANE (mul with input=0) overflow output: -VQDMLSL_LANE:26:vqdmlsl_lane_s16 Neon overflow 0 -VQDMLSL_LANE:27:vqdmlsl_lane_s32 Neon overflow 0 +VQDMLSL_LANE (mul with input=0) cumulative saturation output: +VQDMLSL_LANE:26:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:27:vqdmlsl_lane_s32 Neon cumulative saturation 0 VQDMLSL_LANE (mul with input=0) output: VQDMLSL_LANE:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -517,11 +517,11 @@ VQDMLSL_LANE:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLSL_LANE:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL_LANE:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_LANE (check mul overflow) overflow output: -VQDMLSL_LANE:52:vqdmlsl_lane_s16 Neon overflow 1 -VQDMLSL_LANE:53:vqdmlsl_lane_s32 Neon overflow 1 +VQDMLSL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_LANE:52:vqdmlsl_lane_s16 Neon cumulative saturation 1 +VQDMLSL_LANE:53:vqdmlsl_lane_s32 Neon cumulative saturation 1 -VQDMLSL_LANE (check mul overflow) output: +VQDMLSL_LANE (check mul cumulative saturation) output: VQDMLSL_LANE:54:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL_LANE:55:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL_LANE:56:result_int32x2 [] = { 33333333, 33333333, } @@ -547,9 +547,9 @@ VQDMLSL_LANE:75:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLSL_LANE:76:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL_LANE:77:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_N overflow output: -VQDMLAL_N:0:vqdmlal_n_s16 Neon overflow 0 -VQDMLAL_N:1:vqdmlal_n_s32 Neon overflow 0 +VQDMLAL_N cumulative saturation output: +VQDMLAL_N:0:vqdmlal_n_s16 Neon cumulative saturation 0 +VQDMLAL_N:1:vqdmlal_n_s32 Neon cumulative saturation 0 VQDMLAL_N output: VQDMLAL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -577,11 +577,11 @@ VQDMLAL_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQDMLAL_N:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_N:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_N (check mul overflow) overflow output: -VQDMLAL_N:26:vqdmlal_n_s16 Neon overflow 1 -VQDMLAL_N:27:vqdmlal_n_s32 Neon overflow 1 +VQDMLAL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_N:26:vqdmlal_n_s16 Neon cumulative saturation 1 +VQDMLAL_N:27:vqdmlal_n_s32 Neon cumulative saturation 1 -VQDMLAL_N (check mul overflow) output: +VQDMLAL_N (check mul cumulative saturation) output: VQDMLAL_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL_N:30:result_int32x2 [] = { 33333333, 33333333, } @@ -607,9 +607,9 @@ VQDMLAL_N:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQDMLAL_N:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_N:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_N overflow output: -VQDMLSL_N:0:vqdmlsl_n_s16 Neon overflow 0 -VQDMLSL_N:1:vqdmlsl_n_s32 Neon overflow 0 +VQDMLSL_N cumulative saturation output: +VQDMLSL_N:0:vqdmlsl_n_s16 Neon cumulative saturation 0 +VQDMLSL_N:1:vqdmlsl_n_s32 Neon cumulative saturation 0 VQDMLSL_N output: VQDMLSL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -637,11 +637,11 @@ VQDMLSL_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQDMLSL_N:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL_N:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_N (check mul overflow) overflow output: -VQDMLSL_N:26:vqdmlsl_n_s16 Neon overflow 1 -VQDMLSL_N:27:vqdmlsl_n_s32 Neon overflow 1 +VQDMLSL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_N:26:vqdmlsl_n_s16 Neon cumulative saturation 1 +VQDMLSL_N:27:vqdmlsl_n_s32 Neon cumulative saturation 1 -VQDMLSL_N (check mul overflow) output: +VQDMLSL_N (check mul cumulative saturation) output: VQDMLSL_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL_N:30:result_int32x2 [] = { 33333333, 33333333, } @@ -823,13 +823,13 @@ VRSHRN_N:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VRSHRN_N:70:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VRSHRN_N:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRN_N overflow output: -VQRSHRN_N:0:vqrshrn_n_s16 Neon overflow 0 -VQRSHRN_N:1:vqrshrn_n_s32 Neon overflow 0 -VQRSHRN_N:2:vqrshrn_n_s64 Neon overflow 0 -VQRSHRN_N:3:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:4:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:5:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N cumulative saturation output: +VQRSHRN_N:0:vqrshrn_n_s16 Neon cumulative saturation 0 +VQRSHRN_N:1:vqrshrn_n_s32 Neon cumulative saturation 0 +VQRSHRN_N:2:vqrshrn_n_s64 Neon cumulative saturation 0 +VQRSHRN_N:3:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:4:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:5:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N output: VQRSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } @@ -857,13 +857,13 @@ VQRSHRN_N:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHRN_N:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRN_N:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRN_N (check saturation: shift by 3) overflow output: -VQRSHRN_N:30:vqrshrn_n_s16 Neon overflow 1 -VQRSHRN_N:31:vqrshrn_n_s32 Neon overflow 1 -VQRSHRN_N:32:vqrshrn_n_s64 Neon overflow 1 -VQRSHRN_N:33:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:34:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:35:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQRSHRN_N:30:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:31:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:32:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:33:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:34:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:35:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N (check saturation: shift by 3) output: VQRSHRN_N:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -891,13 +891,13 @@ VQRSHRN_N:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHRN_N:58:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRN_N:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRN_N (check saturation: shift by max) overflow output: -VQRSHRN_N:60:vqrshrn_n_s16 Neon overflow 1 -VQRSHRN_N:61:vqrshrn_n_s32 Neon overflow 1 -VQRSHRN_N:62:vqrshrn_n_s64 Neon overflow 1 -VQRSHRN_N:63:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:64:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:65:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N (check saturation: shift by max) cumulative saturation output: +VQRSHRN_N:60:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:61:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:62:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:63:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:64:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:65:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N (check saturation: shift by max) output: VQRSHRN_N:66:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -976,23 +976,23 @@ vgetq_lane_p16: fff6 vgetq_lane_f32: c1500000 -VQSUB/VQSUBQ overflow output: -VQSUB/VQSUBQ:0:vqsub_s8 Neon overflow 0 -VQSUB/VQSUBQ:1:vqsub_s16 Neon overflow 0 -VQSUB/VQSUBQ:2:vqsub_s32 Neon overflow 0 -VQSUB/VQSUBQ:3:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:4:vqsub_u8 Neon overflow 0 -VQSUB/VQSUBQ:5:vqsub_u16 Neon overflow 0 -VQSUB/VQSUBQ:6:vqsub_u32 Neon overflow 0 -VQSUB/VQSUBQ:7:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:8:vqsubq_s8 Neon overflow 0 -VQSUB/VQSUBQ:9:vqsubq_s16 Neon overflow 0 -VQSUB/VQSUBQ:10:vqsubq_s32 Neon overflow 0 -VQSUB/VQSUBQ:11:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:12:vqsubq_u8 Neon overflow 0 -VQSUB/VQSUBQ:13:vqsubq_u16 Neon overflow 0 -VQSUB/VQSUBQ:14:vqsubq_u32 Neon overflow 0 -VQSUB/VQSUBQ:15:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ cumulative saturation output: +VQSUB/VQSUBQ:0:vqsub_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:1:vqsub_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:2:vqsub_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:3:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:4:vqsub_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:5:vqsub_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:6:vqsub_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:7:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:8:vqsubq_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:9:vqsubq_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:10:vqsubq_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:11:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:12:vqsubq_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:13:vqsubq_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:14:vqsubq_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:15:vqsubq_u64 Neon cumulative saturation 0 VQSUB/VQSUBQ output: VQSUB/VQSUBQ:16:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } @@ -1020,11 +1020,11 @@ VQSUB/VQSUBQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQSUB/VQSUBQ:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSUB/VQSUBQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:40:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:41:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:42:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:43:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:40:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:41:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:42:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:43:vqsubq_u64 Neon cumulative saturation 0 64 bits saturation: VQSUB/VQSUBQ:44:result_int64x1 [] = { fffffffffffffff0, } @@ -1032,33 +1032,33 @@ VQSUB/VQSUBQ:45:result_uint64x1 [] = { fffffffffffffff0, } VQSUB/VQSUBQ:46:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } VQSUB/VQSUBQ:47:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:48:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:49:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:50:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:51:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:48:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:49:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:50:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:51:vqsubq_u64 Neon cumulative saturation 0 VQSUB/VQSUBQ:52:result_int64x1 [] = { ffffffffffffffac, } VQSUB/VQSUBQ:53:result_uint64x1 [] = { ffffffffffffff68, } VQSUB/VQSUBQ:54:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } VQSUB/VQSUBQ:55:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:56:vqsub_s64 Neon overflow 1 -VQSUB/VQSUBQ:57:vqsub_u64 Neon overflow 1 -VQSUB/VQSUBQ:58:vqsubq_s64 Neon overflow 1 -VQSUB/VQSUBQ:59:vqsubq_u64 Neon overflow 1 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:56:vqsub_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:57:vqsub_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:58:vqsubq_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:59:vqsubq_u64 Neon cumulative saturation 1 VQSUB/VQSUBQ:60:result_int64x1 [] = { 8000000000000000, } VQSUB/VQSUBQ:61:result_uint64x1 [] = { 0, } VQSUB/VQSUBQ:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } VQSUB/VQSUBQ:63:result_uint64x2 [] = { 0, 0, } less than 64 bits saturation: -VQSUB/VQSUBQ:64:vqsub_s8 Neon overflow 1 -VQSUB/VQSUBQ:65:vqsub_s16 Neon overflow 1 -VQSUB/VQSUBQ:66:vqsub_s32 Neon overflow 1 -VQSUB/VQSUBQ:67:vqsubq_s8 Neon overflow 1 -VQSUB/VQSUBQ:68:vqsubq_s16 Neon overflow 1 -VQSUB/VQSUBQ:69:vqsubq_s32 Neon overflow 1 +VQSUB/VQSUBQ:64:vqsub_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:65:vqsub_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:66:vqsub_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:67:vqsubq_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:68:vqsubq_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:69:vqsubq_s32 Neon cumulative saturation 1 VQSUB/VQSUBQ:70:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQSUB/VQSUBQ:71:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQSUB/VQSUBQ:72:result_int32x2 [] = { 80000000, 80000000, } @@ -1066,13 +1066,13 @@ VQSUB/VQSUBQ:73:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ff VQSUB/VQSUBQ:74:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } VQSUB/VQSUBQ:75:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } -VQSUB/VQSUBQ less than 64 bits saturation overflow output: -VQSUB/VQSUBQ:76:vqsub_u8 Neon overflow 1 -VQSUB/VQSUBQ:77:vqsub_u16 Neon overflow 1 -VQSUB/VQSUBQ:78:vqsub_u32 Neon overflow 1 -VQSUB/VQSUBQ:79:vqsubq_u8 Neon overflow 1 -VQSUB/VQSUBQ:80:vqsubq_u16 Neon overflow 1 -VQSUB/VQSUBQ:81:vqsubq_u32 Neon overflow 1 +VQSUB/VQSUBQ less than 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:76:vqsub_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:77:vqsub_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:78:vqsub_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:79:vqsubq_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:80:vqsubq_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:81:vqsubq_u32 Neon cumulative saturation 1 VQSUB/VQSUBQ:82:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQSUB/VQSUBQ:83:result_uint16x4 [] = { 0, 0, 0, 0, } VQSUB/VQSUBQ:84:result_uint32x2 [] = { 0, 0, } @@ -1080,11 +1080,11 @@ VQSUB/VQSUBQ:85:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VQSUB/VQSUBQ:86:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQSUB/VQSUBQ:87:result_uint32x4 [] = { 0, 0, 0, 0, } -VQDMULH overflow output: -VQDMULH:0:vqdmulh_s16 Neon overflow 0 -VQDMULH:1:vqdmulh_s32 Neon overflow 0 -VQDMULH:2:vqdmulhq_s16 Neon overflow 0 -VQDMULH:3:vqdmulhq_s32 Neon overflow 0 +VQDMULH cumulative saturation output: +VQDMULH:0:vqdmulh_s16 Neon cumulative saturation 0 +VQDMULH:1:vqdmulh_s32 Neon cumulative saturation 0 +VQDMULH:2:vqdmulhq_s16 Neon cumulative saturation 0 +VQDMULH:3:vqdmulhq_s32 Neon cumulative saturation 0 VQDMULH output: VQDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1112,11 +1112,11 @@ VQDMULH:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMULH:26:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULH overflow output: -VQDMULH:28:vqdmulh_s16 Neon overflow 1 -VQDMULH:29:vqdmulh_s32 Neon overflow 1 -VQDMULH:30:vqdmulhq_s16 Neon overflow 1 -VQDMULH:31:vqdmulhq_s32 Neon overflow 1 +VQDMULH cumulative saturation output: +VQDMULH:28:vqdmulh_s16 Neon cumulative saturation 1 +VQDMULH:29:vqdmulh_s32 Neon cumulative saturation 1 +VQDMULH:30:vqdmulhq_s16 Neon cumulative saturation 1 +VQDMULH:31:vqdmulhq_s32 Neon cumulative saturation 1 VQDMULH output: VQDMULH:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1144,11 +1144,11 @@ VQDMULH:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMULH:54:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULH_LANE overflow output: -VQDMULH_LANE:0:vqdmulh_lane_s16 Neon overflow 0 -VQDMULH_LANE:1:vqdmulh_lane_s32 Neon overflow 0 -VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon overflow 0 -VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon overflow 0 +VQDMULH_LANE cumulative saturation output: +VQDMULH_LANE:0:vqdmulh_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:1:vqdmulh_lane_s32 Neon cumulative saturation 0 +VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon cumulative saturation 0 VQDMULH_LANE output: VQDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1176,13 +1176,13 @@ VQDMULH_LANE:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMULH_LANE:26:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH_LANE:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULH_LANE (check mul overflow) overflow output: -VQDMULH_LANE:28:vqdmulh_lane_s16 Neon overflow 1 -VQDMULH_LANE:29:vqdmulh_lane_s32 Neon overflow 1 -VQDMULH_LANE:30:vqdmulhq_lane_s16 Neon overflow 1 -VQDMULH_LANE:31:vqdmulhq_lane_s32 Neon overflow 1 +VQDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULH_LANE:28:vqdmulh_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:29:vqdmulh_lane_s32 Neon cumulative saturation 1 +VQDMULH_LANE:30:vqdmulhq_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:31:vqdmulhq_lane_s32 Neon cumulative saturation 1 -VQDMULH_LANE (check mul overflow) output: +VQDMULH_LANE (check mul cumulative saturation) output: VQDMULH_LANE:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULH_LANE:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQDMULH_LANE:34:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -1208,11 +1208,11 @@ VQDMULH_LANE:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMULH_LANE:54:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH_LANE:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULH_N overflow output: -VQDMULH_N:0:vqdmulh_n_s16 Neon overflow 0 -VQDMULH_N:1:vqdmulh_n_s32 Neon overflow 0 -VQDMULH_N:2:vqdmulhq_n_s16 Neon overflow 0 -VQDMULH_N:3:vqdmulhq_n_s32 Neon overflow 0 +VQDMULH_N cumulative saturation output: +VQDMULH_N:0:vqdmulh_n_s16 Neon cumulative saturation 0 +VQDMULH_N:1:vqdmulh_n_s32 Neon cumulative saturation 0 +VQDMULH_N:2:vqdmulhq_n_s16 Neon cumulative saturation 0 +VQDMULH_N:3:vqdmulhq_n_s32 Neon cumulative saturation 0 VQDMULH_N output: VQDMULH_N:4:result_int16x4 [] = { 19, 19, 19, 19, } @@ -1220,13 +1220,13 @@ VQDMULH_N:5:result_int32x2 [] = { 4, 4, } VQDMULH_N:6:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } VQDMULH_N:7:result_int32x4 [] = { a, a, a, a, } -VQDMULH_N (check mul overflow) overflow output: -VQDMULH_N:8:vqdmulh_n_s16 Neon overflow 1 -VQDMULH_N:9:vqdmulh_n_s32 Neon overflow 1 -VQDMULH_N:10:vqdmulhq_n_s16 Neon overflow 1 -VQDMULH_N:11:vqdmulhq_n_s32 Neon overflow 1 +VQDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQDMULH_N:8:vqdmulh_n_s16 Neon cumulative saturation 1 +VQDMULH_N:9:vqdmulh_n_s32 Neon cumulative saturation 1 +VQDMULH_N:10:vqdmulhq_n_s16 Neon cumulative saturation 1 +VQDMULH_N:11:vqdmulhq_n_s32 Neon cumulative saturation 1 -VQDMULH_N (check mul overflow) output: +VQDMULH_N (check mul cumulative saturation) output: VQDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -1252,9 +1252,9 @@ VQDMULH_N:33:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQDMULH_N:34:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH_N:35:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULL overflow output: -VQDMULL:0:vqdmull_s16 Neon overflow 0 -VQDMULL:1:vqdmull_s32 Neon overflow 0 +VQDMULL cumulative saturation output: +VQDMULL:0:vqdmull_s16 Neon cumulative saturation 0 +VQDMULL:1:vqdmull_s32 Neon cumulative saturation 0 VQDMULL output: VQDMULL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1282,11 +1282,11 @@ VQDMULL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMULL:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULL (check mul overflow) overflow output: -VQDMULL:26:vqdmull_s16 Neon overflow 1 -VQDMULL:27:vqdmull_s32 Neon overflow 1 +VQDMULL (check mul cumulative saturation) cumulative saturation output: +VQDMULL:26:vqdmull_s16 Neon cumulative saturation 1 +VQDMULL:27:vqdmull_s32 Neon cumulative saturation 1 -VQDMULL (check mul overflow) output: +VQDMULL (check mul cumulative saturation) output: VQDMULL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMULL:30:result_int32x2 [] = { 33333333, 33333333, } @@ -1312,9 +1312,9 @@ VQDMULL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMULL:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL overflow output: -VQDMLAL:0:vqdmlal_s16 Neon overflow 0 -VQDMLAL:1:vqdmlal_s32 Neon overflow 0 +VQDMLAL cumulative saturation output: +VQDMLAL:0:vqdmlal_s16 Neon cumulative saturation 0 +VQDMLAL:1:vqdmlal_s32 Neon cumulative saturation 0 VQDMLAL output: VQDMLAL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1342,11 +1342,11 @@ VQDMLAL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMLAL:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL (check mul overflow) overflow output: -VQDMLAL:26:vqdmlal_s16 Neon overflow 1 -VQDMLAL:27:vqdmlal_s32 Neon overflow 1 +VQDMLAL (check mul cumulative saturation) cumulative saturation output: +VQDMLAL:26:vqdmlal_s16 Neon cumulative saturation 1 +VQDMLAL:27:vqdmlal_s32 Neon cumulative saturation 1 -VQDMLAL (check mul overflow) output: +VQDMLAL (check mul cumulative saturation) output: VQDMLAL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL:30:result_int32x2 [] = { 33333333, 33333333, } @@ -1372,9 +1372,9 @@ VQDMLAL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMLAL:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL overflow output: -VQDMLSL:0:vqdmlsl_s16 Neon overflow 0 -VQDMLSL:1:vqdmlsl_s32 Neon overflow 0 +VQDMLSL cumulative saturation output: +VQDMLSL:0:vqdmlsl_s16 Neon cumulative saturation 0 +VQDMLSL:1:vqdmlsl_s32 Neon cumulative saturation 0 VQDMLSL output: VQDMLSL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1402,11 +1402,11 @@ VQDMLSL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMLSL:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL (check mul overflow) overflow output: -VQDMLSL:26:vqdmlsl_s16 Neon overflow 1 -VQDMLSL:27:vqdmlsl_s32 Neon overflow 1 +VQDMLSL (check mul cumulative saturation) cumulative saturation output: +VQDMLSL:26:vqdmlsl_s16 Neon cumulative saturation 1 +VQDMLSL:27:vqdmlsl_s32 Neon cumulative saturation 1 -VQDMLSL (check mul overflow) output: +VQDMLSL (check mul cumulative saturation) output: VQDMLSL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL:30:result_int32x2 [] = { 33333333, 33333333, } @@ -1696,23 +1696,23 @@ VSHL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, VSHL_N:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VSHL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (with input = 0) overflow output: -VQSHL/VQSHLQ:0:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:1:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:2:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:3:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:4:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:5:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:6:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:7:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:8:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:9:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:10:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:11:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:12:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:13:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:14:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:15:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (with input = 0) cumulative saturation output: +VQSHL/VQSHLQ:0:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:1:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:2:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:3:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:4:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:5:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:6:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:7:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:8:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:9:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:10:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:11:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:12:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:13:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:14:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:15:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (with input = 0) output: VQSHL/VQSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -1740,23 +1740,23 @@ VQSHL/VQSHLQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQSHL/VQSHLQ:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (input 0 and negative shift amount) overflow output: -VQSHL/VQSHLQ:40:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:41:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:42:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:43:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:44:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:45:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:46:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:47:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:48:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:49:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:50:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:51:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:52:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:53:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:54:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:55:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:40:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:41:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:42:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:43:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:44:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:45:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:46:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:47:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:48:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:49:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:50:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:51:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:52:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:53:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:54:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:55:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (input 0 and negative shift amount) output: VQSHL/VQSHLQ:56:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -1784,23 +1784,23 @@ VQSHL/VQSHLQ:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQSHL/VQSHLQ:78:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ overflow output: -VQSHL/VQSHLQ:80:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:81:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:82:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:83:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:84:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:85:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:86:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:87:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:88:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:89:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:90:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:91:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:92:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:93:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:94:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:95:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ cumulative saturation output: +VQSHL/VQSHLQ:80:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:81:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:82:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:83:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:84:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:85:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:86:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:87:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:88:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:89:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:90:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:91:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:92:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:93:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:94:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:95:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ output: VQSHL/VQSHLQ:96:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } @@ -1828,23 +1828,23 @@ VQSHL/VQSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:118:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (negative shift amount) overflow output: -VQSHL/VQSHLQ:120:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:121:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:122:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:123:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:124:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:125:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:126:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:127:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:128:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:129:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:130:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:131:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:132:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:133:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:134:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:135:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:120:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:121:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:122:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:123:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:124:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:125:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:126:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:127:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:128:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:129:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:130:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:131:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:132:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:133:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:134:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:135:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (negative shift amount) output: VQSHL/VQSHLQ:136:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } @@ -1872,23 +1872,23 @@ VQSHL/VQSHLQ:157:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:158:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:159:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (large shift amount, negative input) overflow output: -VQSHL/VQSHLQ:160:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:161:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:162:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:163:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:164:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:165:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:166:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:167:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:168:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:169:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:170:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:171:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:172:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:173:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:174:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:175:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (large shift amount, negative input) cumulative saturation output: +VQSHL/VQSHLQ:160:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:161:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:162:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:163:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:164:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:165:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:166:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:167:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:168:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:169:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:170:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:171:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:172:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:173:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:174:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:175:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (large shift amount, negative input) output: VQSHL/VQSHLQ:176:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } @@ -1916,25 +1916,25 @@ VQSHL/VQSHLQ:197:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:198:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:199:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (check saturation/overflow) overflow output: -VQSHL/VQSHLQ:200:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:201:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:202:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:203:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:204:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:205:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:206:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:207:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:208:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:209:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:210:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:211:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:212:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:213:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:214:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:215:vqshlq_u64 Neon overflow 0 - -VQSHL/VQSHLQ (check saturation/overflow) output: +VQSHL/VQSHLQ (check cumulative saturation) cumulative saturation output: +VQSHL/VQSHLQ:200:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:201:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:202:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:203:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:204:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:205:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:206:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:207:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:208:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:209:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:210:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:211:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:212:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:213:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:214:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:215:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (check cumulative saturation) output: VQSHL/VQSHLQ:216:result_int8x8 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } VQSHL/VQSHLQ:217:result_int16x4 [] = { 3fff, 3fff, 3fff, 3fff, } VQSHL/VQSHLQ:218:result_int32x2 [] = { 3fffffff, 3fffffff, } @@ -1960,23 +1960,23 @@ VQSHL/VQSHLQ:237:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:238:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:239:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (large shift amount, positive input) overflow output: -VQSHL/VQSHLQ:240:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:241:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:242:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:243:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:244:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:245:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:246:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:247:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:248:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:249:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:250:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:251:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:252:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:253:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:254:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:255:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (large shift amount, positive input) cumulative saturation output: +VQSHL/VQSHLQ:240:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:241:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:242:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:243:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:244:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:245:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:246:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:247:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:248:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:249:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:250:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:251:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:252:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:253:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:254:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:255:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (large shift amount, positive input) output: VQSHL/VQSHLQ:256:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -2004,23 +2004,23 @@ VQSHL/VQSHLQ:277:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:278:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:279:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (check saturation on 64 bits) overflow output: -VQSHL/VQSHLQ:280:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:281:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:282:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:283:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:284:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:285:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:286:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:287:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:288:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:289:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:290:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:291:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:292:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:293:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:294:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:295:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (check saturation on 64 bits) cumulative saturation output: +VQSHL/VQSHLQ:280:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:281:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:282:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:283:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:284:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:285:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:286:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:287:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:288:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:289:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:290:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:291:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:292:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:293:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:294:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:295:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (check saturation on 64 bits) output: VQSHL/VQSHLQ:296:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -2048,23 +2048,23 @@ VQSHL/VQSHLQ:317:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:318:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:319:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL_N/VQSHLQ_N overflow output: -VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon overflow 0 -VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon overflow 0 -VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon overflow 0 -VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon overflow 0 -VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon overflow 0 -VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon overflow 0 -VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon overflow 0 -VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon overflow 0 -VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N cumulative saturation output: +VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon cumulative saturation 1 VQSHL_N/VQSHLQ_N output: VQSHL_N/VQSHLQ_N:16:result_int8x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } @@ -2092,23 +2092,23 @@ VQSHL_N/VQSHLQ_N:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3 VQSHL_N/VQSHLQ_N:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL_N/VQSHLQ_N:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL_N/VQSHLQ_N (check saturation with large positive input) overflow output: -VQSHL_N/VQSHLQ_N:40:vqshl_n_s8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:41:vqshl_n_s16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:42:vqshl_n_s32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:43:vqshl_n_s64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:44:vqshl_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:45:vqshl_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:46:vqshl_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:47:vqshl_n_u64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:48:vqshlq_n_s8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:49:vqshlq_n_s16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:50:vqshlq_n_s32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:51:vqshlq_n_s64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:52:vqshlq_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:53:vqshlq_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:54:vqshlq_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:55:vqshlq_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N (check saturation with large positive input) cumulative saturation output: +VQSHL_N/VQSHLQ_N:40:vqshl_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:41:vqshl_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:42:vqshl_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:43:vqshl_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:44:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:45:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:46:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:47:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:48:vqshlq_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:49:vqshlq_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:50:vqshlq_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:51:vqshlq_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:52:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:53:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:54:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:55:vqshlq_n_u64 Neon cumulative saturation 1 VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: VQSHL_N/VQSHLQ_N:56:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -2630,35 +2630,35 @@ VDUP_LANE/VDUP_LANEQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff VDUP_LANE/VDUP_LANEQ:22:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, } VDUP_LANE/VDUP_LANEQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULL_LANE overflow output: -VQDMULL_LANE:0:vqdmull_lane_s16 Neon overflow 0 -VQDMULL_LANE:1:vqdmull_lane_s32 Neon overflow 0 +VQDMULL_LANE cumulative saturation output: +VQDMULL_LANE:0:vqdmull_lane_s16 Neon cumulative saturation 0 +VQDMULL_LANE:1:vqdmull_lane_s32 Neon cumulative saturation 0 VQDMULL_LANE output: VQDMULL_LANE:2:result_int32x4 [] = { 8000, 8000, 8000, 8000, } VQDMULL_LANE:3:result_int64x2 [] = { 4000, 4000, } -VQDMULL_LANE (check mul overflow) overflow output: -VQDMULL_LANE:4:vqdmull_lane_s16 Neon overflow 1 -VQDMULL_LANE:5:vqdmull_lane_s32 Neon overflow 1 +VQDMULL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULL_LANE:4:vqdmull_lane_s16 Neon cumulative saturation 1 +VQDMULL_LANE:5:vqdmull_lane_s32 Neon cumulative saturation 1 -VQDMULL_LANE (check mul overflow) output: +VQDMULL_LANE (check mul cumulative saturation) output: VQDMULL_LANE:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } VQDMULL_LANE:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } -VQDMULL_N overflow output: -VQDMULL_N:0:vqdmull_n_s16 Neon overflow 0 -VQDMULL_N:1:vqdmull_n_s32 Neon overflow 0 +VQDMULL_N cumulative saturation output: +VQDMULL_N:0:vqdmull_n_s16 Neon cumulative saturation 0 +VQDMULL_N:1:vqdmull_n_s32 Neon cumulative saturation 0 VQDMULL_N output: VQDMULL_N:2:result_int32x4 [] = { 44000, 44000, 44000, 44000, } VQDMULL_N:3:result_int64x2 [] = { aa000, aa000, } -VQDMULL_N (check mul overflow) overflow output: -VQDMULL_N:4:vqdmull_n_s16 Neon overflow 1 -VQDMULL_N:5:vqdmull_n_s32 Neon overflow 1 +VQDMULL_N (check mul cumulative saturation) cumulative saturation output: +VQDMULL_N:4:vqdmull_n_s16 Neon cumulative saturation 1 +VQDMULL_N:5:vqdmull_n_s32 Neon cumulative saturation 1 -VQDMULL_N (check mul overflow) output: +VQDMULL_N (check mul cumulative saturation) output: VQDMULL_N:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } VQDMULL_N:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } @@ -2718,23 +2718,23 @@ float32: VSUB/VSUBQ:24:result_float32x2 [] = { c00ccccd -0x1.19999a0p+1 -2.2, c00ccccd -0x1.19999a0p+1 -2.2, } VSUB/VSUBQ:25:result_float32x4 [] = { c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, } -VQADD/VQADDQ overflow output: -VQADD/VQADDQ:0:vqadd_s8 Neon overflow 0 -VQADD/VQADDQ:1:vqadd_s16 Neon overflow 0 -VQADD/VQADDQ:2:vqadd_s32 Neon overflow 0 -VQADD/VQADDQ:3:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:4:vqadd_u8 Neon overflow 1 -VQADD/VQADDQ:5:vqadd_u16 Neon overflow 1 -VQADD/VQADDQ:6:vqadd_u32 Neon overflow 1 -VQADD/VQADDQ:7:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:8:vqaddq_s8 Neon overflow 0 -VQADD/VQADDQ:9:vqaddq_s16 Neon overflow 0 -VQADD/VQADDQ:10:vqaddq_s32 Neon overflow 0 -VQADD/VQADDQ:11:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:12:vqaddq_u8 Neon overflow 1 -VQADD/VQADDQ:13:vqaddq_u16 Neon overflow 1 -VQADD/VQADDQ:14:vqaddq_u32 Neon overflow 1 -VQADD/VQADDQ:15:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ cumulative saturation output: +VQADD/VQADDQ:0:vqadd_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:1:vqadd_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:2:vqadd_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:3:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:4:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:5:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:6:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:7:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:8:vqaddq_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:9:vqaddq_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:10:vqaddq_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:11:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:12:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:13:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:14:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:15:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ output: VQADD/VQADDQ:16:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } @@ -2762,11 +2762,11 @@ VQADD/VQADDQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQADD/VQADDQ:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQADD/VQADDQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:40:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:41:vqadd_u64 Neon overflow 0 -VQADD/VQADDQ:42:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:43:vqaddq_u64 Neon overflow 0 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:40:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:41:vqadd_u64 Neon cumulative saturation 0 +VQADD/VQADDQ:42:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:43:vqaddq_u64 Neon cumulative saturation 0 64 bits saturation: VQADD/VQADDQ:44:result_int64x1 [] = { fffffffffffffff0, } @@ -2774,33 +2774,33 @@ VQADD/VQADDQ:45:result_uint64x1 [] = { fffffffffffffff0, } VQADD/VQADDQ:46:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } VQADD/VQADDQ:47:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:48:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:49:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:50:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:51:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:48:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:49:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:50:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:51:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ:52:result_int64x1 [] = { 34, } VQADD/VQADDQ:53:result_uint64x1 [] = { ffffffffffffffff, } VQADD/VQADDQ:54:result_int64x2 [] = { 34, 35, } VQADD/VQADDQ:55:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:56:vqadd_s64 Neon overflow 1 -VQADD/VQADDQ:57:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:58:vqaddq_s64 Neon overflow 1 -VQADD/VQADDQ:59:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:56:vqadd_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:57:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:58:vqaddq_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:59:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ:60:result_int64x1 [] = { 8000000000000000, } VQADD/VQADDQ:61:result_uint64x1 [] = { ffffffffffffffff, } VQADD/VQADDQ:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } VQADD/VQADDQ:63:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } less than 64 bits saturation: -VQADD/VQADDQ:64:vqadd_s8 Neon overflow 1 -VQADD/VQADDQ:65:vqadd_s16 Neon overflow 1 -VQADD/VQADDQ:66:vqadd_s32 Neon overflow 1 -VQADD/VQADDQ:67:vqaddq_s8 Neon overflow 1 -VQADD/VQADDQ:68:vqaddq_s16 Neon overflow 1 -VQADD/VQADDQ:69:vqaddq_s32 Neon overflow 1 +VQADD/VQADDQ:64:vqadd_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:65:vqadd_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:66:vqadd_s32 Neon cumulative saturation 1 +VQADD/VQADDQ:67:vqaddq_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:68:vqaddq_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:69:vqaddq_s32 Neon cumulative saturation 1 VQADD/VQADDQ:70:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQADD/VQADDQ:71:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQADD/VQADDQ:72:result_int32x2 [] = { 80000000, 80000000, } @@ -2808,13 +2808,13 @@ VQADD/VQADDQ:73:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ff VQADD/VQADDQ:74:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } VQADD/VQADDQ:75:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } -VQADD/VQADDQ less than 64 bits saturation overflow output: -VQADD/VQADDQ:76:vqadd_u8 Neon overflow 1 -VQADD/VQADDQ:77:vqadd_u16 Neon overflow 1 -VQADD/VQADDQ:78:vqadd_u32 Neon overflow 1 -VQADD/VQADDQ:79:vqaddq_u8 Neon overflow 1 -VQADD/VQADDQ:80:vqaddq_u16 Neon overflow 1 -VQADD/VQADDQ:81:vqaddq_u32 Neon overflow 1 +VQADD/VQADDQ less than 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:76:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:77:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:78:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:79:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:80:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:81:vqaddq_u32 Neon cumulative saturation 1 VQADD/VQADDQ:82:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } VQADD/VQADDQ:83:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } VQADD/VQADDQ:84:result_uint32x2 [] = { ffffffff, ffffffff, } @@ -2852,13 +2852,13 @@ float32: VABS/VABSQ:24:result_float32x2 [] = { 40133333 0x1.2666660p+1 2.3, 40133333 0x1.2666660p+1 2.3, } VABS/VABSQ:25:result_float32x4 [] = { 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, } -VQABS/VQABSQ overflow output: -VQABS/VQABSQ:0:vqabs_s8 Neon overflow 0 -VQABS/VQABSQ:1:vqabs_s16 Neon overflow 0 -VQABS/VQABSQ:2:vqabs_s32 Neon overflow 0 -VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 0 -VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 0 -VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 0 +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 0 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 0 VQABS/VQABSQ output: VQABS/VQABSQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } @@ -2886,13 +2886,13 @@ VQABS/VQABSQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQABS/VQABSQ:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQABS/VQABSQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQABS/VQABSQ overflow output: -VQABS/VQABSQ:0:vqabs_s8 Neon overflow 1 -VQABS/VQABSQ:1:vqabs_s16 Neon overflow 1 -VQABS/VQABSQ:2:vqabs_s32 Neon overflow 1 -VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 1 -VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 1 -VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 1 +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 1 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 1 VQABS/VQABSQ output: VQABS/VQABSQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -3040,13 +3040,13 @@ float32: VNEG/VNEGQ:24:result_float32x2 [] = { c0133333 -0x1.2666660p+1 -2.3, c0133333 -0x1.2666660p+1 -2.3, } VNEG/VNEGQ:25:result_float32x4 [] = { c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, } -VQNEG/VQNEGQ overflow output: -VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 0 -VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 0 -VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 0 -VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 0 -VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 0 -VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 0 +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 0 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 0 VQNEG/VQNEGQ output: VQNEG/VQNEGQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } @@ -3074,13 +3074,13 @@ VQNEG/VQNEGQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQNEG/VQNEGQ:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQNEG/VQNEGQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQNEG/VQNEGQ overflow output: -VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 1 -VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 1 -VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 1 -VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 1 -VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 1 -VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 1 +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 1 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 1 VQNEG/VQNEGQ output: VQNEG/VQNEGQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -3876,11 +3876,11 @@ VREINTERPRET/VREINTERPRETQ:243:result_poly8x16 [] = { 0, cc, 80, cb, 0, cb, 80, VREINTERPRET/VREINTERPRETQ:244:result_poly16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } VREINTERPRET/VREINTERPRETQ:245:result_float32x4 [] = { cb80cc00 -0x1.0198000p+24 -1.688166e+07, ca80cb00 -0x1.0196000p+22 -4220288, c980ca00 -0x1.0194000p+20 -1055040, c880c900 -0x1.0192000p+18 -263752, } -VQRDMULH overflow output: -VQRDMULH:0:vqrdmulh_s16 Neon overflow 0 -VQRDMULH:1:vqrdmulh_s32 Neon overflow 0 -VQRDMULH:2:vqrdmulhq_s16 Neon overflow 0 -VQRDMULH:3:vqrdmulhq_s32 Neon overflow 0 +VQRDMULH cumulative saturation output: +VQRDMULH:0:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:1:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:2:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:3:vqrdmulhq_s32 Neon cumulative saturation 0 VQRDMULH output: VQRDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -3908,13 +3908,13 @@ VQRDMULH:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRDMULH:26:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH (check mul overflow) overflow output: -VQRDMULH:28:vqrdmulh_s16 Neon overflow 1 -VQRDMULH:29:vqrdmulh_s32 Neon overflow 1 -VQRDMULH:30:vqrdmulhq_s16 Neon overflow 1 -VQRDMULH:31:vqrdmulhq_s32 Neon overflow 1 +VQRDMULH (check mul cumulative saturation) cumulative saturation output: +VQRDMULH:28:vqrdmulh_s16 Neon cumulative saturation 1 +VQRDMULH:29:vqrdmulh_s32 Neon cumulative saturation 1 +VQRDMULH:30:vqrdmulhq_s16 Neon cumulative saturation 1 +VQRDMULH:31:vqrdmulhq_s32 Neon cumulative saturation 1 -VQRDMULH (check mul overflow) output: +VQRDMULH (check mul cumulative saturation) output: VQRDMULH:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH:34:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3940,13 +3940,13 @@ VQRDMULH:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRDMULH:54:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH (check rounding overflow) overflow output: -VQRDMULH:56:vqrdmulh_s16 Neon overflow 0 -VQRDMULH:57:vqrdmulh_s32 Neon overflow 0 -VQRDMULH:58:vqrdmulhq_s16 Neon overflow 0 -VQRDMULH:59:vqrdmulhq_s32 Neon overflow 0 +VQRDMULH (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH:56:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:57:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:58:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:59:vqrdmulhq_s32 Neon cumulative saturation 0 -VQRDMULH (check rounding overflow) output: +VQRDMULH (check rounding cumulative saturation) output: VQRDMULH:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH:61:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH:62:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3972,11 +3972,11 @@ VQRDMULH:81:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRDMULH:82:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH:83:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_LANE overflow output: -VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon overflow 0 -VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon overflow 0 -VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon overflow 0 -VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon overflow 0 +VQRDMULH_LANE cumulative saturation output: +VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon cumulative saturation 0 VQRDMULH_LANE output: VQRDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -4004,13 +4004,13 @@ VQRDMULH_LANE:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQRDMULH_LANE:26:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_LANE:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_LANE (check mul overflow) overflow output: -VQRDMULH_LANE:28:vqrdmulh_lane_s16 Neon overflow 1 -VQRDMULH_LANE:29:vqrdmulh_lane_s32 Neon overflow 1 -VQRDMULH_LANE:30:vqrdmulhq_lane_s16 Neon overflow 1 -VQRDMULH_LANE:31:vqrdmulhq_lane_s32 Neon overflow 1 +VQRDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:28:vqrdmulh_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:29:vqrdmulh_lane_s32 Neon cumulative saturation 1 +VQRDMULH_LANE:30:vqrdmulhq_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:31:vqrdmulhq_lane_s32 Neon cumulative saturation 1 -VQRDMULH_LANE (check mul overflow) output: +VQRDMULH_LANE (check mul cumulative saturation) output: VQRDMULH_LANE:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_LANE:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_LANE:34:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4036,13 +4036,13 @@ VQRDMULH_LANE:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQRDMULH_LANE:54:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_LANE:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_LANE (check rounding overflow) overflow output: -VQRDMULH_LANE:56:vqrdmulh_lane_s16 Neon overflow 0 -VQRDMULH_LANE:57:vqrdmulh_lane_s32 Neon overflow 0 -VQRDMULH_LANE:58:vqrdmulhq_lane_s16 Neon overflow 0 -VQRDMULH_LANE:59:vqrdmulhq_lane_s32 Neon overflow 0 +VQRDMULH_LANE (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:56:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:57:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:58:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:59:vqrdmulhq_lane_s32 Neon cumulative saturation 0 -VQRDMULH_LANE (check rounding overflow) output: +VQRDMULH_LANE (check rounding cumulative saturation) output: VQRDMULH_LANE:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_LANE:61:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_LANE:62:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4068,11 +4068,11 @@ VQRDMULH_LANE:81:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQRDMULH_LANE:82:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_LANE:83:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_N overflow output: -VQRDMULH_N:0:vqrdmulh_n_s16 Neon overflow 0 -VQRDMULH_N:1:vqrdmulh_n_s32 Neon overflow 0 -VQRDMULH_N:2:vqrdmulhq_n_s16 Neon overflow 0 -VQRDMULH_N:3:vqrdmulhq_n_s32 Neon overflow 0 +VQRDMULH_N cumulative saturation output: +VQRDMULH_N:0:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:1:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:2:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:3:vqrdmulhq_n_s32 Neon cumulative saturation 0 VQRDMULH_N output: VQRDMULH_N:4:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffd, } @@ -4080,13 +4080,13 @@ VQRDMULH_N:5:result_int32x2 [] = { fffffffe, fffffffe, } VQRDMULH_N:6:result_int16x8 [] = { 6, 6, 6, 5, 5, 4, 4, 4, } VQRDMULH_N:7:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } -VQRDMULH_N (check mul overflow) overflow output: -VQRDMULH_N:8:vqrdmulh_n_s16 Neon overflow 1 -VQRDMULH_N:9:vqrdmulh_n_s32 Neon overflow 1 -VQRDMULH_N:10:vqrdmulhq_n_s16 Neon overflow 1 -VQRDMULH_N:11:vqrdmulhq_n_s32 Neon overflow 1 +VQRDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_N:8:vqrdmulh_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:9:vqrdmulh_n_s32 Neon cumulative saturation 1 +VQRDMULH_N:10:vqrdmulhq_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:11:vqrdmulhq_n_s32 Neon cumulative saturation 1 -VQRDMULH_N (check mul overflow) output: +VQRDMULH_N (check mul cumulative saturation) output: VQRDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4112,13 +4112,13 @@ VQRDMULH_N:33:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRDMULH_N:34:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_N:35:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_N (check rounding overflow) overflow output: -VQRDMULH_N:36:vqrdmulh_n_s16 Neon overflow 0 -VQRDMULH_N:37:vqrdmulh_n_s32 Neon overflow 0 -VQRDMULH_N:38:vqrdmulhq_n_s16 Neon overflow 0 -VQRDMULH_N:39:vqrdmulhq_n_s32 Neon overflow 0 +VQRDMULH_N (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_N:36:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:37:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:38:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:39:vqrdmulhq_n_s32 Neon cumulative saturation 0 -VQRDMULH_N (check rounding overflow) output: +VQRDMULH_N (check rounding cumulative saturation) output: VQRDMULH_N:40:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_N:41:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_N:42:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4144,23 +4144,23 @@ VQRDMULH_N:61:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRDMULH_N:62:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_N:63:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (with input = 0) overflow output: -VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (with input = 0) cumulative saturation output: +VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (with input = 0) output: VQRSHL/VQRSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -4188,23 +4188,23 @@ VQRSHL/VQRSHLQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRSHL/VQRSHLQ:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (input 0 and negative shift amount) overflow output: -VQRSHL/VQRSHLQ:40:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:41:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:42:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:43:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:44:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:45:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:46:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:47:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:48:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:49:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:50:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:51:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:52:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:53:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:54:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:55:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:40:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:41:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:42:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:43:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:44:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:45:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:46:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:47:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:48:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:49:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:50:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:51:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:52:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:53:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:54:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:55:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: VQRSHL/VQRSHLQ:56:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -4232,23 +4232,23 @@ VQRSHL/VQRSHLQ:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRSHL/VQRSHLQ:78:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ overflow output: -VQRSHL/VQRSHLQ:80:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:81:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:82:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:83:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:84:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:85:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:86:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:87:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:88:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:89:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:90:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:91:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:92:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:93:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:94:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:95:vqrshlq_u64 Neon overflow 1 +VQRSHL/VQRSHLQ cumulative saturation output: +VQRSHL/VQRSHLQ:80:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:81:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:82:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:83:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:84:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:85:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:86:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:87:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:88:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:89:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:90:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:91:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:92:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:93:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:94:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:95:vqrshlq_u64 Neon cumulative saturation 1 VQRSHL/VQRSHLQ output: VQRSHL/VQRSHLQ:96:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } @@ -4276,23 +4276,23 @@ VQRSHL/VQRSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:118:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (negative shift amount) overflow output: -VQRSHL/VQRSHLQ:120:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:121:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:122:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:123:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:124:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:125:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:126:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:127:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:128:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:129:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:130:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:131:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:132:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:133:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:134:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:135:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:120:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:121:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:122:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:123:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:124:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:125:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:126:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:127:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:128:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:129:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:130:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:131:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:132:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:133:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:134:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:135:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (negative shift amount) output: VQRSHL/VQRSHLQ:136:result_int8x8 [] = { fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, } @@ -4320,25 +4320,25 @@ VQRSHL/VQRSHLQ:157:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:158:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:159:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: shift by -1) overflow output: -VQRSHL/VQRSHLQ:160:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:161:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:162:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:163:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:164:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:165:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:166:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:167:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:168:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:169:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:170:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:171:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:172:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:173:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:174:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:175:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: shift by -1) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) cumulative saturation output: +VQRSHL/VQRSHLQ:160:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:161:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:162:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:163:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:164:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:165:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:166:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:167:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:168:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:169:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:170:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:171:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:172:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:173:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:174:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:175:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) output: VQRSHL/VQRSHLQ:176:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } VQRSHL/VQRSHLQ:177:result_int16x4 [] = { 4000, 4000, 4000, 4000, } VQRSHL/VQRSHLQ:178:result_int32x2 [] = { 40000000, 40000000, } @@ -4364,25 +4364,25 @@ VQRSHL/VQRSHLQ:197:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:198:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:199:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: shift by -3) overflow output: -VQRSHL/VQRSHLQ:200:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:201:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:202:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:203:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:204:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:205:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:206:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:207:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:208:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:209:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:210:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:211:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:212:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:213:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:214:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:215:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: shift by -3) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) cumulative saturation output: +VQRSHL/VQRSHLQ:200:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:201:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:202:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:203:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:204:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:205:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:206:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:207:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:208:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:209:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:210:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:211:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:212:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:213:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:214:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:215:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) output: VQRSHL/VQRSHLQ:216:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } VQRSHL/VQRSHLQ:217:result_int16x4 [] = { 1000, 1000, 1000, 1000, } VQRSHL/VQRSHLQ:218:result_int32x2 [] = { 10000000, 10000000, } @@ -4408,25 +4408,25 @@ VQRSHL/VQRSHLQ:237:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:238:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:239:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount) overflow output: -VQRSHL/VQRSHLQ:240:vqrshl_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:241:vqrshl_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:242:vqrshl_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:243:vqrshl_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:244:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:245:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:246:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:247:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:248:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:249:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:250:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:251:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:252:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:253:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:254:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:255:vqrshlq_u64 Neon overflow 1 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:240:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:241:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:242:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:243:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:244:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:245:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:246:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:247:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:248:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:249:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:250:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:251:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:252:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:253:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:254:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:255:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) output: VQRSHL/VQRSHLQ:256:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } VQRSHL/VQRSHLQ:257:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRSHL/VQRSHLQ:258:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4452,25 +4452,25 @@ VQRSHL/VQRSHLQ:277:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:278:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:279:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount with negative input) overflow output: -VQRSHL/VQRSHLQ:280:vqrshl_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:281:vqrshl_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:282:vqrshl_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:283:vqrshl_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:284:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:285:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:286:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:287:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:288:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:289:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:290:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:291:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:292:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:293:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:294:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:295:vqrshlq_u64 Neon overflow 1 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount with negative input) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) cumulative saturation output: +VQRSHL/VQRSHLQ:280:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:281:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:282:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:283:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:284:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:285:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:286:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:287:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:288:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:289:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:290:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:291:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:292:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:293:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:294:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:295:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) output: VQRSHL/VQRSHLQ:296:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQRSHL/VQRSHLQ:297:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQRSHL/VQRSHLQ:298:result_int32x2 [] = { 80000000, 80000000, } @@ -4496,25 +4496,25 @@ VQRSHL/VQRSHLQ:317:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:318:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:319:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: large negative shift amount) overflow output: -VQRSHL/VQRSHLQ:320:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:321:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:322:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:323:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:324:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:325:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:326:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:327:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:328:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:329:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:330:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:331:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:332:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:333:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:334:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:335:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: large negative shift amount) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:320:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:321:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:322:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:323:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:324:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:325:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:326:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:327:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:328:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:329:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:330:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:331:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:332:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:333:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:334:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:335:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) output: VQRSHL/VQRSHLQ:336:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQRSHL/VQRSHLQ:337:result_int16x4 [] = { 0, 0, 0, 0, } VQRSHL/VQRSHLQ:338:result_int32x2 [] = { 0, 0, } @@ -4540,25 +4540,25 @@ VQRSHL/VQRSHLQ:357:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:358:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:359:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount with 0 input) overflow output: -VQRSHL/VQRSHLQ:360:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:361:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:362:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:363:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:364:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:365:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:366:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:367:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:368:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:369:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:370:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:371:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:372:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:373:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:374:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:375:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount with 0 input) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) cumulative saturation output: +VQRSHL/VQRSHLQ:360:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:361:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:362:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:363:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:364:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:365:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:366:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:367:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:368:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:369:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:370:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:371:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:372:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:373:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:374:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:375:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) output: VQRSHL/VQRSHLQ:376:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQRSHL/VQRSHLQ:377:result_int16x4 [] = { 0, 0, 0, 0, } VQRSHL/VQRSHLQ:378:result_int32x2 [] = { 0, 0, } @@ -6048,13 +6048,13 @@ VMVN/VMVNQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VMVN/VMVNQ:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VMVN/VMVNQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQMOVN overflow output: -VQMOVN:0:vqmovn_s16 Neon overflow 0 -VQMOVN:1:vqmovn_s32 Neon overflow 0 -VQMOVN:2:vqmovn_s64 Neon overflow 0 -VQMOVN:3:vqmovn_u16 Neon overflow 0 -VQMOVN:4:vqmovn_u32 Neon overflow 0 -VQMOVN:5:vqmovn_u64 Neon overflow 0 +VQMOVN cumulative saturation output: +VQMOVN:0:vqmovn_s16 Neon cumulative saturation 0 +VQMOVN:1:vqmovn_s32 Neon cumulative saturation 0 +VQMOVN:2:vqmovn_s64 Neon cumulative saturation 0 +VQMOVN:3:vqmovn_u16 Neon cumulative saturation 0 +VQMOVN:4:vqmovn_u32 Neon cumulative saturation 0 +VQMOVN:5:vqmovn_u64 Neon cumulative saturation 0 VQMOVN output: VQMOVN:6:result_int8x8 [] = { 12, 12, 12, 12, 12, 12, 12, 12, } @@ -6082,13 +6082,13 @@ VQMOVN:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQMOVN:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQMOVN:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQMOVN overflow output: -VQMOVN:30:vqmovn_s16 Neon overflow 1 -VQMOVN:31:vqmovn_s32 Neon overflow 1 -VQMOVN:32:vqmovn_s64 Neon overflow 1 -VQMOVN:33:vqmovn_u16 Neon overflow 1 -VQMOVN:34:vqmovn_u32 Neon overflow 1 -VQMOVN:35:vqmovn_u64 Neon overflow 1 +VQMOVN cumulative saturation output: +VQMOVN:30:vqmovn_s16 Neon cumulative saturation 1 +VQMOVN:31:vqmovn_s32 Neon cumulative saturation 1 +VQMOVN:32:vqmovn_s64 Neon cumulative saturation 1 +VQMOVN:33:vqmovn_u16 Neon cumulative saturation 1 +VQMOVN:34:vqmovn_u32 Neon cumulative saturation 1 +VQMOVN:35:vqmovn_u64 Neon cumulative saturation 1 VQMOVN output: VQMOVN:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -6116,10 +6116,10 @@ VQMOVN:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQMOVN:58:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQMOVN:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQMOVUN overflow output: -VQMOVUN:0:vqmovun_s16 Neon overflow 0 -VQMOVUN:1:vqmovun_s32 Neon overflow 0 -VQMOVUN:2:vqmovun_s64 Neon overflow 0 +VQMOVUN cumulative saturation output: +VQMOVUN:0:vqmovun_s16 Neon cumulative saturation 0 +VQMOVUN:1:vqmovun_s32 Neon cumulative saturation 0 +VQMOVUN:2:vqmovun_s64 Neon cumulative saturation 0 VQMOVUN output: VQMOVUN:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6147,10 +6147,10 @@ VQMOVUN:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQMOVUN:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQMOVUN:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQMOVUN (negative input) overflow output: -VQMOVUN:27:vqmovun_s16 Neon overflow 1 -VQMOVUN:28:vqmovun_s32 Neon overflow 1 -VQMOVUN:29:vqmovun_s64 Neon overflow 1 +VQMOVUN (negative input) cumulative saturation output: +VQMOVUN:27:vqmovun_s16 Neon cumulative saturation 1 +VQMOVUN:28:vqmovun_s32 Neon cumulative saturation 1 +VQMOVUN:29:vqmovun_s64 Neon cumulative saturation 1 VQMOVUN (negative input) output: VQMOVUN:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6620,15 +6620,15 @@ VPADAL/VPADALQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 333 VPADAL/VPADALQ:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VPADAL/VPADALQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHLU_N/VQSHLUQ_N (negative input) overflow output: -VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N (negative input) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon cumulative saturation 1 VQSHLU_N/VQSHLUQ_N (negative input) output: VQSHLU_N/VQSHLUQ_N:8:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6656,17 +6656,17 @@ VQSHLU_N/VQSHLUQ_N:29:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, VQSHLU_N/VQSHLUQ_N:30:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHLU_N/VQSHLUQ_N:31:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) overflow output: -VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:34:vqshlu_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:35:vqshlu_n_s64 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:38:vqshluq_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:39:vqshluq_n_s64 Neon overflow 0 - -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) output: +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:34:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:35:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:38:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:39:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) output: VQSHLU_N/VQSHLUQ_N:40:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHLU_N/VQSHLUQ_N:41:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:42:result_int32x2 [] = { 33333333, 33333333, } @@ -6692,17 +6692,17 @@ VQSHLU_N/VQSHLUQ_N:61:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, VQSHLU_N/VQSHLUQ_N:62:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHLU_N/VQSHLUQ_N:63:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) overflow output: -VQSHLU_N/VQSHLUQ_N:64:vqshlu_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:65:vqshlu_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:66:vqshlu_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:67:vqshlu_n_s64 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:68:vqshluq_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:69:vqshluq_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:70:vqshluq_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:71:vqshluq_n_s64 Neon overflow 1 - -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) output: +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:64:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:65:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:66:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:67:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:68:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:69:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:70:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:71:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) output: VQSHLU_N/VQSHLUQ_N:72:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHLU_N/VQSHLUQ_N:73:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:74:result_int32x2 [] = { 33333333, 33333333, } @@ -6728,15 +6728,15 @@ VQSHLU_N/VQSHLUQ_N:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, VQSHLU_N/VQSHLUQ_N:94:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHLU_N/VQSHLUQ_N:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHLU_N/VQSHLUQ_N overflow output: -VQSHLU_N/VQSHLUQ_N:96:vqshlu_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:97:vqshlu_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:98:vqshlu_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:99:vqshlu_n_s64 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:100:vqshluq_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:101:vqshluq_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:102:vqshluq_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:103:vqshluq_n_s64 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:96:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:97:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:98:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:99:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:100:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:101:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:102:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:103:vqshluq_n_s64 Neon cumulative saturation 0 VQSHLU_N/VQSHLUQ_N output: VQSHLU_N/VQSHLUQ_N:104:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6894,13 +6894,13 @@ VCNT/VCNTQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VCNT/VCNTQ:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VCNT/VCNTQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRN_N overflow output: -VQSHRN_N:0:vqshrn_n_s16 Neon overflow 0 -VQSHRN_N:1:vqshrn_n_s32 Neon overflow 0 -VQSHRN_N:2:vqshrn_n_s64 Neon overflow 0 -VQSHRN_N:3:vqshrn_n_u16 Neon overflow 1 -VQSHRN_N:4:vqshrn_n_u32 Neon overflow 1 -VQSHRN_N:5:vqshrn_n_u64 Neon overflow 1 +VQSHRN_N cumulative saturation output: +VQSHRN_N:0:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:1:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:2:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:3:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:4:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:5:vqshrn_n_u64 Neon cumulative saturation 1 VQSHRN_N output: VQSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } @@ -6928,13 +6928,13 @@ VQSHRN_N:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQSHRN_N:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRN_N:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRN_N (check saturation: shift by 3) overflow output: -VQSHRN_N:30:vqshrn_n_s16 Neon overflow 1 -VQSHRN_N:31:vqshrn_n_s32 Neon overflow 1 -VQSHRN_N:32:vqshrn_n_s64 Neon overflow 1 -VQSHRN_N:33:vqshrn_n_u16 Neon overflow 1 -VQSHRN_N:34:vqshrn_n_u32 Neon overflow 1 -VQSHRN_N:35:vqshrn_n_u64 Neon overflow 1 +VQSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQSHRN_N:30:vqshrn_n_s16 Neon cumulative saturation 1 +VQSHRN_N:31:vqshrn_n_s32 Neon cumulative saturation 1 +VQSHRN_N:32:vqshrn_n_s64 Neon cumulative saturation 1 +VQSHRN_N:33:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:34:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:35:vqshrn_n_u64 Neon cumulative saturation 1 VQSHRN_N (check saturation: shift by 3) output: VQSHRN_N:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -6962,13 +6962,13 @@ VQSHRN_N:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQSHRN_N:58:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRN_N:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRN_N (check saturation: shift by max) overflow output: -VQSHRN_N:60:vqshrn_n_s16 Neon overflow 0 -VQSHRN_N:61:vqshrn_n_s32 Neon overflow 0 -VQSHRN_N:62:vqshrn_n_s64 Neon overflow 0 -VQSHRN_N:63:vqshrn_n_u16 Neon overflow 0 -VQSHRN_N:64:vqshrn_n_u32 Neon overflow 0 -VQSHRN_N:65:vqshrn_n_u64 Neon overflow 0 +VQSHRN_N (check saturation: shift by max) cumulative saturation output: +VQSHRN_N:60:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:61:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:62:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:63:vqshrn_n_u16 Neon cumulative saturation 0 +VQSHRN_N:64:vqshrn_n_u32 Neon cumulative saturation 0 +VQSHRN_N:65:vqshrn_n_u64 Neon cumulative saturation 0 VQSHRN_N (check saturation: shift by max) output: VQSHRN_N:66:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -7048,10 +7048,10 @@ VPMIN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, VPMIN:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VPMIN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRUN_N (negative input) overflow output: -VQSHRUN_N:0:vqshrun_n_s16 Neon overflow 1 -VQSHRUN_N:1:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:2:vqshrun_n_s64 Neon overflow 1 +VQSHRUN_N (negative input) cumulative saturation output: +VQSHRUN_N:0:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:1:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:2:vqshrun_n_s64 Neon cumulative saturation 1 VQSHRUN_N (negative input) output: VQSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -7079,12 +7079,12 @@ VQSHRUN_N:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQSHRUN_N:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRUN_N:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRUN_N (check saturation/overflow) overflow output: -VQSHRUN_N:27:vqshrun_n_s16 Neon overflow 1 -VQSHRUN_N:28:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:29:vqshrun_n_s64 Neon overflow 1 +VQSHRUN_N (check cumulative saturation) cumulative saturation output: +VQSHRUN_N:27:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:28:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:29:vqshrun_n_s64 Neon cumulative saturation 1 -VQSHRUN_N (check saturation/overflow) output: +VQSHRUN_N (check cumulative saturation) output: VQSHRUN_N:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHRUN_N:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHRUN_N:32:result_int32x2 [] = { 33333333, 33333333, } @@ -7110,10 +7110,10 @@ VQSHRUN_N:51:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQSHRUN_N:52:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRUN_N:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRUN_N overflow output: -VQSHRUN_N:54:vqshrun_n_s16 Neon overflow 0 -VQSHRUN_N:55:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:56:vqshrun_n_s64 Neon overflow 0 +VQSHRUN_N cumulative saturation output: +VQSHRUN_N:54:vqshrun_n_s16 Neon cumulative saturation 0 +VQSHRUN_N:55:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:56:vqshrun_n_s64 Neon cumulative saturation 0 VQSHRUN_N output: VQSHRUN_N:57:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -7141,10 +7141,10 @@ VQSHRUN_N:78:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQSHRUN_N:79:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRUN_N:80:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N (negative input) overflow output: -VQRSHRUN_N:0:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:1:vqrshrun_n_s32 Neon overflow 0 -VQRSHRUN_N:2:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (negative input) cumulative saturation output: +VQRSHRUN_N:0:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:1:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:2:vqrshrun_n_s64 Neon cumulative saturation 1 VQRSHRUN_N (negative input) output: VQRSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -7172,12 +7172,12 @@ VQRSHRUN_N:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRSHRUN_N:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRUN_N:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N (check saturation/overflow: shift by 1) overflow output: -VQRSHRUN_N:27:vqrshrun_n_s16 Neon overflow 1 -VQRSHRUN_N:28:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:29:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQRSHRUN_N:27:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:28:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:29:vqrshrun_n_s64 Neon cumulative saturation 1 -VQRSHRUN_N (check saturation/overflow: shift by 1) output: +VQRSHRUN_N (check cumulative saturation: shift by 1) output: VQRSHRUN_N:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:32:result_int32x2 [] = { 33333333, 33333333, } @@ -7203,12 +7203,12 @@ VQRSHRUN_N:51:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRSHRUN_N:52:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRUN_N:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N (check saturation/overflow: shift by max, positive input) overflow output: -VQRSHRUN_N:54:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:55:vqrshrun_n_s32 Neon overflow 0 -VQRSHRUN_N:56:vqrshrun_n_s64 Neon overflow 0 +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) cumulative saturation output: +VQRSHRUN_N:54:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:55:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:56:vqrshrun_n_s64 Neon cumulative saturation 0 -VQRSHRUN_N (check saturation/overflow: shift by max, positive input) output: +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) output: VQRSHRUN_N:57:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:58:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:59:result_int32x2 [] = { 33333333, 33333333, } @@ -7234,12 +7234,12 @@ VQRSHRUN_N:78:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRSHRUN_N:79:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRUN_N:80:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N (check saturation/overflow: shift by max, negative input) overflow output: -VQRSHRUN_N:81:vqrshrun_n_s16 Neon overflow 1 -VQRSHRUN_N:82:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:83:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) cumulative saturation output: +VQRSHRUN_N:81:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:82:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:83:vqrshrun_n_s64 Neon cumulative saturation 1 -VQRSHRUN_N (check saturation/overflow: shift by max, negative input) output: +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) output: VQRSHRUN_N:84:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:85:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:86:result_int32x2 [] = { 33333333, 33333333, } @@ -7265,10 +7265,10 @@ VQRSHRUN_N:105:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQRSHRUN_N:106:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRUN_N:107:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N overflow output: -VQRSHRUN_N:108:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:109:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:110:vqrshrun_n_s64 Neon overflow 0 +VQRSHRUN_N cumulative saturation output: +VQRSHRUN_N:108:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:109:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:110:vqrshrun_n_s64 Neon cumulative saturation 0 VQRSHRUN_N output: VQRSHRUN_N:111:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } diff --git a/ref-rvct-neon-nofp16.txt b/ref-rvct-neon-nofp16.txt index 6e6d1cb..12c4c5a 100644 --- a/ref-rvct-neon-nofp16.txt +++ b/ref-rvct-neon-nofp16.txt @@ -339,9 +339,9 @@ VGET_LOW:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VGET_LOW:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VGET_LOW:21:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLAL_LANE overflow output: -VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon overflow 0 -VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon overflow 0 +VQDMLAL_LANE cumulative saturation output: +VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon cumulative saturation 0 VQDMLAL_LANE output: VQDMLAL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -367,9 +367,9 @@ VQDMLAL_LANE:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQDMLAL_LANE:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLAL_LANE:23:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLAL_LANE (mul with input=0) overflow output: -VQDMLAL_LANE:24:vqdmlal_lane_s16 Neon overflow 0 -VQDMLAL_LANE:25:vqdmlal_lane_s32 Neon overflow 0 +VQDMLAL_LANE (mul with input=0) cumulative saturation output: +VQDMLAL_LANE:24:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:25:vqdmlal_lane_s32 Neon cumulative saturation 0 VQDMLAL_LANE (mul with input=0) output: VQDMLAL_LANE:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -395,11 +395,11 @@ VQDMLAL_LANE:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQDMLAL_LANE:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLAL_LANE:47:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLAL_LANE (check mul overflow) overflow output: -VQDMLAL_LANE:48:vqdmlal_lane_s16 Neon overflow 1 -VQDMLAL_LANE:49:vqdmlal_lane_s32 Neon overflow 1 +VQDMLAL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_LANE:48:vqdmlal_lane_s16 Neon cumulative saturation 1 +VQDMLAL_LANE:49:vqdmlal_lane_s32 Neon cumulative saturation 1 -VQDMLAL_LANE (check mul overflow) output: +VQDMLAL_LANE (check mul cumulative saturation) output: VQDMLAL_LANE:50:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL_LANE:51:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL_LANE:52:result_int32x2 [] = { 33333333, 33333333, } @@ -423,9 +423,9 @@ VQDMLAL_LANE:69:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQDMLAL_LANE:70:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLAL_LANE:71:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLSL_LANE overflow output: -VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon overflow 0 -VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon overflow 0 +VQDMLSL_LANE cumulative saturation output: +VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon cumulative saturation 0 VQDMLSL_LANE output: VQDMLSL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -451,9 +451,9 @@ VQDMLSL_LANE:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQDMLSL_LANE:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLSL_LANE:23:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLSL_LANE (mul with input=0) overflow output: -VQDMLSL_LANE:24:vqdmlsl_lane_s16 Neon overflow 0 -VQDMLSL_LANE:25:vqdmlsl_lane_s32 Neon overflow 0 +VQDMLSL_LANE (mul with input=0) cumulative saturation output: +VQDMLSL_LANE:24:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:25:vqdmlsl_lane_s32 Neon cumulative saturation 0 VQDMLSL_LANE (mul with input=0) output: VQDMLSL_LANE:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -479,11 +479,11 @@ VQDMLSL_LANE:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQDMLSL_LANE:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLSL_LANE:47:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLSL_LANE (check mul overflow) overflow output: -VQDMLSL_LANE:48:vqdmlsl_lane_s16 Neon overflow 1 -VQDMLSL_LANE:49:vqdmlsl_lane_s32 Neon overflow 1 +VQDMLSL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_LANE:48:vqdmlsl_lane_s16 Neon cumulative saturation 1 +VQDMLSL_LANE:49:vqdmlsl_lane_s32 Neon cumulative saturation 1 -VQDMLSL_LANE (check mul overflow) output: +VQDMLSL_LANE (check mul cumulative saturation) output: VQDMLSL_LANE:50:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL_LANE:51:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL_LANE:52:result_int32x2 [] = { 33333333, 33333333, } @@ -507,9 +507,9 @@ VQDMLSL_LANE:69:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQDMLSL_LANE:70:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLSL_LANE:71:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLAL_N overflow output: -VQDMLAL_N:0:vqdmlal_n_s16 Neon overflow 0 -VQDMLAL_N:1:vqdmlal_n_s32 Neon overflow 0 +VQDMLAL_N cumulative saturation output: +VQDMLAL_N:0:vqdmlal_n_s16 Neon cumulative saturation 0 +VQDMLAL_N:1:vqdmlal_n_s32 Neon cumulative saturation 0 VQDMLAL_N output: VQDMLAL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -535,11 +535,11 @@ VQDMLAL_N:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQDMLAL_N:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLAL_N:23:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLAL_N (check mul overflow) overflow output: -VQDMLAL_N:24:vqdmlal_n_s16 Neon overflow 1 -VQDMLAL_N:25:vqdmlal_n_s32 Neon overflow 1 +VQDMLAL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_N:24:vqdmlal_n_s16 Neon cumulative saturation 1 +VQDMLAL_N:25:vqdmlal_n_s32 Neon cumulative saturation 1 -VQDMLAL_N (check mul overflow) output: +VQDMLAL_N (check mul cumulative saturation) output: VQDMLAL_N:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL_N:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL_N:28:result_int32x2 [] = { 33333333, 33333333, } @@ -563,9 +563,9 @@ VQDMLAL_N:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQDMLAL_N:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLAL_N:47:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLSL_N overflow output: -VQDMLSL_N:0:vqdmlsl_n_s16 Neon overflow 0 -VQDMLSL_N:1:vqdmlsl_n_s32 Neon overflow 0 +VQDMLSL_N cumulative saturation output: +VQDMLSL_N:0:vqdmlsl_n_s16 Neon cumulative saturation 0 +VQDMLSL_N:1:vqdmlsl_n_s32 Neon cumulative saturation 0 VQDMLSL_N output: VQDMLSL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -591,11 +591,11 @@ VQDMLSL_N:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQDMLSL_N:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLSL_N:23:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLSL_N (check mul overflow) overflow output: -VQDMLSL_N:24:vqdmlsl_n_s16 Neon overflow 1 -VQDMLSL_N:25:vqdmlsl_n_s32 Neon overflow 1 +VQDMLSL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_N:24:vqdmlsl_n_s16 Neon cumulative saturation 1 +VQDMLSL_N:25:vqdmlsl_n_s32 Neon cumulative saturation 1 -VQDMLSL_N (check mul overflow) output: +VQDMLSL_N (check mul cumulative saturation) output: VQDMLSL_N:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL_N:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL_N:28:result_int32x2 [] = { 33333333, 33333333, } @@ -763,13 +763,13 @@ VRSHRN_N:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VRSHRN_N:64:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VRSHRN_N:65:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHRN_N overflow output: -VQRSHRN_N:0:vqrshrn_n_s16 Neon overflow 0 -VQRSHRN_N:1:vqrshrn_n_s32 Neon overflow 0 -VQRSHRN_N:2:vqrshrn_n_s64 Neon overflow 0 -VQRSHRN_N:3:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:4:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:5:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N cumulative saturation output: +VQRSHRN_N:0:vqrshrn_n_s16 Neon cumulative saturation 0 +VQRSHRN_N:1:vqrshrn_n_s32 Neon cumulative saturation 0 +VQRSHRN_N:2:vqrshrn_n_s64 Neon cumulative saturation 0 +VQRSHRN_N:3:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:4:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:5:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N output: VQRSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } @@ -795,13 +795,13 @@ VQRSHRN_N:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRSHRN_N:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHRN_N:27:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHRN_N (check saturation: shift by 3) overflow output: -VQRSHRN_N:28:vqrshrn_n_s16 Neon overflow 1 -VQRSHRN_N:29:vqrshrn_n_s32 Neon overflow 1 -VQRSHRN_N:30:vqrshrn_n_s64 Neon overflow 1 -VQRSHRN_N:31:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:32:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:33:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQRSHRN_N:28:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:29:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:30:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:31:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:32:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:33:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N (check saturation: shift by 3) output: VQRSHRN_N:34:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -827,13 +827,13 @@ VQRSHRN_N:53:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRSHRN_N:54:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHRN_N:55:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHRN_N (check saturation: shift by max) overflow output: -VQRSHRN_N:56:vqrshrn_n_s16 Neon overflow 1 -VQRSHRN_N:57:vqrshrn_n_s32 Neon overflow 1 -VQRSHRN_N:58:vqrshrn_n_s64 Neon overflow 1 -VQRSHRN_N:59:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:60:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:61:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N (check saturation: shift by max) cumulative saturation output: +VQRSHRN_N:56:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:57:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:58:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:59:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:60:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:61:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N (check saturation: shift by max) output: VQRSHRN_N:62:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -908,23 +908,23 @@ vgetq_lane_p16: fff6 vgetq_lane_f32: c1500000 -VQSUB/VQSUBQ overflow output: -VQSUB/VQSUBQ:0:vqsub_s8 Neon overflow 0 -VQSUB/VQSUBQ:1:vqsub_s16 Neon overflow 0 -VQSUB/VQSUBQ:2:vqsub_s32 Neon overflow 0 -VQSUB/VQSUBQ:3:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:4:vqsub_u8 Neon overflow 0 -VQSUB/VQSUBQ:5:vqsub_u16 Neon overflow 0 -VQSUB/VQSUBQ:6:vqsub_u32 Neon overflow 0 -VQSUB/VQSUBQ:7:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:8:vqsubq_s8 Neon overflow 0 -VQSUB/VQSUBQ:9:vqsubq_s16 Neon overflow 0 -VQSUB/VQSUBQ:10:vqsubq_s32 Neon overflow 0 -VQSUB/VQSUBQ:11:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:12:vqsubq_u8 Neon overflow 0 -VQSUB/VQSUBQ:13:vqsubq_u16 Neon overflow 0 -VQSUB/VQSUBQ:14:vqsubq_u32 Neon overflow 0 -VQSUB/VQSUBQ:15:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ cumulative saturation output: +VQSUB/VQSUBQ:0:vqsub_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:1:vqsub_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:2:vqsub_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:3:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:4:vqsub_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:5:vqsub_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:6:vqsub_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:7:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:8:vqsubq_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:9:vqsubq_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:10:vqsubq_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:11:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:12:vqsubq_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:13:vqsubq_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:14:vqsubq_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:15:vqsubq_u64 Neon cumulative saturation 0 VQSUB/VQSUBQ output: VQSUB/VQSUBQ:16:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } @@ -950,11 +950,11 @@ VQSUB/VQSUBQ:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQSUB/VQSUBQ:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSUB/VQSUBQ:37:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:38:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:39:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:40:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:41:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:38:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:39:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:40:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:41:vqsubq_u64 Neon cumulative saturation 0 64 bits saturation: VQSUB/VQSUBQ:42:result_int64x1 [] = { fffffffffffffff0, } @@ -962,33 +962,33 @@ VQSUB/VQSUBQ:43:result_uint64x1 [] = { fffffffffffffff0, } VQSUB/VQSUBQ:44:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } VQSUB/VQSUBQ:45:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:46:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:47:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:48:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:49:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:46:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:47:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:48:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:49:vqsubq_u64 Neon cumulative saturation 0 VQSUB/VQSUBQ:50:result_int64x1 [] = { ffffffffffffffac, } VQSUB/VQSUBQ:51:result_uint64x1 [] = { ffffffffffffff68, } VQSUB/VQSUBQ:52:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } VQSUB/VQSUBQ:53:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:54:vqsub_s64 Neon overflow 1 -VQSUB/VQSUBQ:55:vqsub_u64 Neon overflow 1 -VQSUB/VQSUBQ:56:vqsubq_s64 Neon overflow 1 -VQSUB/VQSUBQ:57:vqsubq_u64 Neon overflow 1 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:54:vqsub_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:55:vqsub_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:56:vqsubq_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:57:vqsubq_u64 Neon cumulative saturation 1 VQSUB/VQSUBQ:58:result_int64x1 [] = { 8000000000000000, } VQSUB/VQSUBQ:59:result_uint64x1 [] = { 0, } VQSUB/VQSUBQ:60:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } VQSUB/VQSUBQ:61:result_uint64x2 [] = { 0, 0, } less than 64 bits saturation: -VQSUB/VQSUBQ:62:vqsub_s8 Neon overflow 1 -VQSUB/VQSUBQ:63:vqsub_s16 Neon overflow 1 -VQSUB/VQSUBQ:64:vqsub_s32 Neon overflow 1 -VQSUB/VQSUBQ:65:vqsubq_s8 Neon overflow 1 -VQSUB/VQSUBQ:66:vqsubq_s16 Neon overflow 1 -VQSUB/VQSUBQ:67:vqsubq_s32 Neon overflow 1 +VQSUB/VQSUBQ:62:vqsub_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:63:vqsub_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:64:vqsub_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:65:vqsubq_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:66:vqsubq_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:67:vqsubq_s32 Neon cumulative saturation 1 VQSUB/VQSUBQ:68:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQSUB/VQSUBQ:69:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQSUB/VQSUBQ:70:result_int32x2 [] = { 80000000, 80000000, } @@ -996,13 +996,13 @@ VQSUB/VQSUBQ:71:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ff VQSUB/VQSUBQ:72:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } VQSUB/VQSUBQ:73:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } -VQSUB/VQSUBQ less than 64 bits saturation overflow output: -VQSUB/VQSUBQ:74:vqsub_u8 Neon overflow 1 -VQSUB/VQSUBQ:75:vqsub_u16 Neon overflow 1 -VQSUB/VQSUBQ:76:vqsub_u32 Neon overflow 1 -VQSUB/VQSUBQ:77:vqsubq_u8 Neon overflow 1 -VQSUB/VQSUBQ:78:vqsubq_u16 Neon overflow 1 -VQSUB/VQSUBQ:79:vqsubq_u32 Neon overflow 1 +VQSUB/VQSUBQ less than 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:74:vqsub_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:75:vqsub_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:76:vqsub_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:77:vqsubq_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:78:vqsubq_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:79:vqsubq_u32 Neon cumulative saturation 1 VQSUB/VQSUBQ:80:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQSUB/VQSUBQ:81:result_uint16x4 [] = { 0, 0, 0, 0, } VQSUB/VQSUBQ:82:result_uint32x2 [] = { 0, 0, } @@ -1010,11 +1010,11 @@ VQSUB/VQSUBQ:83:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VQSUB/VQSUBQ:84:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQSUB/VQSUBQ:85:result_uint32x4 [] = { 0, 0, 0, 0, } -VQDMULH overflow output: -VQDMULH:0:vqdmulh_s16 Neon overflow 0 -VQDMULH:1:vqdmulh_s32 Neon overflow 0 -VQDMULH:2:vqdmulhq_s16 Neon overflow 0 -VQDMULH:3:vqdmulhq_s32 Neon overflow 0 +VQDMULH cumulative saturation output: +VQDMULH:0:vqdmulh_s16 Neon cumulative saturation 0 +VQDMULH:1:vqdmulh_s32 Neon cumulative saturation 0 +VQDMULH:2:vqdmulhq_s16 Neon cumulative saturation 0 +VQDMULH:3:vqdmulhq_s32 Neon cumulative saturation 0 VQDMULH output: VQDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1040,11 +1040,11 @@ VQDMULH:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQDMULH:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMULH:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMULH overflow output: -VQDMULH:26:vqdmulh_s16 Neon overflow 1 -VQDMULH:27:vqdmulh_s32 Neon overflow 1 -VQDMULH:28:vqdmulhq_s16 Neon overflow 1 -VQDMULH:29:vqdmulhq_s32 Neon overflow 1 +VQDMULH cumulative saturation output: +VQDMULH:26:vqdmulh_s16 Neon cumulative saturation 1 +VQDMULH:27:vqdmulh_s32 Neon cumulative saturation 1 +VQDMULH:28:vqdmulhq_s16 Neon cumulative saturation 1 +VQDMULH:29:vqdmulhq_s32 Neon cumulative saturation 1 VQDMULH output: VQDMULH:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1070,11 +1070,11 @@ VQDMULH:49:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQDMULH:50:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMULH:51:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMULH_LANE overflow output: -VQDMULH_LANE:0:vqdmulh_lane_s16 Neon overflow 0 -VQDMULH_LANE:1:vqdmulh_lane_s32 Neon overflow 0 -VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon overflow 0 -VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon overflow 0 +VQDMULH_LANE cumulative saturation output: +VQDMULH_LANE:0:vqdmulh_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:1:vqdmulh_lane_s32 Neon cumulative saturation 0 +VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon cumulative saturation 0 VQDMULH_LANE output: VQDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1100,13 +1100,13 @@ VQDMULH_LANE:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQDMULH_LANE:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMULH_LANE:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMULH_LANE (check mul overflow) overflow output: -VQDMULH_LANE:26:vqdmulh_lane_s16 Neon overflow 1 -VQDMULH_LANE:27:vqdmulh_lane_s32 Neon overflow 1 -VQDMULH_LANE:28:vqdmulhq_lane_s16 Neon overflow 1 -VQDMULH_LANE:29:vqdmulhq_lane_s32 Neon overflow 1 +VQDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULH_LANE:26:vqdmulh_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:27:vqdmulh_lane_s32 Neon cumulative saturation 1 +VQDMULH_LANE:28:vqdmulhq_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:29:vqdmulhq_lane_s32 Neon cumulative saturation 1 -VQDMULH_LANE (check mul overflow) output: +VQDMULH_LANE (check mul cumulative saturation) output: VQDMULH_LANE:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULH_LANE:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQDMULH_LANE:32:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -1130,11 +1130,11 @@ VQDMULH_LANE:49:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQDMULH_LANE:50:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMULH_LANE:51:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMULH_N overflow output: -VQDMULH_N:0:vqdmulh_n_s16 Neon overflow 0 -VQDMULH_N:1:vqdmulh_n_s32 Neon overflow 0 -VQDMULH_N:2:vqdmulhq_n_s16 Neon overflow 0 -VQDMULH_N:3:vqdmulhq_n_s32 Neon overflow 0 +VQDMULH_N cumulative saturation output: +VQDMULH_N:0:vqdmulh_n_s16 Neon cumulative saturation 0 +VQDMULH_N:1:vqdmulh_n_s32 Neon cumulative saturation 0 +VQDMULH_N:2:vqdmulhq_n_s16 Neon cumulative saturation 0 +VQDMULH_N:3:vqdmulhq_n_s32 Neon cumulative saturation 0 VQDMULH_N output: VQDMULH_N:4:result_int16x4 [] = { 19, 19, 19, 19, } @@ -1142,13 +1142,13 @@ VQDMULH_N:5:result_int32x2 [] = { 4, 4, } VQDMULH_N:6:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } VQDMULH_N:7:result_int32x4 [] = { a, a, a, a, } -VQDMULH_N (check mul overflow) overflow output: -VQDMULH_N:8:vqdmulh_n_s16 Neon overflow 1 -VQDMULH_N:9:vqdmulh_n_s32 Neon overflow 1 -VQDMULH_N:10:vqdmulhq_n_s16 Neon overflow 1 -VQDMULH_N:11:vqdmulhq_n_s32 Neon overflow 1 +VQDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQDMULH_N:8:vqdmulh_n_s16 Neon cumulative saturation 1 +VQDMULH_N:9:vqdmulh_n_s32 Neon cumulative saturation 1 +VQDMULH_N:10:vqdmulhq_n_s16 Neon cumulative saturation 1 +VQDMULH_N:11:vqdmulhq_n_s32 Neon cumulative saturation 1 -VQDMULH_N (check mul overflow) output: +VQDMULH_N (check mul cumulative saturation) output: VQDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -1172,9 +1172,9 @@ VQDMULH_N:31:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQDMULH_N:32:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMULH_N:33:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMULL overflow output: -VQDMULL:0:vqdmull_s16 Neon overflow 0 -VQDMULL:1:vqdmull_s32 Neon overflow 0 +VQDMULL cumulative saturation output: +VQDMULL:0:vqdmull_s16 Neon cumulative saturation 0 +VQDMULL:1:vqdmull_s32 Neon cumulative saturation 0 VQDMULL output: VQDMULL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1200,11 +1200,11 @@ VQDMULL:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQDMULL:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMULL:23:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMULL (check mul overflow) overflow output: -VQDMULL:24:vqdmull_s16 Neon overflow 1 -VQDMULL:25:vqdmull_s32 Neon overflow 1 +VQDMULL (check mul cumulative saturation) cumulative saturation output: +VQDMULL:24:vqdmull_s16 Neon cumulative saturation 1 +VQDMULL:25:vqdmull_s32 Neon cumulative saturation 1 -VQDMULL (check mul overflow) output: +VQDMULL (check mul cumulative saturation) output: VQDMULL:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULL:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMULL:28:result_int32x2 [] = { 33333333, 33333333, } @@ -1228,9 +1228,9 @@ VQDMULL:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQDMULL:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMULL:47:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLAL overflow output: -VQDMLAL:0:vqdmlal_s16 Neon overflow 0 -VQDMLAL:1:vqdmlal_s32 Neon overflow 0 +VQDMLAL cumulative saturation output: +VQDMLAL:0:vqdmlal_s16 Neon cumulative saturation 0 +VQDMLAL:1:vqdmlal_s32 Neon cumulative saturation 0 VQDMLAL output: VQDMLAL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1256,11 +1256,11 @@ VQDMLAL:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQDMLAL:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLAL:23:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLAL (check mul overflow) overflow output: -VQDMLAL:24:vqdmlal_s16 Neon overflow 1 -VQDMLAL:25:vqdmlal_s32 Neon overflow 1 +VQDMLAL (check mul cumulative saturation) cumulative saturation output: +VQDMLAL:24:vqdmlal_s16 Neon cumulative saturation 1 +VQDMLAL:25:vqdmlal_s32 Neon cumulative saturation 1 -VQDMLAL (check mul overflow) output: +VQDMLAL (check mul cumulative saturation) output: VQDMLAL:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL:28:result_int32x2 [] = { 33333333, 33333333, } @@ -1284,9 +1284,9 @@ VQDMLAL:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQDMLAL:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLAL:47:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLSL overflow output: -VQDMLSL:0:vqdmlsl_s16 Neon overflow 0 -VQDMLSL:1:vqdmlsl_s32 Neon overflow 0 +VQDMLSL cumulative saturation output: +VQDMLSL:0:vqdmlsl_s16 Neon cumulative saturation 0 +VQDMLSL:1:vqdmlsl_s32 Neon cumulative saturation 0 VQDMLSL output: VQDMLSL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1312,11 +1312,11 @@ VQDMLSL:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQDMLSL:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQDMLSL:23:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQDMLSL (check mul overflow) overflow output: -VQDMLSL:24:vqdmlsl_s16 Neon overflow 1 -VQDMLSL:25:vqdmlsl_s32 Neon overflow 1 +VQDMLSL (check mul cumulative saturation) cumulative saturation output: +VQDMLSL:24:vqdmlsl_s16 Neon cumulative saturation 1 +VQDMLSL:25:vqdmlsl_s32 Neon cumulative saturation 1 -VQDMLSL (check mul overflow) output: +VQDMLSL (check mul cumulative saturation) output: VQDMLSL:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL:28:result_int32x2 [] = { 33333333, 33333333, } @@ -1594,23 +1594,23 @@ VSHL_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VSHL_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VSHL_N:21:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL/VQSHLQ (with input = 0) overflow output: -VQSHL/VQSHLQ:0:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:1:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:2:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:3:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:4:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:5:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:6:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:7:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:8:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:9:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:10:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:11:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:12:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:13:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:14:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:15:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (with input = 0) cumulative saturation output: +VQSHL/VQSHLQ:0:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:1:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:2:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:3:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:4:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:5:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:6:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:7:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:8:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:9:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:10:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:11:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:12:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:13:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:14:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:15:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (with input = 0) output: VQSHL/VQSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -1636,23 +1636,23 @@ VQSHL/VQSHLQ:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQSHL/VQSHLQ:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL/VQSHLQ:37:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL/VQSHLQ (input 0 and negative shift amount) overflow output: -VQSHL/VQSHLQ:38:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:39:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:40:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:41:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:42:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:43:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:44:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:45:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:46:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:47:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:48:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:49:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:50:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:51:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:52:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:53:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:38:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:39:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:40:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:41:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:42:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:43:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:44:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:45:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:46:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:47:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:48:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:49:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:50:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:51:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:52:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:53:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (input 0 and negative shift amount) output: VQSHL/VQSHLQ:54:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -1678,23 +1678,23 @@ VQSHL/VQSHLQ:73:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQSHL/VQSHLQ:74:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL/VQSHLQ:75:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL/VQSHLQ overflow output: -VQSHL/VQSHLQ:76:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:77:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:78:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:79:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:80:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:81:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:82:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:83:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:84:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:85:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:86:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:87:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:88:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:89:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:90:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:91:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ cumulative saturation output: +VQSHL/VQSHLQ:76:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:77:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:78:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:79:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:80:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:81:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:82:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:83:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:84:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:85:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:86:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:87:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:88:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:89:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:90:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:91:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ output: VQSHL/VQSHLQ:92:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } @@ -1720,23 +1720,23 @@ VQSHL/VQSHLQ:111:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHL/VQSHLQ:112:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL/VQSHLQ:113:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL/VQSHLQ (negative shift amount) overflow output: -VQSHL/VQSHLQ:114:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:115:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:116:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:117:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:118:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:119:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:120:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:121:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:122:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:123:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:124:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:125:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:126:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:127:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:128:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:129:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:114:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:115:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:116:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:117:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:118:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:119:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:120:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:121:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:122:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:123:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:124:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:125:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:126:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:127:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:128:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:129:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (negative shift amount) output: VQSHL/VQSHLQ:130:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } @@ -1762,23 +1762,23 @@ VQSHL/VQSHLQ:149:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHL/VQSHLQ:150:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL/VQSHLQ:151:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL/VQSHLQ (large shift amount, negative input) overflow output: -VQSHL/VQSHLQ:152:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:153:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:154:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:155:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:156:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:157:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:158:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:159:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:160:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:161:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:162:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:163:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:164:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:165:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:166:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:167:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (large shift amount, negative input) cumulative saturation output: +VQSHL/VQSHLQ:152:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:153:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:154:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:155:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:156:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:157:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:158:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:159:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:160:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:161:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:162:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:163:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:164:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:165:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:166:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:167:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (large shift amount, negative input) output: VQSHL/VQSHLQ:168:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } @@ -1804,25 +1804,25 @@ VQSHL/VQSHLQ:187:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHL/VQSHLQ:188:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL/VQSHLQ:189:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL/VQSHLQ (check saturation/overflow) overflow output: -VQSHL/VQSHLQ:190:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:191:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:192:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:193:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:194:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:195:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:196:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:197:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:198:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:199:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:200:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:201:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:202:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:203:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:204:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:205:vqshlq_u64 Neon overflow 0 - -VQSHL/VQSHLQ (check saturation/overflow) output: +VQSHL/VQSHLQ (check cumulative saturation) cumulative saturation output: +VQSHL/VQSHLQ:190:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:191:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:192:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:193:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:194:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:195:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:196:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:197:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:198:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:199:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:200:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:201:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:202:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:203:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:204:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:205:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (check cumulative saturation) output: VQSHL/VQSHLQ:206:result_int8x8 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } VQSHL/VQSHLQ:207:result_int16x4 [] = { 3fff, 3fff, 3fff, 3fff, } VQSHL/VQSHLQ:208:result_int32x2 [] = { 3fffffff, 3fffffff, } @@ -1846,23 +1846,23 @@ VQSHL/VQSHLQ:225:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHL/VQSHLQ:226:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL/VQSHLQ:227:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL/VQSHLQ (large shift amount, positive input) overflow output: -VQSHL/VQSHLQ:228:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:229:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:230:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:231:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:232:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:233:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:234:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:235:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:236:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:237:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:238:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:239:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:240:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:241:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:242:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:243:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (large shift amount, positive input) cumulative saturation output: +VQSHL/VQSHLQ:228:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:229:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:230:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:231:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:232:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:233:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:234:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:235:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:236:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:237:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:238:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:239:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:240:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:241:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:242:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:243:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (large shift amount, positive input) output: VQSHL/VQSHLQ:244:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -1888,23 +1888,23 @@ VQSHL/VQSHLQ:263:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHL/VQSHLQ:264:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL/VQSHLQ:265:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL/VQSHLQ (check saturation on 64 bits) overflow output: -VQSHL/VQSHLQ:266:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:267:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:268:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:269:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:270:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:271:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:272:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:273:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:274:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:275:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:276:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:277:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:278:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:279:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:280:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:281:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (check saturation on 64 bits) cumulative saturation output: +VQSHL/VQSHLQ:266:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:267:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:268:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:269:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:270:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:271:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:272:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:273:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:274:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:275:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:276:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:277:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:278:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:279:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:280:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:281:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (check saturation on 64 bits) output: VQSHL/VQSHLQ:282:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -1930,23 +1930,23 @@ VQSHL/VQSHLQ:301:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHL/VQSHLQ:302:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL/VQSHLQ:303:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL_N/VQSHLQ_N overflow output: -VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon overflow 0 -VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon overflow 0 -VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon overflow 0 -VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon overflow 0 -VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon overflow 0 -VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon overflow 0 -VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon overflow 0 -VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon overflow 0 -VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N cumulative saturation output: +VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon cumulative saturation 1 VQSHL_N/VQSHLQ_N output: VQSHL_N/VQSHLQ_N:16:result_int8x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } @@ -1972,23 +1972,23 @@ VQSHL_N/VQSHLQ_N:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQSHL_N/VQSHLQ_N:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHL_N/VQSHLQ_N:37:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHL_N/VQSHLQ_N (check saturation with large positive input) overflow output: -VQSHL_N/VQSHLQ_N:38:vqshl_n_s8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:39:vqshl_n_s16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:40:vqshl_n_s32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:41:vqshl_n_s64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:42:vqshl_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:43:vqshl_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:44:vqshl_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:45:vqshl_n_u64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:46:vqshlq_n_s8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:47:vqshlq_n_s16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:48:vqshlq_n_s32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:49:vqshlq_n_s64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:50:vqshlq_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:51:vqshlq_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:52:vqshlq_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:53:vqshlq_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N (check saturation with large positive input) cumulative saturation output: +VQSHL_N/VQSHLQ_N:38:vqshl_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:39:vqshl_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:40:vqshl_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:41:vqshl_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:42:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:43:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:44:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:45:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:46:vqshlq_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:47:vqshlq_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:48:vqshlq_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:49:vqshlq_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:50:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:51:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:52:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:53:vqshlq_n_u64 Neon cumulative saturation 1 VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: VQSHL_N/VQSHLQ_N:54:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -2470,35 +2470,35 @@ VDUP_LANE/VDUP_LANEQ:19:result_poly8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f VDUP_LANE/VDUP_LANEQ:20:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } VDUP_LANE/VDUP_LANEQ:21:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, } -VQDMULL_LANE overflow output: -VQDMULL_LANE:0:vqdmull_lane_s16 Neon overflow 0 -VQDMULL_LANE:1:vqdmull_lane_s32 Neon overflow 0 +VQDMULL_LANE cumulative saturation output: +VQDMULL_LANE:0:vqdmull_lane_s16 Neon cumulative saturation 0 +VQDMULL_LANE:1:vqdmull_lane_s32 Neon cumulative saturation 0 VQDMULL_LANE output: VQDMULL_LANE:2:result_int32x4 [] = { 8000, 8000, 8000, 8000, } VQDMULL_LANE:3:result_int64x2 [] = { 4000, 4000, } -VQDMULL_LANE (check mul overflow) overflow output: -VQDMULL_LANE:4:vqdmull_lane_s16 Neon overflow 1 -VQDMULL_LANE:5:vqdmull_lane_s32 Neon overflow 1 +VQDMULL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULL_LANE:4:vqdmull_lane_s16 Neon cumulative saturation 1 +VQDMULL_LANE:5:vqdmull_lane_s32 Neon cumulative saturation 1 -VQDMULL_LANE (check mul overflow) output: +VQDMULL_LANE (check mul cumulative saturation) output: VQDMULL_LANE:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } VQDMULL_LANE:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } -VQDMULL_N overflow output: -VQDMULL_N:0:vqdmull_n_s16 Neon overflow 0 -VQDMULL_N:1:vqdmull_n_s32 Neon overflow 0 +VQDMULL_N cumulative saturation output: +VQDMULL_N:0:vqdmull_n_s16 Neon cumulative saturation 0 +VQDMULL_N:1:vqdmull_n_s32 Neon cumulative saturation 0 VQDMULL_N output: VQDMULL_N:2:result_int32x4 [] = { 44000, 44000, 44000, 44000, } VQDMULL_N:3:result_int64x2 [] = { aa000, aa000, } -VQDMULL_N (check mul overflow) overflow output: -VQDMULL_N:4:vqdmull_n_s16 Neon overflow 1 -VQDMULL_N:5:vqdmull_n_s32 Neon overflow 1 +VQDMULL_N (check mul cumulative saturation) cumulative saturation output: +VQDMULL_N:4:vqdmull_n_s16 Neon cumulative saturation 1 +VQDMULL_N:5:vqdmull_n_s32 Neon cumulative saturation 1 -VQDMULL_N (check mul overflow) output: +VQDMULL_N (check mul cumulative saturation) output: VQDMULL_N:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } VQDMULL_N:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } @@ -2554,23 +2554,23 @@ float32: VSUB/VSUBQ:22:result_float32x2 [] = { c00ccccd -0x1.19999a0p+1 -2.2, c00ccccd -0x1.19999a0p+1 -2.2, } VSUB/VSUBQ:23:result_float32x4 [] = { c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, } -VQADD/VQADDQ overflow output: -VQADD/VQADDQ:0:vqadd_s8 Neon overflow 0 -VQADD/VQADDQ:1:vqadd_s16 Neon overflow 0 -VQADD/VQADDQ:2:vqadd_s32 Neon overflow 0 -VQADD/VQADDQ:3:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:4:vqadd_u8 Neon overflow 1 -VQADD/VQADDQ:5:vqadd_u16 Neon overflow 1 -VQADD/VQADDQ:6:vqadd_u32 Neon overflow 1 -VQADD/VQADDQ:7:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:8:vqaddq_s8 Neon overflow 0 -VQADD/VQADDQ:9:vqaddq_s16 Neon overflow 0 -VQADD/VQADDQ:10:vqaddq_s32 Neon overflow 0 -VQADD/VQADDQ:11:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:12:vqaddq_u8 Neon overflow 1 -VQADD/VQADDQ:13:vqaddq_u16 Neon overflow 1 -VQADD/VQADDQ:14:vqaddq_u32 Neon overflow 1 -VQADD/VQADDQ:15:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ cumulative saturation output: +VQADD/VQADDQ:0:vqadd_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:1:vqadd_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:2:vqadd_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:3:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:4:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:5:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:6:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:7:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:8:vqaddq_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:9:vqaddq_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:10:vqaddq_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:11:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:12:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:13:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:14:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:15:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ output: VQADD/VQADDQ:16:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } @@ -2596,11 +2596,11 @@ VQADD/VQADDQ:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQADD/VQADDQ:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQADD/VQADDQ:37:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:38:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:39:vqadd_u64 Neon overflow 0 -VQADD/VQADDQ:40:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:41:vqaddq_u64 Neon overflow 0 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:38:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:39:vqadd_u64 Neon cumulative saturation 0 +VQADD/VQADDQ:40:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:41:vqaddq_u64 Neon cumulative saturation 0 64 bits saturation: VQADD/VQADDQ:42:result_int64x1 [] = { fffffffffffffff0, } @@ -2608,33 +2608,33 @@ VQADD/VQADDQ:43:result_uint64x1 [] = { fffffffffffffff0, } VQADD/VQADDQ:44:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } VQADD/VQADDQ:45:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:46:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:47:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:48:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:49:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:46:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:47:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:48:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:49:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ:50:result_int64x1 [] = { 34, } VQADD/VQADDQ:51:result_uint64x1 [] = { ffffffffffffffff, } VQADD/VQADDQ:52:result_int64x2 [] = { 34, 35, } VQADD/VQADDQ:53:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:54:vqadd_s64 Neon overflow 1 -VQADD/VQADDQ:55:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:56:vqaddq_s64 Neon overflow 1 -VQADD/VQADDQ:57:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:54:vqadd_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:55:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:56:vqaddq_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:57:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ:58:result_int64x1 [] = { 8000000000000000, } VQADD/VQADDQ:59:result_uint64x1 [] = { ffffffffffffffff, } VQADD/VQADDQ:60:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } VQADD/VQADDQ:61:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } less than 64 bits saturation: -VQADD/VQADDQ:62:vqadd_s8 Neon overflow 1 -VQADD/VQADDQ:63:vqadd_s16 Neon overflow 1 -VQADD/VQADDQ:64:vqadd_s32 Neon overflow 1 -VQADD/VQADDQ:65:vqaddq_s8 Neon overflow 1 -VQADD/VQADDQ:66:vqaddq_s16 Neon overflow 1 -VQADD/VQADDQ:67:vqaddq_s32 Neon overflow 1 +VQADD/VQADDQ:62:vqadd_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:63:vqadd_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:64:vqadd_s32 Neon cumulative saturation 1 +VQADD/VQADDQ:65:vqaddq_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:66:vqaddq_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:67:vqaddq_s32 Neon cumulative saturation 1 VQADD/VQADDQ:68:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQADD/VQADDQ:69:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQADD/VQADDQ:70:result_int32x2 [] = { 80000000, 80000000, } @@ -2642,13 +2642,13 @@ VQADD/VQADDQ:71:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ff VQADD/VQADDQ:72:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } VQADD/VQADDQ:73:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } -VQADD/VQADDQ less than 64 bits saturation overflow output: -VQADD/VQADDQ:74:vqadd_u8 Neon overflow 1 -VQADD/VQADDQ:75:vqadd_u16 Neon overflow 1 -VQADD/VQADDQ:76:vqadd_u32 Neon overflow 1 -VQADD/VQADDQ:77:vqaddq_u8 Neon overflow 1 -VQADD/VQADDQ:78:vqaddq_u16 Neon overflow 1 -VQADD/VQADDQ:79:vqaddq_u32 Neon overflow 1 +VQADD/VQADDQ less than 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:74:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:75:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:76:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:77:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:78:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:79:vqaddq_u32 Neon cumulative saturation 1 VQADD/VQADDQ:80:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } VQADD/VQADDQ:81:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } VQADD/VQADDQ:82:result_uint32x2 [] = { ffffffff, ffffffff, } @@ -2684,13 +2684,13 @@ float32: VABS/VABSQ:22:result_float32x2 [] = { 40133333 0x1.2666660p+1 2.3, 40133333 0x1.2666660p+1 2.3, } VABS/VABSQ:23:result_float32x4 [] = { 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, } -VQABS/VQABSQ overflow output: -VQABS/VQABSQ:0:vqabs_s8 Neon overflow 0 -VQABS/VQABSQ:1:vqabs_s16 Neon overflow 0 -VQABS/VQABSQ:2:vqabs_s32 Neon overflow 0 -VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 0 -VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 0 -VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 0 +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 0 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 0 VQABS/VQABSQ output: VQABS/VQABSQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } @@ -2716,13 +2716,13 @@ VQABS/VQABSQ:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQABS/VQABSQ:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQABS/VQABSQ:27:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQABS/VQABSQ overflow output: -VQABS/VQABSQ:0:vqabs_s8 Neon overflow 1 -VQABS/VQABSQ:1:vqabs_s16 Neon overflow 1 -VQABS/VQABSQ:2:vqabs_s32 Neon overflow 1 -VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 1 -VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 1 -VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 1 +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 1 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 1 VQABS/VQABSQ output: VQABS/VQABSQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -2860,13 +2860,13 @@ float32: VNEG/VNEGQ:22:result_float32x2 [] = { c0133333 -0x1.2666660p+1 -2.3, c0133333 -0x1.2666660p+1 -2.3, } VNEG/VNEGQ:23:result_float32x4 [] = { c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, } -VQNEG/VQNEGQ overflow output: -VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 0 -VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 0 -VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 0 -VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 0 -VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 0 -VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 0 +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 0 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 0 VQNEG/VQNEGQ output: VQNEG/VQNEGQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } @@ -2892,13 +2892,13 @@ VQNEG/VQNEGQ:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQNEG/VQNEGQ:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQNEG/VQNEGQ:27:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQNEG/VQNEGQ overflow output: -VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 1 -VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 1 -VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 1 -VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 1 -VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 1 -VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 1 +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 1 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 1 VQNEG/VQNEGQ output: VQNEG/VQNEGQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -3608,11 +3608,11 @@ VREINTERPRET/VREINTERPRETQ:199:result_uint64x2 [] = { c1700000c1800000, c1500000 VREINTERPRET/VREINTERPRETQ:200:result_poly8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } VREINTERPRET/VREINTERPRETQ:201:result_poly16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } -VQRDMULH overflow output: -VQRDMULH:0:vqrdmulh_s16 Neon overflow 0 -VQRDMULH:1:vqrdmulh_s32 Neon overflow 0 -VQRDMULH:2:vqrdmulhq_s16 Neon overflow 0 -VQRDMULH:3:vqrdmulhq_s32 Neon overflow 0 +VQRDMULH cumulative saturation output: +VQRDMULH:0:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:1:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:2:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:3:vqrdmulhq_s32 Neon cumulative saturation 0 VQRDMULH output: VQRDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -3638,13 +3638,13 @@ VQRDMULH:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQRDMULH:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRDMULH:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRDMULH (check mul overflow) overflow output: -VQRDMULH:26:vqrdmulh_s16 Neon overflow 1 -VQRDMULH:27:vqrdmulh_s32 Neon overflow 1 -VQRDMULH:28:vqrdmulhq_s16 Neon overflow 1 -VQRDMULH:29:vqrdmulhq_s32 Neon overflow 1 +VQRDMULH (check mul cumulative saturation) cumulative saturation output: +VQRDMULH:26:vqrdmulh_s16 Neon cumulative saturation 1 +VQRDMULH:27:vqrdmulh_s32 Neon cumulative saturation 1 +VQRDMULH:28:vqrdmulhq_s16 Neon cumulative saturation 1 +VQRDMULH:29:vqrdmulhq_s32 Neon cumulative saturation 1 -VQRDMULH (check mul overflow) output: +VQRDMULH (check mul cumulative saturation) output: VQRDMULH:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH:32:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3668,13 +3668,13 @@ VQRDMULH:49:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQRDMULH:50:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRDMULH:51:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRDMULH (check rounding overflow) overflow output: -VQRDMULH:52:vqrdmulh_s16 Neon overflow 0 -VQRDMULH:53:vqrdmulh_s32 Neon overflow 0 -VQRDMULH:54:vqrdmulhq_s16 Neon overflow 0 -VQRDMULH:55:vqrdmulhq_s32 Neon overflow 0 +VQRDMULH (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH:52:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:53:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:54:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:55:vqrdmulhq_s32 Neon cumulative saturation 0 -VQRDMULH (check rounding overflow) output: +VQRDMULH (check rounding cumulative saturation) output: VQRDMULH:56:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH:57:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH:58:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3698,11 +3698,11 @@ VQRDMULH:75:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQRDMULH:76:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRDMULH:77:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRDMULH_LANE overflow output: -VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon overflow 0 -VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon overflow 0 -VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon overflow 0 -VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon overflow 0 +VQRDMULH_LANE cumulative saturation output: +VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon cumulative saturation 0 VQRDMULH_LANE output: VQRDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -3728,13 +3728,13 @@ VQRDMULH_LANE:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRDMULH_LANE:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRDMULH_LANE:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRDMULH_LANE (check mul overflow) overflow output: -VQRDMULH_LANE:26:vqrdmulh_lane_s16 Neon overflow 1 -VQRDMULH_LANE:27:vqrdmulh_lane_s32 Neon overflow 1 -VQRDMULH_LANE:28:vqrdmulhq_lane_s16 Neon overflow 1 -VQRDMULH_LANE:29:vqrdmulhq_lane_s32 Neon overflow 1 +VQRDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:26:vqrdmulh_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:27:vqrdmulh_lane_s32 Neon cumulative saturation 1 +VQRDMULH_LANE:28:vqrdmulhq_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:29:vqrdmulhq_lane_s32 Neon cumulative saturation 1 -VQRDMULH_LANE (check mul overflow) output: +VQRDMULH_LANE (check mul cumulative saturation) output: VQRDMULH_LANE:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_LANE:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_LANE:32:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3758,13 +3758,13 @@ VQRDMULH_LANE:49:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRDMULH_LANE:50:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRDMULH_LANE:51:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRDMULH_LANE (check rounding overflow) overflow output: -VQRDMULH_LANE:52:vqrdmulh_lane_s16 Neon overflow 0 -VQRDMULH_LANE:53:vqrdmulh_lane_s32 Neon overflow 0 -VQRDMULH_LANE:54:vqrdmulhq_lane_s16 Neon overflow 0 -VQRDMULH_LANE:55:vqrdmulhq_lane_s32 Neon overflow 0 +VQRDMULH_LANE (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:52:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:53:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:54:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:55:vqrdmulhq_lane_s32 Neon cumulative saturation 0 -VQRDMULH_LANE (check rounding overflow) output: +VQRDMULH_LANE (check rounding cumulative saturation) output: VQRDMULH_LANE:56:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_LANE:57:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_LANE:58:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3788,11 +3788,11 @@ VQRDMULH_LANE:75:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRDMULH_LANE:76:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRDMULH_LANE:77:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRDMULH_N overflow output: -VQRDMULH_N:0:vqrdmulh_n_s16 Neon overflow 0 -VQRDMULH_N:1:vqrdmulh_n_s32 Neon overflow 0 -VQRDMULH_N:2:vqrdmulhq_n_s16 Neon overflow 0 -VQRDMULH_N:3:vqrdmulhq_n_s32 Neon overflow 0 +VQRDMULH_N cumulative saturation output: +VQRDMULH_N:0:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:1:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:2:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:3:vqrdmulhq_n_s32 Neon cumulative saturation 0 VQRDMULH_N output: VQRDMULH_N:4:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffd, } @@ -3800,13 +3800,13 @@ VQRDMULH_N:5:result_int32x2 [] = { fffffffe, fffffffe, } VQRDMULH_N:6:result_int16x8 [] = { 6, 6, 6, 5, 5, 4, 4, 4, } VQRDMULH_N:7:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } -VQRDMULH_N (check mul overflow) overflow output: -VQRDMULH_N:8:vqrdmulh_n_s16 Neon overflow 1 -VQRDMULH_N:9:vqrdmulh_n_s32 Neon overflow 1 -VQRDMULH_N:10:vqrdmulhq_n_s16 Neon overflow 1 -VQRDMULH_N:11:vqrdmulhq_n_s32 Neon overflow 1 +VQRDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_N:8:vqrdmulh_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:9:vqrdmulh_n_s32 Neon cumulative saturation 1 +VQRDMULH_N:10:vqrdmulhq_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:11:vqrdmulhq_n_s32 Neon cumulative saturation 1 -VQRDMULH_N (check mul overflow) output: +VQRDMULH_N (check mul cumulative saturation) output: VQRDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3830,13 +3830,13 @@ VQRDMULH_N:31:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRDMULH_N:32:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRDMULH_N:33:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRDMULH_N (check rounding overflow) overflow output: -VQRDMULH_N:34:vqrdmulh_n_s16 Neon overflow 0 -VQRDMULH_N:35:vqrdmulh_n_s32 Neon overflow 0 -VQRDMULH_N:36:vqrdmulhq_n_s16 Neon overflow 0 -VQRDMULH_N:37:vqrdmulhq_n_s32 Neon overflow 0 +VQRDMULH_N (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_N:34:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:35:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:36:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:37:vqrdmulhq_n_s32 Neon cumulative saturation 0 -VQRDMULH_N (check rounding overflow) output: +VQRDMULH_N (check rounding cumulative saturation) output: VQRDMULH_N:38:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_N:39:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_N:40:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3860,23 +3860,23 @@ VQRDMULH_N:57:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRDMULH_N:58:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRDMULH_N:59:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (with input = 0) overflow output: -VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (with input = 0) cumulative saturation output: +VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (with input = 0) output: VQRSHL/VQRSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -3902,23 +3902,23 @@ VQRSHL/VQRSHLQ:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRSHL/VQRSHLQ:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:37:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (input 0 and negative shift amount) overflow output: -VQRSHL/VQRSHLQ:38:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:39:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:40:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:41:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:42:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:43:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:44:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:45:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:46:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:47:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:48:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:49:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:50:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:51:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:52:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:53:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:38:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:39:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:40:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:41:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:42:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:43:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:44:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:45:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:46:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:47:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:48:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:49:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:50:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:51:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:52:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:53:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: VQRSHL/VQRSHLQ:54:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -3944,23 +3944,23 @@ VQRSHL/VQRSHLQ:73:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRSHL/VQRSHLQ:74:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:75:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ overflow output: -VQRSHL/VQRSHLQ:76:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:77:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:78:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:79:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:80:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:81:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:82:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:83:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:84:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:85:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:86:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:87:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:88:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:89:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:90:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:91:vqrshlq_u64 Neon overflow 1 +VQRSHL/VQRSHLQ cumulative saturation output: +VQRSHL/VQRSHLQ:76:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:77:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:78:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:79:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:80:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:81:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:82:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:83:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:84:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:85:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:86:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:87:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:88:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:89:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:90:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:91:vqrshlq_u64 Neon cumulative saturation 1 VQRSHL/VQRSHLQ output: VQRSHL/VQRSHLQ:92:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } @@ -3986,23 +3986,23 @@ VQRSHL/VQRSHLQ:111:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQRSHL/VQRSHLQ:112:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:113:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (negative shift amount) overflow output: -VQRSHL/VQRSHLQ:114:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:115:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:116:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:117:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:118:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:119:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:120:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:121:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:122:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:123:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:124:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:125:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:126:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:127:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:128:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:129:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:114:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:115:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:116:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:117:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:118:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:119:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:120:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:121:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:122:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:123:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:124:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:125:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:126:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:127:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:128:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:129:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (negative shift amount) output: VQRSHL/VQRSHLQ:130:result_int8x8 [] = { fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, } @@ -4028,25 +4028,25 @@ VQRSHL/VQRSHLQ:149:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQRSHL/VQRSHLQ:150:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:151:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (checking overflow: shift by -1) overflow output: -VQRSHL/VQRSHLQ:152:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:153:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:154:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:155:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:156:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:157:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:158:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:159:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:160:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:161:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:162:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:163:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:164:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:165:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:166:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:167:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: shift by -1) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) cumulative saturation output: +VQRSHL/VQRSHLQ:152:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:153:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:154:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:155:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:156:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:157:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:158:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:159:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:160:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:161:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:162:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:163:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:164:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:165:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:166:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:167:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) output: VQRSHL/VQRSHLQ:168:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } VQRSHL/VQRSHLQ:169:result_int16x4 [] = { 4000, 4000, 4000, 4000, } VQRSHL/VQRSHLQ:170:result_int32x2 [] = { 40000000, 40000000, } @@ -4070,25 +4070,25 @@ VQRSHL/VQRSHLQ:187:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQRSHL/VQRSHLQ:188:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:189:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (checking overflow: shift by -3) overflow output: -VQRSHL/VQRSHLQ:190:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:191:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:192:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:193:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:194:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:195:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:196:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:197:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:198:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:199:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:200:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:201:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:202:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:203:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:204:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:205:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: shift by -3) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) cumulative saturation output: +VQRSHL/VQRSHLQ:190:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:191:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:192:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:193:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:194:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:195:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:196:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:197:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:198:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:199:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:200:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:201:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:202:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:203:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:204:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:205:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) output: VQRSHL/VQRSHLQ:206:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } VQRSHL/VQRSHLQ:207:result_int16x4 [] = { 1000, 1000, 1000, 1000, } VQRSHL/VQRSHLQ:208:result_int32x2 [] = { 10000000, 10000000, } @@ -4112,25 +4112,25 @@ VQRSHL/VQRSHLQ:225:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQRSHL/VQRSHLQ:226:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:227:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount) overflow output: -VQRSHL/VQRSHLQ:228:vqrshl_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:229:vqrshl_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:230:vqrshl_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:231:vqrshl_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:232:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:233:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:234:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:235:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:236:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:237:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:238:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:239:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:240:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:241:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:242:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:243:vqrshlq_u64 Neon overflow 1 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:228:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:229:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:230:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:231:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:232:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:233:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:234:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:235:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:236:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:237:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:238:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:239:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:240:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:241:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:242:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:243:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) output: VQRSHL/VQRSHLQ:244:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } VQRSHL/VQRSHLQ:245:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRSHL/VQRSHLQ:246:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4154,25 +4154,25 @@ VQRSHL/VQRSHLQ:263:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQRSHL/VQRSHLQ:264:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:265:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount with negative input) overflow output: -VQRSHL/VQRSHLQ:266:vqrshl_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:267:vqrshl_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:268:vqrshl_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:269:vqrshl_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:270:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:271:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:272:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:273:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:274:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:275:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:276:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:277:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:278:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:279:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:280:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:281:vqrshlq_u64 Neon overflow 1 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount with negative input) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) cumulative saturation output: +VQRSHL/VQRSHLQ:266:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:267:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:268:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:269:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:270:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:271:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:272:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:273:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:274:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:275:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:276:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:277:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:278:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:279:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:280:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:281:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) output: VQRSHL/VQRSHLQ:282:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQRSHL/VQRSHLQ:283:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQRSHL/VQRSHLQ:284:result_int32x2 [] = { 80000000, 80000000, } @@ -4196,25 +4196,25 @@ VQRSHL/VQRSHLQ:301:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQRSHL/VQRSHLQ:302:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:303:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (checking overflow: large negative shift amount) overflow output: -VQRSHL/VQRSHLQ:304:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:305:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:306:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:307:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:308:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:309:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:310:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:311:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:312:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:313:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:314:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:315:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:316:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:317:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:318:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:319:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: large negative shift amount) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:304:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:305:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:306:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:307:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:308:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:309:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:310:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:311:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:312:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:313:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:314:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:315:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:316:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:317:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:318:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:319:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) output: VQRSHL/VQRSHLQ:320:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQRSHL/VQRSHLQ:321:result_int16x4 [] = { 0, 0, 0, 0, } VQRSHL/VQRSHLQ:322:result_int32x2 [] = { 0, 0, } @@ -4238,25 +4238,25 @@ VQRSHL/VQRSHLQ:339:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQRSHL/VQRSHLQ:340:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHL/VQRSHLQ:341:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount with 0 input) overflow output: -VQRSHL/VQRSHLQ:342:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:343:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:344:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:345:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:346:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:347:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:348:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:349:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:350:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:351:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:352:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:353:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:354:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:355:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:356:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:357:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount with 0 input) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) cumulative saturation output: +VQRSHL/VQRSHLQ:342:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:343:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:344:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:345:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:346:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:347:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:348:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:349:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:350:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:351:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:352:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:353:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:354:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:355:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:356:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:357:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) output: VQRSHL/VQRSHLQ:358:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQRSHL/VQRSHLQ:359:result_int16x4 [] = { 0, 0, 0, 0, } VQRSHL/VQRSHLQ:360:result_int32x2 [] = { 0, 0, } @@ -5632,13 +5632,13 @@ VMVN/VMVNQ:19:result_poly8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1 VMVN/VMVNQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VMVN/VMVNQ:21:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQMOVN overflow output: -VQMOVN:0:vqmovn_s16 Neon overflow 0 -VQMOVN:1:vqmovn_s32 Neon overflow 0 -VQMOVN:2:vqmovn_s64 Neon overflow 0 -VQMOVN:3:vqmovn_u16 Neon overflow 0 -VQMOVN:4:vqmovn_u32 Neon overflow 0 -VQMOVN:5:vqmovn_u64 Neon overflow 0 +VQMOVN cumulative saturation output: +VQMOVN:0:vqmovn_s16 Neon cumulative saturation 0 +VQMOVN:1:vqmovn_s32 Neon cumulative saturation 0 +VQMOVN:2:vqmovn_s64 Neon cumulative saturation 0 +VQMOVN:3:vqmovn_u16 Neon cumulative saturation 0 +VQMOVN:4:vqmovn_u32 Neon cumulative saturation 0 +VQMOVN:5:vqmovn_u64 Neon cumulative saturation 0 VQMOVN output: VQMOVN:6:result_int8x8 [] = { 12, 12, 12, 12, 12, 12, 12, 12, } @@ -5664,13 +5664,13 @@ VQMOVN:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQMOVN:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQMOVN:27:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQMOVN overflow output: -VQMOVN:28:vqmovn_s16 Neon overflow 1 -VQMOVN:29:vqmovn_s32 Neon overflow 1 -VQMOVN:30:vqmovn_s64 Neon overflow 1 -VQMOVN:31:vqmovn_u16 Neon overflow 1 -VQMOVN:32:vqmovn_u32 Neon overflow 1 -VQMOVN:33:vqmovn_u64 Neon overflow 1 +VQMOVN cumulative saturation output: +VQMOVN:28:vqmovn_s16 Neon cumulative saturation 1 +VQMOVN:29:vqmovn_s32 Neon cumulative saturation 1 +VQMOVN:30:vqmovn_s64 Neon cumulative saturation 1 +VQMOVN:31:vqmovn_u16 Neon cumulative saturation 1 +VQMOVN:32:vqmovn_u32 Neon cumulative saturation 1 +VQMOVN:33:vqmovn_u64 Neon cumulative saturation 1 VQMOVN output: VQMOVN:34:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -5696,10 +5696,10 @@ VQMOVN:53:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQMOVN:54:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQMOVN:55:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQMOVUN overflow output: -VQMOVUN:0:vqmovun_s16 Neon overflow 0 -VQMOVUN:1:vqmovun_s32 Neon overflow 0 -VQMOVUN:2:vqmovun_s64 Neon overflow 0 +VQMOVUN cumulative saturation output: +VQMOVUN:0:vqmovun_s16 Neon cumulative saturation 0 +VQMOVUN:1:vqmovun_s32 Neon cumulative saturation 0 +VQMOVUN:2:vqmovun_s64 Neon cumulative saturation 0 VQMOVUN output: VQMOVUN:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -5725,10 +5725,10 @@ VQMOVUN:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 VQMOVUN:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQMOVUN:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQMOVUN (negative input) overflow output: -VQMOVUN:25:vqmovun_s16 Neon overflow 1 -VQMOVUN:26:vqmovun_s32 Neon overflow 1 -VQMOVUN:27:vqmovun_s64 Neon overflow 1 +VQMOVUN (negative input) cumulative saturation output: +VQMOVUN:25:vqmovun_s16 Neon cumulative saturation 1 +VQMOVUN:26:vqmovun_s32 Neon cumulative saturation 1 +VQMOVUN:27:vqmovun_s64 Neon cumulative saturation 1 VQMOVUN (negative input) output: VQMOVUN:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6162,15 +6162,15 @@ VPADAL/VPADALQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VPADAL/VPADALQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VPADAL/VPADALQ:21:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHLU_N/VQSHLUQ_N (negative input) overflow output: -VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N (negative input) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon cumulative saturation 1 VQSHLU_N/VQSHLUQ_N (negative input) output: VQSHLU_N/VQSHLUQ_N:8:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6196,17 +6196,17 @@ VQSHLU_N/VQSHLUQ_N:27:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHLU_N/VQSHLUQ_N:28:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:29:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) overflow output: -VQSHLU_N/VQSHLUQ_N:30:vqshlu_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:31:vqshlu_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s64 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:34:vqshluq_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:35:vqshluq_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s64 Neon overflow 0 - -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) output: +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:30:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:31:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:34:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:35:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) output: VQSHLU_N/VQSHLUQ_N:38:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHLU_N/VQSHLUQ_N:39:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:40:result_int32x2 [] = { 33333333, 33333333, } @@ -6230,17 +6230,17 @@ VQSHLU_N/VQSHLUQ_N:57:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHLU_N/VQSHLUQ_N:58:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:59:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) overflow output: -VQSHLU_N/VQSHLUQ_N:60:vqshlu_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:61:vqshlu_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:62:vqshlu_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:63:vqshlu_n_s64 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:64:vqshluq_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:65:vqshluq_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:66:vqshluq_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:67:vqshluq_n_s64 Neon overflow 1 - -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) output: +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:60:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:61:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:62:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:63:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:64:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:65:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:66:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:67:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) output: VQSHLU_N/VQSHLUQ_N:68:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHLU_N/VQSHLUQ_N:69:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:70:result_int32x2 [] = { 33333333, 33333333, } @@ -6264,15 +6264,15 @@ VQSHLU_N/VQSHLUQ_N:87:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHLU_N/VQSHLUQ_N:88:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:89:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHLU_N/VQSHLUQ_N overflow output: -VQSHLU_N/VQSHLUQ_N:90:vqshlu_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:91:vqshlu_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:92:vqshlu_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:93:vqshlu_n_s64 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:94:vqshluq_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:95:vqshluq_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:96:vqshluq_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:97:vqshluq_n_s64 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:90:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:91:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:92:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:93:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:94:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:95:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:96:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:97:vqshluq_n_s64 Neon cumulative saturation 0 VQSHLU_N/VQSHLUQ_N output: VQSHLU_N/VQSHLUQ_N:98:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6418,13 +6418,13 @@ VCNT/VCNTQ:19:result_poly8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 VCNT/VCNTQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VCNT/VCNTQ:21:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHRN_N overflow output: -VQSHRN_N:0:vqshrn_n_s16 Neon overflow 0 -VQSHRN_N:1:vqshrn_n_s32 Neon overflow 0 -VQSHRN_N:2:vqshrn_n_s64 Neon overflow 0 -VQSHRN_N:3:vqshrn_n_u16 Neon overflow 1 -VQSHRN_N:4:vqshrn_n_u32 Neon overflow 1 -VQSHRN_N:5:vqshrn_n_u64 Neon overflow 1 +VQSHRN_N cumulative saturation output: +VQSHRN_N:0:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:1:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:2:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:3:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:4:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:5:vqshrn_n_u64 Neon cumulative saturation 1 VQSHRN_N output: VQSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } @@ -6450,13 +6450,13 @@ VQSHRN_N:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQSHRN_N:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHRN_N:27:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHRN_N (check saturation: shift by 3) overflow output: -VQSHRN_N:28:vqshrn_n_s16 Neon overflow 1 -VQSHRN_N:29:vqshrn_n_s32 Neon overflow 1 -VQSHRN_N:30:vqshrn_n_s64 Neon overflow 1 -VQSHRN_N:31:vqshrn_n_u16 Neon overflow 1 -VQSHRN_N:32:vqshrn_n_u32 Neon overflow 1 -VQSHRN_N:33:vqshrn_n_u64 Neon overflow 1 +VQSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQSHRN_N:28:vqshrn_n_s16 Neon cumulative saturation 1 +VQSHRN_N:29:vqshrn_n_s32 Neon cumulative saturation 1 +VQSHRN_N:30:vqshrn_n_s64 Neon cumulative saturation 1 +VQSHRN_N:31:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:32:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:33:vqshrn_n_u64 Neon cumulative saturation 1 VQSHRN_N (check saturation: shift by 3) output: VQSHRN_N:34:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -6482,13 +6482,13 @@ VQSHRN_N:53:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 3 VQSHRN_N:54:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHRN_N:55:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHRN_N (check saturation: shift by max) overflow output: -VQSHRN_N:56:vqshrn_n_s16 Neon overflow 0 -VQSHRN_N:57:vqshrn_n_s32 Neon overflow 0 -VQSHRN_N:58:vqshrn_n_s64 Neon overflow 0 -VQSHRN_N:59:vqshrn_n_u16 Neon overflow 0 -VQSHRN_N:60:vqshrn_n_u32 Neon overflow 0 -VQSHRN_N:61:vqshrn_n_u64 Neon overflow 0 +VQSHRN_N (check saturation: shift by max) cumulative saturation output: +VQSHRN_N:56:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:57:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:58:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:59:vqshrn_n_u16 Neon cumulative saturation 0 +VQSHRN_N:60:vqshrn_n_u32 Neon cumulative saturation 0 +VQSHRN_N:61:vqshrn_n_u64 Neon cumulative saturation 0 VQSHRN_N (check saturation: shift by max) output: VQSHRN_N:62:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -6562,10 +6562,10 @@ VPMIN:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VPMIN:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VPMIN:21:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHRUN_N (negative input) overflow output: -VQSHRUN_N:0:vqshrun_n_s16 Neon overflow 1 -VQSHRUN_N:1:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:2:vqshrun_n_s64 Neon overflow 1 +VQSHRUN_N (negative input) cumulative saturation output: +VQSHRUN_N:0:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:1:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:2:vqshrun_n_s64 Neon cumulative saturation 1 VQSHRUN_N (negative input) output: VQSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6591,12 +6591,12 @@ VQSHRUN_N:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHRUN_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHRUN_N:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHRUN_N (check saturation/overflow) overflow output: -VQSHRUN_N:25:vqshrun_n_s16 Neon overflow 1 -VQSHRUN_N:26:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:27:vqshrun_n_s64 Neon overflow 1 +VQSHRUN_N (check cumulative saturation) cumulative saturation output: +VQSHRUN_N:25:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:26:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:27:vqshrun_n_s64 Neon cumulative saturation 1 -VQSHRUN_N (check saturation/overflow) output: +VQSHRUN_N (check cumulative saturation) output: VQSHRUN_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHRUN_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHRUN_N:30:result_int32x2 [] = { 33333333, 33333333, } @@ -6620,10 +6620,10 @@ VQSHRUN_N:47:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHRUN_N:48:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHRUN_N:49:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQSHRUN_N overflow output: -VQSHRUN_N:50:vqshrun_n_s16 Neon overflow 0 -VQSHRUN_N:51:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:52:vqshrun_n_s64 Neon overflow 0 +VQSHRUN_N cumulative saturation output: +VQSHRUN_N:50:vqshrun_n_s16 Neon cumulative saturation 0 +VQSHRUN_N:51:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:52:vqshrun_n_s64 Neon cumulative saturation 0 VQSHRUN_N output: VQSHRUN_N:53:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6649,10 +6649,10 @@ VQSHRUN_N:72:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQSHRUN_N:73:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQSHRUN_N:74:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHRUN_N (negative input) overflow output: -VQRSHRUN_N:0:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:1:vqrshrun_n_s32 Neon overflow 0 -VQRSHRUN_N:2:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (negative input) cumulative saturation output: +VQRSHRUN_N:0:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:1:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:2:vqrshrun_n_s64 Neon cumulative saturation 1 VQRSHRUN_N (negative input) output: VQRSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6678,12 +6678,12 @@ VQRSHRUN_N:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRSHRUN_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHRUN_N:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHRUN_N (check saturation/overflow: shift by 1) overflow output: -VQRSHRUN_N:25:vqrshrun_n_s16 Neon overflow 1 -VQRSHRUN_N:26:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:27:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQRSHRUN_N:25:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:26:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:27:vqrshrun_n_s64 Neon cumulative saturation 1 -VQRSHRUN_N (check saturation/overflow: shift by 1) output: +VQRSHRUN_N (check cumulative saturation: shift by 1) output: VQRSHRUN_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:30:result_int32x2 [] = { 33333333, 33333333, } @@ -6707,12 +6707,12 @@ VQRSHRUN_N:47:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRSHRUN_N:48:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHRUN_N:49:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHRUN_N (check saturation/overflow: shift by max, positive input) overflow output: -VQRSHRUN_N:50:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:51:vqrshrun_n_s32 Neon overflow 0 -VQRSHRUN_N:52:vqrshrun_n_s64 Neon overflow 0 +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) cumulative saturation output: +VQRSHRUN_N:50:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:51:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:52:vqrshrun_n_s64 Neon cumulative saturation 0 -VQRSHRUN_N (check saturation/overflow: shift by max, positive input) output: +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) output: VQRSHRUN_N:53:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:54:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:55:result_int32x2 [] = { 33333333, 33333333, } @@ -6736,12 +6736,12 @@ VQRSHRUN_N:72:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRSHRUN_N:73:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHRUN_N:74:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHRUN_N (check saturation/overflow: shift by max, negative input) overflow output: -VQRSHRUN_N:75:vqrshrun_n_s16 Neon overflow 1 -VQRSHRUN_N:76:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:77:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) cumulative saturation output: +VQRSHRUN_N:75:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:76:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:77:vqrshrun_n_s64 Neon cumulative saturation 1 -VQRSHRUN_N (check saturation/overflow: shift by max, negative input) output: +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) output: VQRSHRUN_N:78:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:79:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:80:result_int32x2 [] = { 33333333, 33333333, } @@ -6765,10 +6765,10 @@ VQRSHRUN_N:97:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, VQRSHRUN_N:98:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } VQRSHRUN_N:99:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } -VQRSHRUN_N overflow output: -VQRSHRUN_N:100:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:101:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:102:vqrshrun_n_s64 Neon overflow 0 +VQRSHRUN_N cumulative saturation output: +VQRSHRUN_N:100:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:101:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:102:vqrshrun_n_s64 Neon cumulative saturation 0 VQRSHRUN_N output: VQRSHRUN_N:103:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } diff --git a/ref-rvct-neon.txt b/ref-rvct-neon.txt index 2dd22e8..5915ec2 100644 --- a/ref-rvct-neon.txt +++ b/ref-rvct-neon.txt @@ -367,9 +367,9 @@ VGET_LOW:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VGET_LOW:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VGET_LOW:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_LANE overflow output: -VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon overflow 0 -VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon overflow 0 +VQDMLAL_LANE cumulative saturation output: +VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon cumulative saturation 0 VQDMLAL_LANE output: VQDMLAL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -397,9 +397,9 @@ VQDMLAL_LANE:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLAL_LANE:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_LANE:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_LANE (mul with input=0) overflow output: -VQDMLAL_LANE:26:vqdmlal_lane_s16 Neon overflow 0 -VQDMLAL_LANE:27:vqdmlal_lane_s32 Neon overflow 0 +VQDMLAL_LANE (mul with input=0) cumulative saturation output: +VQDMLAL_LANE:26:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:27:vqdmlal_lane_s32 Neon cumulative saturation 0 VQDMLAL_LANE (mul with input=0) output: VQDMLAL_LANE:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -427,11 +427,11 @@ VQDMLAL_LANE:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLAL_LANE:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_LANE:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_LANE (check mul overflow) overflow output: -VQDMLAL_LANE:52:vqdmlal_lane_s16 Neon overflow 1 -VQDMLAL_LANE:53:vqdmlal_lane_s32 Neon overflow 1 +VQDMLAL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_LANE:52:vqdmlal_lane_s16 Neon cumulative saturation 1 +VQDMLAL_LANE:53:vqdmlal_lane_s32 Neon cumulative saturation 1 -VQDMLAL_LANE (check mul overflow) output: +VQDMLAL_LANE (check mul cumulative saturation) output: VQDMLAL_LANE:54:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL_LANE:55:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL_LANE:56:result_int32x2 [] = { 33333333, 33333333, } @@ -457,9 +457,9 @@ VQDMLAL_LANE:75:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLAL_LANE:76:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_LANE:77:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_LANE overflow output: -VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon overflow 0 -VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon overflow 0 +VQDMLSL_LANE cumulative saturation output: +VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon cumulative saturation 0 VQDMLSL_LANE output: VQDMLSL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -487,9 +487,9 @@ VQDMLSL_LANE:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLSL_LANE:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL_LANE:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_LANE (mul with input=0) overflow output: -VQDMLSL_LANE:26:vqdmlsl_lane_s16 Neon overflow 0 -VQDMLSL_LANE:27:vqdmlsl_lane_s32 Neon overflow 0 +VQDMLSL_LANE (mul with input=0) cumulative saturation output: +VQDMLSL_LANE:26:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:27:vqdmlsl_lane_s32 Neon cumulative saturation 0 VQDMLSL_LANE (mul with input=0) output: VQDMLSL_LANE:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -517,11 +517,11 @@ VQDMLSL_LANE:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLSL_LANE:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL_LANE:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_LANE (check mul overflow) overflow output: -VQDMLSL_LANE:52:vqdmlsl_lane_s16 Neon overflow 1 -VQDMLSL_LANE:53:vqdmlsl_lane_s32 Neon overflow 1 +VQDMLSL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_LANE:52:vqdmlsl_lane_s16 Neon cumulative saturation 1 +VQDMLSL_LANE:53:vqdmlsl_lane_s32 Neon cumulative saturation 1 -VQDMLSL_LANE (check mul overflow) output: +VQDMLSL_LANE (check mul cumulative saturation) output: VQDMLSL_LANE:54:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL_LANE:55:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL_LANE:56:result_int32x2 [] = { 33333333, 33333333, } @@ -547,9 +547,9 @@ VQDMLSL_LANE:75:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMLSL_LANE:76:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL_LANE:77:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_N overflow output: -VQDMLAL_N:0:vqdmlal_n_s16 Neon overflow 0 -VQDMLAL_N:1:vqdmlal_n_s32 Neon overflow 0 +VQDMLAL_N cumulative saturation output: +VQDMLAL_N:0:vqdmlal_n_s16 Neon cumulative saturation 0 +VQDMLAL_N:1:vqdmlal_n_s32 Neon cumulative saturation 0 VQDMLAL_N output: VQDMLAL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -577,11 +577,11 @@ VQDMLAL_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQDMLAL_N:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_N:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL_N (check mul overflow) overflow output: -VQDMLAL_N:26:vqdmlal_n_s16 Neon overflow 1 -VQDMLAL_N:27:vqdmlal_n_s32 Neon overflow 1 +VQDMLAL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_N:26:vqdmlal_n_s16 Neon cumulative saturation 1 +VQDMLAL_N:27:vqdmlal_n_s32 Neon cumulative saturation 1 -VQDMLAL_N (check mul overflow) output: +VQDMLAL_N (check mul cumulative saturation) output: VQDMLAL_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL_N:30:result_int32x2 [] = { 33333333, 33333333, } @@ -607,9 +607,9 @@ VQDMLAL_N:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQDMLAL_N:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL_N:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_N overflow output: -VQDMLSL_N:0:vqdmlsl_n_s16 Neon overflow 0 -VQDMLSL_N:1:vqdmlsl_n_s32 Neon overflow 0 +VQDMLSL_N cumulative saturation output: +VQDMLSL_N:0:vqdmlsl_n_s16 Neon cumulative saturation 0 +VQDMLSL_N:1:vqdmlsl_n_s32 Neon cumulative saturation 0 VQDMLSL_N output: VQDMLSL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -637,11 +637,11 @@ VQDMLSL_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQDMLSL_N:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL_N:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL_N (check mul overflow) overflow output: -VQDMLSL_N:26:vqdmlsl_n_s16 Neon overflow 1 -VQDMLSL_N:27:vqdmlsl_n_s32 Neon overflow 1 +VQDMLSL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_N:26:vqdmlsl_n_s16 Neon cumulative saturation 1 +VQDMLSL_N:27:vqdmlsl_n_s32 Neon cumulative saturation 1 -VQDMLSL_N (check mul overflow) output: +VQDMLSL_N (check mul cumulative saturation) output: VQDMLSL_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL_N:30:result_int32x2 [] = { 33333333, 33333333, } @@ -823,13 +823,13 @@ VRSHRN_N:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VRSHRN_N:70:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VRSHRN_N:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRN_N overflow output: -VQRSHRN_N:0:vqrshrn_n_s16 Neon overflow 0 -VQRSHRN_N:1:vqrshrn_n_s32 Neon overflow 0 -VQRSHRN_N:2:vqrshrn_n_s64 Neon overflow 0 -VQRSHRN_N:3:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:4:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:5:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N cumulative saturation output: +VQRSHRN_N:0:vqrshrn_n_s16 Neon cumulative saturation 0 +VQRSHRN_N:1:vqrshrn_n_s32 Neon cumulative saturation 0 +VQRSHRN_N:2:vqrshrn_n_s64 Neon cumulative saturation 0 +VQRSHRN_N:3:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:4:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:5:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N output: VQRSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } @@ -857,13 +857,13 @@ VQRSHRN_N:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHRN_N:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRN_N:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRN_N (check saturation: shift by 3) overflow output: -VQRSHRN_N:30:vqrshrn_n_s16 Neon overflow 1 -VQRSHRN_N:31:vqrshrn_n_s32 Neon overflow 1 -VQRSHRN_N:32:vqrshrn_n_s64 Neon overflow 1 -VQRSHRN_N:33:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:34:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:35:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQRSHRN_N:30:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:31:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:32:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:33:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:34:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:35:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N (check saturation: shift by 3) output: VQRSHRN_N:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -891,13 +891,13 @@ VQRSHRN_N:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHRN_N:58:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRN_N:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRN_N (check saturation: shift by max) overflow output: -VQRSHRN_N:60:vqrshrn_n_s16 Neon overflow 1 -VQRSHRN_N:61:vqrshrn_n_s32 Neon overflow 1 -VQRSHRN_N:62:vqrshrn_n_s64 Neon overflow 1 -VQRSHRN_N:63:vqrshrn_n_u16 Neon overflow 1 -VQRSHRN_N:64:vqrshrn_n_u32 Neon overflow 1 -VQRSHRN_N:65:vqrshrn_n_u64 Neon overflow 1 +VQRSHRN_N (check saturation: shift by max) cumulative saturation output: +VQRSHRN_N:60:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:61:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:62:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:63:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:64:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:65:vqrshrn_n_u64 Neon cumulative saturation 1 VQRSHRN_N (check saturation: shift by max) output: VQRSHRN_N:66:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -976,23 +976,23 @@ vgetq_lane_p16: fff6 vgetq_lane_f32: c1500000 -VQSUB/VQSUBQ overflow output: -VQSUB/VQSUBQ:0:vqsub_s8 Neon overflow 0 -VQSUB/VQSUBQ:1:vqsub_s16 Neon overflow 0 -VQSUB/VQSUBQ:2:vqsub_s32 Neon overflow 0 -VQSUB/VQSUBQ:3:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:4:vqsub_u8 Neon overflow 0 -VQSUB/VQSUBQ:5:vqsub_u16 Neon overflow 0 -VQSUB/VQSUBQ:6:vqsub_u32 Neon overflow 0 -VQSUB/VQSUBQ:7:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:8:vqsubq_s8 Neon overflow 0 -VQSUB/VQSUBQ:9:vqsubq_s16 Neon overflow 0 -VQSUB/VQSUBQ:10:vqsubq_s32 Neon overflow 0 -VQSUB/VQSUBQ:11:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:12:vqsubq_u8 Neon overflow 0 -VQSUB/VQSUBQ:13:vqsubq_u16 Neon overflow 0 -VQSUB/VQSUBQ:14:vqsubq_u32 Neon overflow 0 -VQSUB/VQSUBQ:15:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ cumulative saturation output: +VQSUB/VQSUBQ:0:vqsub_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:1:vqsub_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:2:vqsub_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:3:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:4:vqsub_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:5:vqsub_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:6:vqsub_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:7:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:8:vqsubq_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:9:vqsubq_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:10:vqsubq_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:11:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:12:vqsubq_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:13:vqsubq_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:14:vqsubq_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:15:vqsubq_u64 Neon cumulative saturation 0 VQSUB/VQSUBQ output: VQSUB/VQSUBQ:16:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } @@ -1020,11 +1020,11 @@ VQSUB/VQSUBQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQSUB/VQSUBQ:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSUB/VQSUBQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:40:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:41:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:42:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:43:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:40:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:41:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:42:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:43:vqsubq_u64 Neon cumulative saturation 0 64 bits saturation: VQSUB/VQSUBQ:44:result_int64x1 [] = { fffffffffffffff0, } @@ -1032,33 +1032,33 @@ VQSUB/VQSUBQ:45:result_uint64x1 [] = { fffffffffffffff0, } VQSUB/VQSUBQ:46:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } VQSUB/VQSUBQ:47:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:48:vqsub_s64 Neon overflow 0 -VQSUB/VQSUBQ:49:vqsub_u64 Neon overflow 0 -VQSUB/VQSUBQ:50:vqsubq_s64 Neon overflow 0 -VQSUB/VQSUBQ:51:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:48:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:49:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:50:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:51:vqsubq_u64 Neon cumulative saturation 0 VQSUB/VQSUBQ:52:result_int64x1 [] = { ffffffffffffffac, } VQSUB/VQSUBQ:53:result_uint64x1 [] = { ffffffffffffff68, } VQSUB/VQSUBQ:54:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } VQSUB/VQSUBQ:55:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } -VQSUB/VQSUBQ 64 bits saturation overflow output: -VQSUB/VQSUBQ:56:vqsub_s64 Neon overflow 1 -VQSUB/VQSUBQ:57:vqsub_u64 Neon overflow 1 -VQSUB/VQSUBQ:58:vqsubq_s64 Neon overflow 1 -VQSUB/VQSUBQ:59:vqsubq_u64 Neon overflow 1 +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:56:vqsub_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:57:vqsub_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:58:vqsubq_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:59:vqsubq_u64 Neon cumulative saturation 1 VQSUB/VQSUBQ:60:result_int64x1 [] = { 8000000000000000, } VQSUB/VQSUBQ:61:result_uint64x1 [] = { 0, } VQSUB/VQSUBQ:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } VQSUB/VQSUBQ:63:result_uint64x2 [] = { 0, 0, } less than 64 bits saturation: -VQSUB/VQSUBQ:64:vqsub_s8 Neon overflow 1 -VQSUB/VQSUBQ:65:vqsub_s16 Neon overflow 1 -VQSUB/VQSUBQ:66:vqsub_s32 Neon overflow 1 -VQSUB/VQSUBQ:67:vqsubq_s8 Neon overflow 1 -VQSUB/VQSUBQ:68:vqsubq_s16 Neon overflow 1 -VQSUB/VQSUBQ:69:vqsubq_s32 Neon overflow 1 +VQSUB/VQSUBQ:64:vqsub_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:65:vqsub_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:66:vqsub_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:67:vqsubq_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:68:vqsubq_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:69:vqsubq_s32 Neon cumulative saturation 1 VQSUB/VQSUBQ:70:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQSUB/VQSUBQ:71:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQSUB/VQSUBQ:72:result_int32x2 [] = { 80000000, 80000000, } @@ -1066,13 +1066,13 @@ VQSUB/VQSUBQ:73:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ff VQSUB/VQSUBQ:74:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } VQSUB/VQSUBQ:75:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } -VQSUB/VQSUBQ less than 64 bits saturation overflow output: -VQSUB/VQSUBQ:76:vqsub_u8 Neon overflow 1 -VQSUB/VQSUBQ:77:vqsub_u16 Neon overflow 1 -VQSUB/VQSUBQ:78:vqsub_u32 Neon overflow 1 -VQSUB/VQSUBQ:79:vqsubq_u8 Neon overflow 1 -VQSUB/VQSUBQ:80:vqsubq_u16 Neon overflow 1 -VQSUB/VQSUBQ:81:vqsubq_u32 Neon overflow 1 +VQSUB/VQSUBQ less than 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:76:vqsub_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:77:vqsub_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:78:vqsub_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:79:vqsubq_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:80:vqsubq_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:81:vqsubq_u32 Neon cumulative saturation 1 VQSUB/VQSUBQ:82:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQSUB/VQSUBQ:83:result_uint16x4 [] = { 0, 0, 0, 0, } VQSUB/VQSUBQ:84:result_uint32x2 [] = { 0, 0, } @@ -1080,11 +1080,11 @@ VQSUB/VQSUBQ:85:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VQSUB/VQSUBQ:86:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQSUB/VQSUBQ:87:result_uint32x4 [] = { 0, 0, 0, 0, } -VQDMULH overflow output: -VQDMULH:0:vqdmulh_s16 Neon overflow 0 -VQDMULH:1:vqdmulh_s32 Neon overflow 0 -VQDMULH:2:vqdmulhq_s16 Neon overflow 0 -VQDMULH:3:vqdmulhq_s32 Neon overflow 0 +VQDMULH cumulative saturation output: +VQDMULH:0:vqdmulh_s16 Neon cumulative saturation 0 +VQDMULH:1:vqdmulh_s32 Neon cumulative saturation 0 +VQDMULH:2:vqdmulhq_s16 Neon cumulative saturation 0 +VQDMULH:3:vqdmulhq_s32 Neon cumulative saturation 0 VQDMULH output: VQDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1112,11 +1112,11 @@ VQDMULH:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMULH:26:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULH overflow output: -VQDMULH:28:vqdmulh_s16 Neon overflow 1 -VQDMULH:29:vqdmulh_s32 Neon overflow 1 -VQDMULH:30:vqdmulhq_s16 Neon overflow 1 -VQDMULH:31:vqdmulhq_s32 Neon overflow 1 +VQDMULH cumulative saturation output: +VQDMULH:28:vqdmulh_s16 Neon cumulative saturation 1 +VQDMULH:29:vqdmulh_s32 Neon cumulative saturation 1 +VQDMULH:30:vqdmulhq_s16 Neon cumulative saturation 1 +VQDMULH:31:vqdmulhq_s32 Neon cumulative saturation 1 VQDMULH output: VQDMULH:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1144,11 +1144,11 @@ VQDMULH:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMULH:54:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULH_LANE overflow output: -VQDMULH_LANE:0:vqdmulh_lane_s16 Neon overflow 0 -VQDMULH_LANE:1:vqdmulh_lane_s32 Neon overflow 0 -VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon overflow 0 -VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon overflow 0 +VQDMULH_LANE cumulative saturation output: +VQDMULH_LANE:0:vqdmulh_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:1:vqdmulh_lane_s32 Neon cumulative saturation 0 +VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon cumulative saturation 0 VQDMULH_LANE output: VQDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1176,13 +1176,13 @@ VQDMULH_LANE:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMULH_LANE:26:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH_LANE:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULH_LANE (check mul overflow) overflow output: -VQDMULH_LANE:28:vqdmulh_lane_s16 Neon overflow 1 -VQDMULH_LANE:29:vqdmulh_lane_s32 Neon overflow 1 -VQDMULH_LANE:30:vqdmulhq_lane_s16 Neon overflow 1 -VQDMULH_LANE:31:vqdmulhq_lane_s32 Neon overflow 1 +VQDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULH_LANE:28:vqdmulh_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:29:vqdmulh_lane_s32 Neon cumulative saturation 1 +VQDMULH_LANE:30:vqdmulhq_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:31:vqdmulhq_lane_s32 Neon cumulative saturation 1 -VQDMULH_LANE (check mul overflow) output: +VQDMULH_LANE (check mul cumulative saturation) output: VQDMULH_LANE:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULH_LANE:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQDMULH_LANE:34:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -1208,11 +1208,11 @@ VQDMULH_LANE:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQDMULH_LANE:54:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH_LANE:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULH_N overflow output: -VQDMULH_N:0:vqdmulh_n_s16 Neon overflow 0 -VQDMULH_N:1:vqdmulh_n_s32 Neon overflow 0 -VQDMULH_N:2:vqdmulhq_n_s16 Neon overflow 0 -VQDMULH_N:3:vqdmulhq_n_s32 Neon overflow 0 +VQDMULH_N cumulative saturation output: +VQDMULH_N:0:vqdmulh_n_s16 Neon cumulative saturation 0 +VQDMULH_N:1:vqdmulh_n_s32 Neon cumulative saturation 0 +VQDMULH_N:2:vqdmulhq_n_s16 Neon cumulative saturation 0 +VQDMULH_N:3:vqdmulhq_n_s32 Neon cumulative saturation 0 VQDMULH_N output: VQDMULH_N:4:result_int16x4 [] = { 19, 19, 19, 19, } @@ -1220,13 +1220,13 @@ VQDMULH_N:5:result_int32x2 [] = { 4, 4, } VQDMULH_N:6:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } VQDMULH_N:7:result_int32x4 [] = { a, a, a, a, } -VQDMULH_N (check mul overflow) overflow output: -VQDMULH_N:8:vqdmulh_n_s16 Neon overflow 1 -VQDMULH_N:9:vqdmulh_n_s32 Neon overflow 1 -VQDMULH_N:10:vqdmulhq_n_s16 Neon overflow 1 -VQDMULH_N:11:vqdmulhq_n_s32 Neon overflow 1 +VQDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQDMULH_N:8:vqdmulh_n_s16 Neon cumulative saturation 1 +VQDMULH_N:9:vqdmulh_n_s32 Neon cumulative saturation 1 +VQDMULH_N:10:vqdmulhq_n_s16 Neon cumulative saturation 1 +VQDMULH_N:11:vqdmulhq_n_s32 Neon cumulative saturation 1 -VQDMULH_N (check mul overflow) output: +VQDMULH_N (check mul cumulative saturation) output: VQDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -1252,9 +1252,9 @@ VQDMULH_N:33:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQDMULH_N:34:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULH_N:35:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULL overflow output: -VQDMULL:0:vqdmull_s16 Neon overflow 0 -VQDMULL:1:vqdmull_s32 Neon overflow 0 +VQDMULL cumulative saturation output: +VQDMULL:0:vqdmull_s16 Neon cumulative saturation 0 +VQDMULL:1:vqdmull_s32 Neon cumulative saturation 0 VQDMULL output: VQDMULL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1282,11 +1282,11 @@ VQDMULL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMULL:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULL (check mul overflow) overflow output: -VQDMULL:26:vqdmull_s16 Neon overflow 1 -VQDMULL:27:vqdmull_s32 Neon overflow 1 +VQDMULL (check mul cumulative saturation) cumulative saturation output: +VQDMULL:26:vqdmull_s16 Neon cumulative saturation 1 +VQDMULL:27:vqdmull_s32 Neon cumulative saturation 1 -VQDMULL (check mul overflow) output: +VQDMULL (check mul cumulative saturation) output: VQDMULL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMULL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMULL:30:result_int32x2 [] = { 33333333, 33333333, } @@ -1312,9 +1312,9 @@ VQDMULL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMULL:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMULL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL overflow output: -VQDMLAL:0:vqdmlal_s16 Neon overflow 0 -VQDMLAL:1:vqdmlal_s32 Neon overflow 0 +VQDMLAL cumulative saturation output: +VQDMLAL:0:vqdmlal_s16 Neon cumulative saturation 0 +VQDMLAL:1:vqdmlal_s32 Neon cumulative saturation 0 VQDMLAL output: VQDMLAL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1342,11 +1342,11 @@ VQDMLAL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMLAL:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLAL (check mul overflow) overflow output: -VQDMLAL:26:vqdmlal_s16 Neon overflow 1 -VQDMLAL:27:vqdmlal_s32 Neon overflow 1 +VQDMLAL (check mul cumulative saturation) cumulative saturation output: +VQDMLAL:26:vqdmlal_s16 Neon cumulative saturation 1 +VQDMLAL:27:vqdmlal_s32 Neon cumulative saturation 1 -VQDMLAL (check mul overflow) output: +VQDMLAL (check mul cumulative saturation) output: VQDMLAL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLAL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLAL:30:result_int32x2 [] = { 33333333, 33333333, } @@ -1372,9 +1372,9 @@ VQDMLAL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMLAL:50:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLAL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL overflow output: -VQDMLSL:0:vqdmlsl_s16 Neon overflow 0 -VQDMLSL:1:vqdmlsl_s32 Neon overflow 0 +VQDMLSL cumulative saturation output: +VQDMLSL:0:vqdmlsl_s16 Neon cumulative saturation 0 +VQDMLSL:1:vqdmlsl_s32 Neon cumulative saturation 0 VQDMLSL output: VQDMLSL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -1402,11 +1402,11 @@ VQDMLSL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQDMLSL:24:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQDMLSL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMLSL (check mul overflow) overflow output: -VQDMLSL:26:vqdmlsl_s16 Neon overflow 1 -VQDMLSL:27:vqdmlsl_s32 Neon overflow 1 +VQDMLSL (check mul cumulative saturation) cumulative saturation output: +VQDMLSL:26:vqdmlsl_s16 Neon cumulative saturation 1 +VQDMLSL:27:vqdmlsl_s32 Neon cumulative saturation 1 -VQDMLSL (check mul overflow) output: +VQDMLSL (check mul cumulative saturation) output: VQDMLSL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQDMLSL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQDMLSL:30:result_int32x2 [] = { 33333333, 33333333, } @@ -1696,23 +1696,23 @@ VSHL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, VSHL_N:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VSHL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (with input = 0) overflow output: -VQSHL/VQSHLQ:0:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:1:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:2:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:3:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:4:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:5:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:6:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:7:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:8:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:9:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:10:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:11:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:12:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:13:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:14:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:15:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (with input = 0) cumulative saturation output: +VQSHL/VQSHLQ:0:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:1:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:2:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:3:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:4:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:5:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:6:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:7:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:8:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:9:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:10:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:11:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:12:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:13:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:14:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:15:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (with input = 0) output: VQSHL/VQSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -1740,23 +1740,23 @@ VQSHL/VQSHLQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQSHL/VQSHLQ:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (input 0 and negative shift amount) overflow output: -VQSHL/VQSHLQ:40:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:41:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:42:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:43:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:44:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:45:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:46:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:47:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:48:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:49:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:50:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:51:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:52:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:53:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:54:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:55:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:40:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:41:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:42:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:43:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:44:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:45:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:46:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:47:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:48:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:49:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:50:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:51:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:52:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:53:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:54:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:55:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (input 0 and negative shift amount) output: VQSHL/VQSHLQ:56:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -1784,23 +1784,23 @@ VQSHL/VQSHLQ:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQSHL/VQSHLQ:78:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ overflow output: -VQSHL/VQSHLQ:80:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:81:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:82:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:83:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:84:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:85:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:86:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:87:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:88:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:89:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:90:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:91:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:92:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:93:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:94:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:95:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ cumulative saturation output: +VQSHL/VQSHLQ:80:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:81:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:82:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:83:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:84:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:85:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:86:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:87:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:88:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:89:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:90:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:91:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:92:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:93:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:94:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:95:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ output: VQSHL/VQSHLQ:96:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } @@ -1828,23 +1828,23 @@ VQSHL/VQSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:118:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (negative shift amount) overflow output: -VQSHL/VQSHLQ:120:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:121:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:122:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:123:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:124:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:125:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:126:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:127:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:128:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:129:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:130:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:131:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:132:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:133:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:134:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:135:vqshlq_u64 Neon overflow 0 +VQSHL/VQSHLQ (negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:120:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:121:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:122:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:123:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:124:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:125:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:126:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:127:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:128:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:129:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:130:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:131:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:132:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:133:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:134:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:135:vqshlq_u64 Neon cumulative saturation 0 VQSHL/VQSHLQ (negative shift amount) output: VQSHL/VQSHLQ:136:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } @@ -1872,23 +1872,23 @@ VQSHL/VQSHLQ:157:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:158:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:159:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (large shift amount, negative input) overflow output: -VQSHL/VQSHLQ:160:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:161:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:162:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:163:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:164:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:165:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:166:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:167:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:168:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:169:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:170:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:171:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:172:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:173:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:174:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:175:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (large shift amount, negative input) cumulative saturation output: +VQSHL/VQSHLQ:160:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:161:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:162:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:163:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:164:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:165:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:166:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:167:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:168:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:169:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:170:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:171:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:172:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:173:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:174:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:175:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (large shift amount, negative input) output: VQSHL/VQSHLQ:176:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } @@ -1916,25 +1916,25 @@ VQSHL/VQSHLQ:197:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:198:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:199:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (check saturation/overflow) overflow output: -VQSHL/VQSHLQ:200:vqshl_s8 Neon overflow 0 -VQSHL/VQSHLQ:201:vqshl_s16 Neon overflow 0 -VQSHL/VQSHLQ:202:vqshl_s32 Neon overflow 0 -VQSHL/VQSHLQ:203:vqshl_s64 Neon overflow 0 -VQSHL/VQSHLQ:204:vqshl_u8 Neon overflow 0 -VQSHL/VQSHLQ:205:vqshl_u16 Neon overflow 0 -VQSHL/VQSHLQ:206:vqshl_u32 Neon overflow 0 -VQSHL/VQSHLQ:207:vqshl_u64 Neon overflow 0 -VQSHL/VQSHLQ:208:vqshlq_s8 Neon overflow 0 -VQSHL/VQSHLQ:209:vqshlq_s16 Neon overflow 0 -VQSHL/VQSHLQ:210:vqshlq_s32 Neon overflow 0 -VQSHL/VQSHLQ:211:vqshlq_s64 Neon overflow 0 -VQSHL/VQSHLQ:212:vqshlq_u8 Neon overflow 0 -VQSHL/VQSHLQ:213:vqshlq_u16 Neon overflow 0 -VQSHL/VQSHLQ:214:vqshlq_u32 Neon overflow 0 -VQSHL/VQSHLQ:215:vqshlq_u64 Neon overflow 0 - -VQSHL/VQSHLQ (check saturation/overflow) output: +VQSHL/VQSHLQ (check cumulative saturation) cumulative saturation output: +VQSHL/VQSHLQ:200:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:201:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:202:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:203:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:204:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:205:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:206:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:207:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:208:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:209:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:210:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:211:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:212:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:213:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:214:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:215:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (check cumulative saturation) output: VQSHL/VQSHLQ:216:result_int8x8 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } VQSHL/VQSHLQ:217:result_int16x4 [] = { 3fff, 3fff, 3fff, 3fff, } VQSHL/VQSHLQ:218:result_int32x2 [] = { 3fffffff, 3fffffff, } @@ -1960,23 +1960,23 @@ VQSHL/VQSHLQ:237:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:238:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:239:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (large shift amount, positive input) overflow output: -VQSHL/VQSHLQ:240:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:241:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:242:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:243:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:244:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:245:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:246:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:247:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:248:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:249:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:250:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:251:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:252:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:253:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:254:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:255:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (large shift amount, positive input) cumulative saturation output: +VQSHL/VQSHLQ:240:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:241:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:242:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:243:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:244:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:245:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:246:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:247:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:248:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:249:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:250:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:251:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:252:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:253:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:254:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:255:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (large shift amount, positive input) output: VQSHL/VQSHLQ:256:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -2004,23 +2004,23 @@ VQSHL/VQSHLQ:277:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:278:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:279:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL/VQSHLQ (check saturation on 64 bits) overflow output: -VQSHL/VQSHLQ:280:vqshl_s8 Neon overflow 1 -VQSHL/VQSHLQ:281:vqshl_s16 Neon overflow 1 -VQSHL/VQSHLQ:282:vqshl_s32 Neon overflow 1 -VQSHL/VQSHLQ:283:vqshl_s64 Neon overflow 1 -VQSHL/VQSHLQ:284:vqshl_u8 Neon overflow 1 -VQSHL/VQSHLQ:285:vqshl_u16 Neon overflow 1 -VQSHL/VQSHLQ:286:vqshl_u32 Neon overflow 1 -VQSHL/VQSHLQ:287:vqshl_u64 Neon overflow 1 -VQSHL/VQSHLQ:288:vqshlq_s8 Neon overflow 1 -VQSHL/VQSHLQ:289:vqshlq_s16 Neon overflow 1 -VQSHL/VQSHLQ:290:vqshlq_s32 Neon overflow 1 -VQSHL/VQSHLQ:291:vqshlq_s64 Neon overflow 1 -VQSHL/VQSHLQ:292:vqshlq_u8 Neon overflow 1 -VQSHL/VQSHLQ:293:vqshlq_u16 Neon overflow 1 -VQSHL/VQSHLQ:294:vqshlq_u32 Neon overflow 1 -VQSHL/VQSHLQ:295:vqshlq_u64 Neon overflow 1 +VQSHL/VQSHLQ (check saturation on 64 bits) cumulative saturation output: +VQSHL/VQSHLQ:280:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:281:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:282:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:283:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:284:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:285:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:286:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:287:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:288:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:289:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:290:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:291:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:292:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:293:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:294:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:295:vqshlq_u64 Neon cumulative saturation 1 VQSHL/VQSHLQ (check saturation on 64 bits) output: VQSHL/VQSHLQ:296:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -2048,23 +2048,23 @@ VQSHL/VQSHLQ:317:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQSHL/VQSHLQ:318:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL/VQSHLQ:319:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL_N/VQSHLQ_N overflow output: -VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon overflow 0 -VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon overflow 0 -VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon overflow 0 -VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon overflow 0 -VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon overflow 0 -VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon overflow 0 -VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon overflow 0 -VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon overflow 0 -VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N cumulative saturation output: +VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon cumulative saturation 1 VQSHL_N/VQSHLQ_N output: VQSHL_N/VQSHLQ_N:16:result_int8x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } @@ -2092,23 +2092,23 @@ VQSHL_N/VQSHLQ_N:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3 VQSHL_N/VQSHLQ_N:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHL_N/VQSHLQ_N:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHL_N/VQSHLQ_N (check saturation with large positive input) overflow output: -VQSHL_N/VQSHLQ_N:40:vqshl_n_s8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:41:vqshl_n_s16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:42:vqshl_n_s32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:43:vqshl_n_s64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:44:vqshl_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:45:vqshl_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:46:vqshl_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:47:vqshl_n_u64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:48:vqshlq_n_s8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:49:vqshlq_n_s16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:50:vqshlq_n_s32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:51:vqshlq_n_s64 Neon overflow 1 -VQSHL_N/VQSHLQ_N:52:vqshlq_n_u8 Neon overflow 1 -VQSHL_N/VQSHLQ_N:53:vqshlq_n_u16 Neon overflow 1 -VQSHL_N/VQSHLQ_N:54:vqshlq_n_u32 Neon overflow 1 -VQSHL_N/VQSHLQ_N:55:vqshlq_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N (check saturation with large positive input) cumulative saturation output: +VQSHL_N/VQSHLQ_N:40:vqshl_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:41:vqshl_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:42:vqshl_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:43:vqshl_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:44:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:45:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:46:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:47:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:48:vqshlq_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:49:vqshlq_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:50:vqshlq_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:51:vqshlq_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:52:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:53:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:54:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:55:vqshlq_n_u64 Neon cumulative saturation 1 VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: VQSHL_N/VQSHLQ_N:56:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -2630,35 +2630,35 @@ VDUP_LANE/VDUP_LANEQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff VDUP_LANE/VDUP_LANEQ:22:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, c1700000 -0x1.e000000p+3 -15, } VDUP_LANE/VDUP_LANEQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQDMULL_LANE overflow output: -VQDMULL_LANE:0:vqdmull_lane_s16 Neon overflow 0 -VQDMULL_LANE:1:vqdmull_lane_s32 Neon overflow 0 +VQDMULL_LANE cumulative saturation output: +VQDMULL_LANE:0:vqdmull_lane_s16 Neon cumulative saturation 0 +VQDMULL_LANE:1:vqdmull_lane_s32 Neon cumulative saturation 0 VQDMULL_LANE output: VQDMULL_LANE:2:result_int32x4 [] = { 8000, 8000, 8000, 8000, } VQDMULL_LANE:3:result_int64x2 [] = { 4000, 4000, } -VQDMULL_LANE (check mul overflow) overflow output: -VQDMULL_LANE:4:vqdmull_lane_s16 Neon overflow 1 -VQDMULL_LANE:5:vqdmull_lane_s32 Neon overflow 1 +VQDMULL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULL_LANE:4:vqdmull_lane_s16 Neon cumulative saturation 1 +VQDMULL_LANE:5:vqdmull_lane_s32 Neon cumulative saturation 1 -VQDMULL_LANE (check mul overflow) output: +VQDMULL_LANE (check mul cumulative saturation) output: VQDMULL_LANE:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } VQDMULL_LANE:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } -VQDMULL_N overflow output: -VQDMULL_N:0:vqdmull_n_s16 Neon overflow 0 -VQDMULL_N:1:vqdmull_n_s32 Neon overflow 0 +VQDMULL_N cumulative saturation output: +VQDMULL_N:0:vqdmull_n_s16 Neon cumulative saturation 0 +VQDMULL_N:1:vqdmull_n_s32 Neon cumulative saturation 0 VQDMULL_N output: VQDMULL_N:2:result_int32x4 [] = { 44000, 44000, 44000, 44000, } VQDMULL_N:3:result_int64x2 [] = { aa000, aa000, } -VQDMULL_N (check mul overflow) overflow output: -VQDMULL_N:4:vqdmull_n_s16 Neon overflow 1 -VQDMULL_N:5:vqdmull_n_s32 Neon overflow 1 +VQDMULL_N (check mul cumulative saturation) cumulative saturation output: +VQDMULL_N:4:vqdmull_n_s16 Neon cumulative saturation 1 +VQDMULL_N:5:vqdmull_n_s32 Neon cumulative saturation 1 -VQDMULL_N (check mul overflow) output: +VQDMULL_N (check mul cumulative saturation) output: VQDMULL_N:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } VQDMULL_N:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } @@ -2718,23 +2718,23 @@ float32: VSUB/VSUBQ:24:result_float32x2 [] = { c00ccccd -0x1.19999a0p+1 -2.2, c00ccccd -0x1.19999a0p+1 -2.2, } VSUB/VSUBQ:25:result_float32x4 [] = { c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, c00ccccc -0x1.1999980p+1 -2.2, } -VQADD/VQADDQ overflow output: -VQADD/VQADDQ:0:vqadd_s8 Neon overflow 0 -VQADD/VQADDQ:1:vqadd_s16 Neon overflow 0 -VQADD/VQADDQ:2:vqadd_s32 Neon overflow 0 -VQADD/VQADDQ:3:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:4:vqadd_u8 Neon overflow 1 -VQADD/VQADDQ:5:vqadd_u16 Neon overflow 1 -VQADD/VQADDQ:6:vqadd_u32 Neon overflow 1 -VQADD/VQADDQ:7:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:8:vqaddq_s8 Neon overflow 0 -VQADD/VQADDQ:9:vqaddq_s16 Neon overflow 0 -VQADD/VQADDQ:10:vqaddq_s32 Neon overflow 0 -VQADD/VQADDQ:11:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:12:vqaddq_u8 Neon overflow 1 -VQADD/VQADDQ:13:vqaddq_u16 Neon overflow 1 -VQADD/VQADDQ:14:vqaddq_u32 Neon overflow 1 -VQADD/VQADDQ:15:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ cumulative saturation output: +VQADD/VQADDQ:0:vqadd_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:1:vqadd_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:2:vqadd_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:3:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:4:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:5:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:6:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:7:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:8:vqaddq_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:9:vqaddq_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:10:vqaddq_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:11:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:12:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:13:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:14:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:15:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ output: VQADD/VQADDQ:16:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } @@ -2762,11 +2762,11 @@ VQADD/VQADDQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQADD/VQADDQ:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQADD/VQADDQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:40:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:41:vqadd_u64 Neon overflow 0 -VQADD/VQADDQ:42:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:43:vqaddq_u64 Neon overflow 0 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:40:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:41:vqadd_u64 Neon cumulative saturation 0 +VQADD/VQADDQ:42:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:43:vqaddq_u64 Neon cumulative saturation 0 64 bits saturation: VQADD/VQADDQ:44:result_int64x1 [] = { fffffffffffffff0, } @@ -2774,33 +2774,33 @@ VQADD/VQADDQ:45:result_uint64x1 [] = { fffffffffffffff0, } VQADD/VQADDQ:46:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } VQADD/VQADDQ:47:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:48:vqadd_s64 Neon overflow 0 -VQADD/VQADDQ:49:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:50:vqaddq_s64 Neon overflow 0 -VQADD/VQADDQ:51:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:48:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:49:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:50:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:51:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ:52:result_int64x1 [] = { 34, } VQADD/VQADDQ:53:result_uint64x1 [] = { ffffffffffffffff, } VQADD/VQADDQ:54:result_int64x2 [] = { 34, 35, } VQADD/VQADDQ:55:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } -VQADD/VQADDQ 64 bits saturation overflow output: -VQADD/VQADDQ:56:vqadd_s64 Neon overflow 1 -VQADD/VQADDQ:57:vqadd_u64 Neon overflow 1 -VQADD/VQADDQ:58:vqaddq_s64 Neon overflow 1 -VQADD/VQADDQ:59:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:56:vqadd_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:57:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:58:vqaddq_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:59:vqaddq_u64 Neon cumulative saturation 1 VQADD/VQADDQ:60:result_int64x1 [] = { 8000000000000000, } VQADD/VQADDQ:61:result_uint64x1 [] = { ffffffffffffffff, } VQADD/VQADDQ:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } VQADD/VQADDQ:63:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } less than 64 bits saturation: -VQADD/VQADDQ:64:vqadd_s8 Neon overflow 1 -VQADD/VQADDQ:65:vqadd_s16 Neon overflow 1 -VQADD/VQADDQ:66:vqadd_s32 Neon overflow 1 -VQADD/VQADDQ:67:vqaddq_s8 Neon overflow 1 -VQADD/VQADDQ:68:vqaddq_s16 Neon overflow 1 -VQADD/VQADDQ:69:vqaddq_s32 Neon overflow 1 +VQADD/VQADDQ:64:vqadd_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:65:vqadd_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:66:vqadd_s32 Neon cumulative saturation 1 +VQADD/VQADDQ:67:vqaddq_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:68:vqaddq_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:69:vqaddq_s32 Neon cumulative saturation 1 VQADD/VQADDQ:70:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQADD/VQADDQ:71:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQADD/VQADDQ:72:result_int32x2 [] = { 80000000, 80000000, } @@ -2808,13 +2808,13 @@ VQADD/VQADDQ:73:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ff VQADD/VQADDQ:74:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } VQADD/VQADDQ:75:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } -VQADD/VQADDQ less than 64 bits saturation overflow output: -VQADD/VQADDQ:76:vqadd_u8 Neon overflow 1 -VQADD/VQADDQ:77:vqadd_u16 Neon overflow 1 -VQADD/VQADDQ:78:vqadd_u32 Neon overflow 1 -VQADD/VQADDQ:79:vqaddq_u8 Neon overflow 1 -VQADD/VQADDQ:80:vqaddq_u16 Neon overflow 1 -VQADD/VQADDQ:81:vqaddq_u32 Neon overflow 1 +VQADD/VQADDQ less than 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:76:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:77:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:78:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:79:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:80:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:81:vqaddq_u32 Neon cumulative saturation 1 VQADD/VQADDQ:82:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } VQADD/VQADDQ:83:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } VQADD/VQADDQ:84:result_uint32x2 [] = { ffffffff, ffffffff, } @@ -2852,13 +2852,13 @@ float32: VABS/VABSQ:24:result_float32x2 [] = { 40133333 0x1.2666660p+1 2.3, 40133333 0x1.2666660p+1 2.3, } VABS/VABSQ:25:result_float32x4 [] = { 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, 4059999a 0x1.b333340p+1 3.4, } -VQABS/VQABSQ overflow output: -VQABS/VQABSQ:0:vqabs_s8 Neon overflow 0 -VQABS/VQABSQ:1:vqabs_s16 Neon overflow 0 -VQABS/VQABSQ:2:vqabs_s32 Neon overflow 0 -VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 0 -VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 0 -VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 0 +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 0 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 0 VQABS/VQABSQ output: VQABS/VQABSQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } @@ -2886,13 +2886,13 @@ VQABS/VQABSQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQABS/VQABSQ:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQABS/VQABSQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQABS/VQABSQ overflow output: -VQABS/VQABSQ:0:vqabs_s8 Neon overflow 1 -VQABS/VQABSQ:1:vqabs_s16 Neon overflow 1 -VQABS/VQABSQ:2:vqabs_s32 Neon overflow 1 -VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 1 -VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 1 -VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 1 +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 1 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 1 VQABS/VQABSQ output: VQABS/VQABSQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -3040,13 +3040,13 @@ float32: VNEG/VNEGQ:24:result_float32x2 [] = { c0133333 -0x1.2666660p+1 -2.3, c0133333 -0x1.2666660p+1 -2.3, } VNEG/VNEGQ:25:result_float32x4 [] = { c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, c059999a -0x1.b333340p+1 -3.4, } -VQNEG/VQNEGQ overflow output: -VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 0 -VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 0 -VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 0 -VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 0 -VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 0 -VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 0 +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 0 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 0 VQNEG/VQNEGQ output: VQNEG/VQNEGQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } @@ -3074,13 +3074,13 @@ VQNEG/VQNEGQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQNEG/VQNEGQ:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQNEG/VQNEGQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQNEG/VQNEGQ overflow output: -VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 1 -VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 1 -VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 1 -VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 1 -VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 1 -VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 1 +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 1 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 1 VQNEG/VQNEGQ output: VQNEG/VQNEGQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -3876,11 +3876,11 @@ VREINTERPRET/VREINTERPRETQ:243:result_poly8x16 [] = { 0, cc, 80, cb, 0, cb, 80, VREINTERPRET/VREINTERPRETQ:244:result_poly16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } VREINTERPRET/VREINTERPRETQ:245:result_float32x4 [] = { cb80cc00 -0x1.0198000p+24 -1.688166e+07, ca80cb00 -0x1.0196000p+22 -4220288, c980ca00 -0x1.0194000p+20 -1055040, c880c900 -0x1.0192000p+18 -263752, } -VQRDMULH overflow output: -VQRDMULH:0:vqrdmulh_s16 Neon overflow 0 -VQRDMULH:1:vqrdmulh_s32 Neon overflow 0 -VQRDMULH:2:vqrdmulhq_s16 Neon overflow 0 -VQRDMULH:3:vqrdmulhq_s32 Neon overflow 0 +VQRDMULH cumulative saturation output: +VQRDMULH:0:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:1:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:2:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:3:vqrdmulhq_s32 Neon cumulative saturation 0 VQRDMULH output: VQRDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -3908,13 +3908,13 @@ VQRDMULH:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRDMULH:26:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH (check mul overflow) overflow output: -VQRDMULH:28:vqrdmulh_s16 Neon overflow 1 -VQRDMULH:29:vqrdmulh_s32 Neon overflow 1 -VQRDMULH:30:vqrdmulhq_s16 Neon overflow 1 -VQRDMULH:31:vqrdmulhq_s32 Neon overflow 1 +VQRDMULH (check mul cumulative saturation) cumulative saturation output: +VQRDMULH:28:vqrdmulh_s16 Neon cumulative saturation 1 +VQRDMULH:29:vqrdmulh_s32 Neon cumulative saturation 1 +VQRDMULH:30:vqrdmulhq_s16 Neon cumulative saturation 1 +VQRDMULH:31:vqrdmulhq_s32 Neon cumulative saturation 1 -VQRDMULH (check mul overflow) output: +VQRDMULH (check mul cumulative saturation) output: VQRDMULH:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH:34:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3940,13 +3940,13 @@ VQRDMULH:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRDMULH:54:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH (check rounding overflow) overflow output: -VQRDMULH:56:vqrdmulh_s16 Neon overflow 0 -VQRDMULH:57:vqrdmulh_s32 Neon overflow 0 -VQRDMULH:58:vqrdmulhq_s16 Neon overflow 0 -VQRDMULH:59:vqrdmulhq_s32 Neon overflow 0 +VQRDMULH (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH:56:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:57:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:58:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:59:vqrdmulhq_s32 Neon cumulative saturation 0 -VQRDMULH (check rounding overflow) output: +VQRDMULH (check rounding cumulative saturation) output: VQRDMULH:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH:61:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH:62:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -3972,11 +3972,11 @@ VQRDMULH:81:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRDMULH:82:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH:83:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_LANE overflow output: -VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon overflow 0 -VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon overflow 0 -VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon overflow 0 -VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon overflow 0 +VQRDMULH_LANE cumulative saturation output: +VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon cumulative saturation 0 VQRDMULH_LANE output: VQRDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -4004,13 +4004,13 @@ VQRDMULH_LANE:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQRDMULH_LANE:26:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_LANE:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_LANE (check mul overflow) overflow output: -VQRDMULH_LANE:28:vqrdmulh_lane_s16 Neon overflow 1 -VQRDMULH_LANE:29:vqrdmulh_lane_s32 Neon overflow 1 -VQRDMULH_LANE:30:vqrdmulhq_lane_s16 Neon overflow 1 -VQRDMULH_LANE:31:vqrdmulhq_lane_s32 Neon overflow 1 +VQRDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:28:vqrdmulh_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:29:vqrdmulh_lane_s32 Neon cumulative saturation 1 +VQRDMULH_LANE:30:vqrdmulhq_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:31:vqrdmulhq_lane_s32 Neon cumulative saturation 1 -VQRDMULH_LANE (check mul overflow) output: +VQRDMULH_LANE (check mul cumulative saturation) output: VQRDMULH_LANE:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_LANE:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_LANE:34:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4036,13 +4036,13 @@ VQRDMULH_LANE:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQRDMULH_LANE:54:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_LANE:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_LANE (check rounding overflow) overflow output: -VQRDMULH_LANE:56:vqrdmulh_lane_s16 Neon overflow 0 -VQRDMULH_LANE:57:vqrdmulh_lane_s32 Neon overflow 0 -VQRDMULH_LANE:58:vqrdmulhq_lane_s16 Neon overflow 0 -VQRDMULH_LANE:59:vqrdmulhq_lane_s32 Neon overflow 0 +VQRDMULH_LANE (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:56:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:57:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:58:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:59:vqrdmulhq_lane_s32 Neon cumulative saturation 0 -VQRDMULH_LANE (check rounding overflow) output: +VQRDMULH_LANE (check rounding cumulative saturation) output: VQRDMULH_LANE:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_LANE:61:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_LANE:62:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4068,11 +4068,11 @@ VQRDMULH_LANE:81:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQRDMULH_LANE:82:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_LANE:83:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_N overflow output: -VQRDMULH_N:0:vqrdmulh_n_s16 Neon overflow 0 -VQRDMULH_N:1:vqrdmulh_n_s32 Neon overflow 0 -VQRDMULH_N:2:vqrdmulhq_n_s16 Neon overflow 0 -VQRDMULH_N:3:vqrdmulhq_n_s32 Neon overflow 0 +VQRDMULH_N cumulative saturation output: +VQRDMULH_N:0:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:1:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:2:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:3:vqrdmulhq_n_s32 Neon cumulative saturation 0 VQRDMULH_N output: VQRDMULH_N:4:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffd, } @@ -4080,13 +4080,13 @@ VQRDMULH_N:5:result_int32x2 [] = { fffffffe, fffffffe, } VQRDMULH_N:6:result_int16x8 [] = { 6, 6, 6, 5, 5, 4, 4, 4, } VQRDMULH_N:7:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } -VQRDMULH_N (check mul overflow) overflow output: -VQRDMULH_N:8:vqrdmulh_n_s16 Neon overflow 1 -VQRDMULH_N:9:vqrdmulh_n_s32 Neon overflow 1 -VQRDMULH_N:10:vqrdmulhq_n_s16 Neon overflow 1 -VQRDMULH_N:11:vqrdmulhq_n_s32 Neon overflow 1 +VQRDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_N:8:vqrdmulh_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:9:vqrdmulh_n_s32 Neon cumulative saturation 1 +VQRDMULH_N:10:vqrdmulhq_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:11:vqrdmulhq_n_s32 Neon cumulative saturation 1 -VQRDMULH_N (check mul overflow) output: +VQRDMULH_N (check mul cumulative saturation) output: VQRDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4112,13 +4112,13 @@ VQRDMULH_N:33:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRDMULH_N:34:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_N:35:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRDMULH_N (check rounding overflow) overflow output: -VQRDMULH_N:36:vqrdmulh_n_s16 Neon overflow 0 -VQRDMULH_N:37:vqrdmulh_n_s32 Neon overflow 0 -VQRDMULH_N:38:vqrdmulhq_n_s16 Neon overflow 0 -VQRDMULH_N:39:vqrdmulhq_n_s32 Neon overflow 0 +VQRDMULH_N (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_N:36:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:37:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:38:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:39:vqrdmulhq_n_s32 Neon cumulative saturation 0 -VQRDMULH_N (check rounding overflow) output: +VQRDMULH_N (check rounding cumulative saturation) output: VQRDMULH_N:40:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRDMULH_N:41:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRDMULH_N:42:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4144,23 +4144,23 @@ VQRDMULH_N:61:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRDMULH_N:62:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRDMULH_N:63:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (with input = 0) overflow output: -VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (with input = 0) cumulative saturation output: +VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (with input = 0) output: VQRSHL/VQRSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -4188,23 +4188,23 @@ VQRSHL/VQRSHLQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRSHL/VQRSHLQ:38:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (input 0 and negative shift amount) overflow output: -VQRSHL/VQRSHLQ:40:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:41:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:42:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:43:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:44:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:45:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:46:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:47:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:48:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:49:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:50:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:51:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:52:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:53:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:54:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:55:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:40:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:41:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:42:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:43:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:44:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:45:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:46:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:47:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:48:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:49:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:50:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:51:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:52:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:53:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:54:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:55:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: VQRSHL/VQRSHLQ:56:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } @@ -4232,23 +4232,23 @@ VQRSHL/VQRSHLQ:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 333 VQRSHL/VQRSHLQ:78:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ overflow output: -VQRSHL/VQRSHLQ:80:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:81:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:82:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:83:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:84:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:85:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:86:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:87:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:88:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:89:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:90:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:91:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:92:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:93:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:94:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:95:vqrshlq_u64 Neon overflow 1 +VQRSHL/VQRSHLQ cumulative saturation output: +VQRSHL/VQRSHLQ:80:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:81:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:82:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:83:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:84:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:85:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:86:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:87:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:88:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:89:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:90:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:91:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:92:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:93:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:94:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:95:vqrshlq_u64 Neon cumulative saturation 1 VQRSHL/VQRSHLQ output: VQRSHL/VQRSHLQ:96:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } @@ -4276,23 +4276,23 @@ VQRSHL/VQRSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:118:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (negative shift amount) overflow output: -VQRSHL/VQRSHLQ:120:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:121:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:122:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:123:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:124:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:125:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:126:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:127:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:128:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:129:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:130:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:131:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:132:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:133:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:134:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:135:vqrshlq_u64 Neon overflow 0 +VQRSHL/VQRSHLQ (negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:120:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:121:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:122:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:123:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:124:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:125:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:126:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:127:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:128:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:129:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:130:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:131:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:132:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:133:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:134:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:135:vqrshlq_u64 Neon cumulative saturation 0 VQRSHL/VQRSHLQ (negative shift amount) output: VQRSHL/VQRSHLQ:136:result_int8x8 [] = { fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, } @@ -4320,25 +4320,25 @@ VQRSHL/VQRSHLQ:157:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:158:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:159:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: shift by -1) overflow output: -VQRSHL/VQRSHLQ:160:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:161:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:162:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:163:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:164:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:165:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:166:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:167:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:168:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:169:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:170:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:171:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:172:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:173:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:174:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:175:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: shift by -1) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) cumulative saturation output: +VQRSHL/VQRSHLQ:160:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:161:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:162:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:163:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:164:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:165:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:166:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:167:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:168:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:169:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:170:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:171:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:172:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:173:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:174:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:175:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) output: VQRSHL/VQRSHLQ:176:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } VQRSHL/VQRSHLQ:177:result_int16x4 [] = { 4000, 4000, 4000, 4000, } VQRSHL/VQRSHLQ:178:result_int32x2 [] = { 40000000, 40000000, } @@ -4364,25 +4364,25 @@ VQRSHL/VQRSHLQ:197:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:198:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:199:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: shift by -3) overflow output: -VQRSHL/VQRSHLQ:200:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:201:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:202:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:203:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:204:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:205:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:206:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:207:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:208:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:209:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:210:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:211:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:212:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:213:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:214:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:215:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: shift by -3) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) cumulative saturation output: +VQRSHL/VQRSHLQ:200:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:201:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:202:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:203:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:204:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:205:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:206:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:207:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:208:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:209:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:210:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:211:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:212:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:213:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:214:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:215:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) output: VQRSHL/VQRSHLQ:216:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } VQRSHL/VQRSHLQ:217:result_int16x4 [] = { 1000, 1000, 1000, 1000, } VQRSHL/VQRSHLQ:218:result_int32x2 [] = { 10000000, 10000000, } @@ -4408,25 +4408,25 @@ VQRSHL/VQRSHLQ:237:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:238:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:239:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount) overflow output: -VQRSHL/VQRSHLQ:240:vqrshl_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:241:vqrshl_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:242:vqrshl_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:243:vqrshl_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:244:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:245:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:246:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:247:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:248:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:249:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:250:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:251:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:252:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:253:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:254:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:255:vqrshlq_u64 Neon overflow 1 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:240:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:241:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:242:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:243:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:244:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:245:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:246:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:247:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:248:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:249:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:250:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:251:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:252:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:253:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:254:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:255:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) output: VQRSHL/VQRSHLQ:256:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } VQRSHL/VQRSHLQ:257:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } VQRSHL/VQRSHLQ:258:result_int32x2 [] = { 7fffffff, 7fffffff, } @@ -4452,25 +4452,25 @@ VQRSHL/VQRSHLQ:277:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:278:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:279:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount with negative input) overflow output: -VQRSHL/VQRSHLQ:280:vqrshl_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:281:vqrshl_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:282:vqrshl_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:283:vqrshl_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:284:vqrshl_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:285:vqrshl_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:286:vqrshl_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:287:vqrshl_u64 Neon overflow 1 -VQRSHL/VQRSHLQ:288:vqrshlq_s8 Neon overflow 1 -VQRSHL/VQRSHLQ:289:vqrshlq_s16 Neon overflow 1 -VQRSHL/VQRSHLQ:290:vqrshlq_s32 Neon overflow 1 -VQRSHL/VQRSHLQ:291:vqrshlq_s64 Neon overflow 1 -VQRSHL/VQRSHLQ:292:vqrshlq_u8 Neon overflow 1 -VQRSHL/VQRSHLQ:293:vqrshlq_u16 Neon overflow 1 -VQRSHL/VQRSHLQ:294:vqrshlq_u32 Neon overflow 1 -VQRSHL/VQRSHLQ:295:vqrshlq_u64 Neon overflow 1 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount with negative input) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) cumulative saturation output: +VQRSHL/VQRSHLQ:280:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:281:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:282:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:283:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:284:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:285:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:286:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:287:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:288:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:289:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:290:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:291:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:292:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:293:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:294:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:295:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) output: VQRSHL/VQRSHLQ:296:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } VQRSHL/VQRSHLQ:297:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } VQRSHL/VQRSHLQ:298:result_int32x2 [] = { 80000000, 80000000, } @@ -4496,25 +4496,25 @@ VQRSHL/VQRSHLQ:317:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:318:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:319:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: large negative shift amount) overflow output: -VQRSHL/VQRSHLQ:320:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:321:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:322:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:323:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:324:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:325:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:326:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:327:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:328:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:329:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:330:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:331:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:332:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:333:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:334:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:335:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: large negative shift amount) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:320:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:321:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:322:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:323:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:324:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:325:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:326:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:327:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:328:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:329:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:330:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:331:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:332:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:333:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:334:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:335:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) output: VQRSHL/VQRSHLQ:336:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQRSHL/VQRSHLQ:337:result_int16x4 [] = { 0, 0, 0, 0, } VQRSHL/VQRSHLQ:338:result_int32x2 [] = { 0, 0, } @@ -4540,25 +4540,25 @@ VQRSHL/VQRSHLQ:357:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 33 VQRSHL/VQRSHLQ:358:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHL/VQRSHLQ:359:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHL/VQRSHLQ (checking overflow: large shift amount with 0 input) overflow output: -VQRSHL/VQRSHLQ:360:vqrshl_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:361:vqrshl_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:362:vqrshl_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:363:vqrshl_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:364:vqrshl_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:365:vqrshl_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:366:vqrshl_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:367:vqrshl_u64 Neon overflow 0 -VQRSHL/VQRSHLQ:368:vqrshlq_s8 Neon overflow 0 -VQRSHL/VQRSHLQ:369:vqrshlq_s16 Neon overflow 0 -VQRSHL/VQRSHLQ:370:vqrshlq_s32 Neon overflow 0 -VQRSHL/VQRSHLQ:371:vqrshlq_s64 Neon overflow 0 -VQRSHL/VQRSHLQ:372:vqrshlq_u8 Neon overflow 0 -VQRSHL/VQRSHLQ:373:vqrshlq_u16 Neon overflow 0 -VQRSHL/VQRSHLQ:374:vqrshlq_u32 Neon overflow 0 -VQRSHL/VQRSHLQ:375:vqrshlq_u64 Neon overflow 0 - -VQRSHL/VQRSHLQ (checking overflow: large shift amount with 0 input) output: +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) cumulative saturation output: +VQRSHL/VQRSHLQ:360:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:361:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:362:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:363:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:364:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:365:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:366:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:367:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:368:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:369:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:370:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:371:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:372:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:373:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:374:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:375:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) output: VQRSHL/VQRSHLQ:376:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } VQRSHL/VQRSHLQ:377:result_int16x4 [] = { 0, 0, 0, 0, } VQRSHL/VQRSHLQ:378:result_int32x2 [] = { 0, 0, } @@ -6048,13 +6048,13 @@ VMVN/VMVNQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VMVN/VMVNQ:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VMVN/VMVNQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQMOVN overflow output: -VQMOVN:0:vqmovn_s16 Neon overflow 0 -VQMOVN:1:vqmovn_s32 Neon overflow 0 -VQMOVN:2:vqmovn_s64 Neon overflow 0 -VQMOVN:3:vqmovn_u16 Neon overflow 0 -VQMOVN:4:vqmovn_u32 Neon overflow 0 -VQMOVN:5:vqmovn_u64 Neon overflow 0 +VQMOVN cumulative saturation output: +VQMOVN:0:vqmovn_s16 Neon cumulative saturation 0 +VQMOVN:1:vqmovn_s32 Neon cumulative saturation 0 +VQMOVN:2:vqmovn_s64 Neon cumulative saturation 0 +VQMOVN:3:vqmovn_u16 Neon cumulative saturation 0 +VQMOVN:4:vqmovn_u32 Neon cumulative saturation 0 +VQMOVN:5:vqmovn_u64 Neon cumulative saturation 0 VQMOVN output: VQMOVN:6:result_int8x8 [] = { 12, 12, 12, 12, 12, 12, 12, 12, } @@ -6082,13 +6082,13 @@ VQMOVN:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQMOVN:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQMOVN:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQMOVN overflow output: -VQMOVN:30:vqmovn_s16 Neon overflow 1 -VQMOVN:31:vqmovn_s32 Neon overflow 1 -VQMOVN:32:vqmovn_s64 Neon overflow 1 -VQMOVN:33:vqmovn_u16 Neon overflow 1 -VQMOVN:34:vqmovn_u32 Neon overflow 1 -VQMOVN:35:vqmovn_u64 Neon overflow 1 +VQMOVN cumulative saturation output: +VQMOVN:30:vqmovn_s16 Neon cumulative saturation 1 +VQMOVN:31:vqmovn_s32 Neon cumulative saturation 1 +VQMOVN:32:vqmovn_s64 Neon cumulative saturation 1 +VQMOVN:33:vqmovn_u16 Neon cumulative saturation 1 +VQMOVN:34:vqmovn_u32 Neon cumulative saturation 1 +VQMOVN:35:vqmovn_u64 Neon cumulative saturation 1 VQMOVN output: VQMOVN:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -6116,10 +6116,10 @@ VQMOVN:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQMOVN:58:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQMOVN:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQMOVUN overflow output: -VQMOVUN:0:vqmovun_s16 Neon overflow 0 -VQMOVUN:1:vqmovun_s32 Neon overflow 0 -VQMOVUN:2:vqmovun_s64 Neon overflow 0 +VQMOVUN cumulative saturation output: +VQMOVUN:0:vqmovun_s16 Neon cumulative saturation 0 +VQMOVUN:1:vqmovun_s32 Neon cumulative saturation 0 +VQMOVUN:2:vqmovun_s64 Neon cumulative saturation 0 VQMOVUN output: VQMOVUN:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6147,10 +6147,10 @@ VQMOVUN:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333 VQMOVUN:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQMOVUN:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQMOVUN (negative input) overflow output: -VQMOVUN:27:vqmovun_s16 Neon overflow 1 -VQMOVUN:28:vqmovun_s32 Neon overflow 1 -VQMOVUN:29:vqmovun_s64 Neon overflow 1 +VQMOVUN (negative input) cumulative saturation output: +VQMOVUN:27:vqmovun_s16 Neon cumulative saturation 1 +VQMOVUN:28:vqmovun_s32 Neon cumulative saturation 1 +VQMOVUN:29:vqmovun_s64 Neon cumulative saturation 1 VQMOVUN (negative input) output: VQMOVUN:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6620,15 +6620,15 @@ VPADAL/VPADALQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 333 VPADAL/VPADALQ:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VPADAL/VPADALQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHLU_N/VQSHLUQ_N (negative input) overflow output: -VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N (negative input) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon cumulative saturation 1 VQSHLU_N/VQSHLUQ_N (negative input) output: VQSHLU_N/VQSHLUQ_N:8:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6656,17 +6656,17 @@ VQSHLU_N/VQSHLUQ_N:29:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, VQSHLU_N/VQSHLUQ_N:30:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHLU_N/VQSHLUQ_N:31:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) overflow output: -VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:34:vqshlu_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:35:vqshlu_n_s64 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:38:vqshluq_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:39:vqshluq_n_s64 Neon overflow 0 - -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) output: +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:34:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:35:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:38:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:39:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) output: VQSHLU_N/VQSHLUQ_N:40:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHLU_N/VQSHLUQ_N:41:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:42:result_int32x2 [] = { 33333333, 33333333, } @@ -6692,17 +6692,17 @@ VQSHLU_N/VQSHLUQ_N:61:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, VQSHLU_N/VQSHLUQ_N:62:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHLU_N/VQSHLUQ_N:63:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) overflow output: -VQSHLU_N/VQSHLUQ_N:64:vqshlu_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:65:vqshlu_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:66:vqshlu_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:67:vqshlu_n_s64 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:68:vqshluq_n_s8 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:69:vqshluq_n_s16 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:70:vqshluq_n_s32 Neon overflow 1 -VQSHLU_N/VQSHLUQ_N:71:vqshluq_n_s64 Neon overflow 1 - -VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) output: +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:64:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:65:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:66:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:67:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:68:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:69:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:70:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:71:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) output: VQSHLU_N/VQSHLUQ_N:72:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHLU_N/VQSHLUQ_N:73:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHLU_N/VQSHLUQ_N:74:result_int32x2 [] = { 33333333, 33333333, } @@ -6728,15 +6728,15 @@ VQSHLU_N/VQSHLUQ_N:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, VQSHLU_N/VQSHLUQ_N:94:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHLU_N/VQSHLUQ_N:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHLU_N/VQSHLUQ_N overflow output: -VQSHLU_N/VQSHLUQ_N:96:vqshlu_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:97:vqshlu_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:98:vqshlu_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:99:vqshlu_n_s64 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:100:vqshluq_n_s8 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:101:vqshluq_n_s16 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:102:vqshluq_n_s32 Neon overflow 0 -VQSHLU_N/VQSHLUQ_N:103:vqshluq_n_s64 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:96:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:97:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:98:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:99:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:100:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:101:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:102:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:103:vqshluq_n_s64 Neon cumulative saturation 0 VQSHLU_N/VQSHLUQ_N output: VQSHLU_N/VQSHLUQ_N:104:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -6894,13 +6894,13 @@ VCNT/VCNTQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VCNT/VCNTQ:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VCNT/VCNTQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRN_N overflow output: -VQSHRN_N:0:vqshrn_n_s16 Neon overflow 0 -VQSHRN_N:1:vqshrn_n_s32 Neon overflow 0 -VQSHRN_N:2:vqshrn_n_s64 Neon overflow 0 -VQSHRN_N:3:vqshrn_n_u16 Neon overflow 1 -VQSHRN_N:4:vqshrn_n_u32 Neon overflow 1 -VQSHRN_N:5:vqshrn_n_u64 Neon overflow 1 +VQSHRN_N cumulative saturation output: +VQSHRN_N:0:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:1:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:2:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:3:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:4:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:5:vqshrn_n_u64 Neon cumulative saturation 1 VQSHRN_N output: VQSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } @@ -6928,13 +6928,13 @@ VQSHRN_N:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQSHRN_N:28:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRN_N:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRN_N (check saturation: shift by 3) overflow output: -VQSHRN_N:30:vqshrn_n_s16 Neon overflow 1 -VQSHRN_N:31:vqshrn_n_s32 Neon overflow 1 -VQSHRN_N:32:vqshrn_n_s64 Neon overflow 1 -VQSHRN_N:33:vqshrn_n_u16 Neon overflow 1 -VQSHRN_N:34:vqshrn_n_u32 Neon overflow 1 -VQSHRN_N:35:vqshrn_n_u64 Neon overflow 1 +VQSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQSHRN_N:30:vqshrn_n_s16 Neon cumulative saturation 1 +VQSHRN_N:31:vqshrn_n_s32 Neon cumulative saturation 1 +VQSHRN_N:32:vqshrn_n_s64 Neon cumulative saturation 1 +VQSHRN_N:33:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:34:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:35:vqshrn_n_u64 Neon cumulative saturation 1 VQSHRN_N (check saturation: shift by 3) output: VQSHRN_N:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -6962,13 +6962,13 @@ VQSHRN_N:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 333 VQSHRN_N:58:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRN_N:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRN_N (check saturation: shift by max) overflow output: -VQSHRN_N:60:vqshrn_n_s16 Neon overflow 0 -VQSHRN_N:61:vqshrn_n_s32 Neon overflow 0 -VQSHRN_N:62:vqshrn_n_s64 Neon overflow 0 -VQSHRN_N:63:vqshrn_n_u16 Neon overflow 0 -VQSHRN_N:64:vqshrn_n_u32 Neon overflow 0 -VQSHRN_N:65:vqshrn_n_u64 Neon overflow 0 +VQSHRN_N (check saturation: shift by max) cumulative saturation output: +VQSHRN_N:60:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:61:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:62:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:63:vqshrn_n_u16 Neon cumulative saturation 0 +VQSHRN_N:64:vqshrn_n_u32 Neon cumulative saturation 0 +VQSHRN_N:65:vqshrn_n_u64 Neon cumulative saturation 0 VQSHRN_N (check saturation: shift by max) output: VQSHRN_N:66:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } @@ -7048,10 +7048,10 @@ VPMIN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, VPMIN:22:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VPMIN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRUN_N (negative input) overflow output: -VQSHRUN_N:0:vqshrun_n_s16 Neon overflow 1 -VQSHRUN_N:1:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:2:vqshrun_n_s64 Neon overflow 1 +VQSHRUN_N (negative input) cumulative saturation output: +VQSHRUN_N:0:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:1:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:2:vqshrun_n_s64 Neon cumulative saturation 1 VQSHRUN_N (negative input) output: VQSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -7079,12 +7079,12 @@ VQSHRUN_N:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQSHRUN_N:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRUN_N:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRUN_N (check saturation/overflow) overflow output: -VQSHRUN_N:27:vqshrun_n_s16 Neon overflow 1 -VQSHRUN_N:28:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:29:vqshrun_n_s64 Neon overflow 1 +VQSHRUN_N (check cumulative saturation) cumulative saturation output: +VQSHRUN_N:27:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:28:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:29:vqshrun_n_s64 Neon cumulative saturation 1 -VQSHRUN_N (check saturation/overflow) output: +VQSHRUN_N (check cumulative saturation) output: VQSHRUN_N:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQSHRUN_N:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQSHRUN_N:32:result_int32x2 [] = { 33333333, 33333333, } @@ -7110,10 +7110,10 @@ VQSHRUN_N:51:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQSHRUN_N:52:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRUN_N:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQSHRUN_N overflow output: -VQSHRUN_N:54:vqshrun_n_s16 Neon overflow 0 -VQSHRUN_N:55:vqshrun_n_s32 Neon overflow 1 -VQSHRUN_N:56:vqshrun_n_s64 Neon overflow 0 +VQSHRUN_N cumulative saturation output: +VQSHRUN_N:54:vqshrun_n_s16 Neon cumulative saturation 0 +VQSHRUN_N:55:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:56:vqshrun_n_s64 Neon cumulative saturation 0 VQSHRUN_N output: VQSHRUN_N:57:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -7141,10 +7141,10 @@ VQSHRUN_N:78:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 33 VQSHRUN_N:79:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQSHRUN_N:80:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N (negative input) overflow output: -VQRSHRUN_N:0:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:1:vqrshrun_n_s32 Neon overflow 0 -VQRSHRUN_N:2:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (negative input) cumulative saturation output: +VQRSHRUN_N:0:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:1:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:2:vqrshrun_n_s64 Neon cumulative saturation 1 VQRSHRUN_N (negative input) output: VQRSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } @@ -7172,12 +7172,12 @@ VQRSHRUN_N:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRSHRUN_N:25:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRUN_N:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N (check saturation/overflow: shift by 1) overflow output: -VQRSHRUN_N:27:vqrshrun_n_s16 Neon overflow 1 -VQRSHRUN_N:28:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:29:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQRSHRUN_N:27:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:28:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:29:vqrshrun_n_s64 Neon cumulative saturation 1 -VQRSHRUN_N (check saturation/overflow: shift by 1) output: +VQRSHRUN_N (check cumulative saturation: shift by 1) output: VQRSHRUN_N:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:32:result_int32x2 [] = { 33333333, 33333333, } @@ -7203,12 +7203,12 @@ VQRSHRUN_N:51:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRSHRUN_N:52:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRUN_N:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N (check saturation/overflow: shift by max, positive input) overflow output: -VQRSHRUN_N:54:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:55:vqrshrun_n_s32 Neon overflow 0 -VQRSHRUN_N:56:vqrshrun_n_s64 Neon overflow 0 +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) cumulative saturation output: +VQRSHRUN_N:54:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:55:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:56:vqrshrun_n_s64 Neon cumulative saturation 0 -VQRSHRUN_N (check saturation/overflow: shift by max, positive input) output: +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) output: VQRSHRUN_N:57:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:58:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:59:result_int32x2 [] = { 33333333, 33333333, } @@ -7234,12 +7234,12 @@ VQRSHRUN_N:78:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3 VQRSHRUN_N:79:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRUN_N:80:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N (check saturation/overflow: shift by max, negative input) overflow output: -VQRSHRUN_N:81:vqrshrun_n_s16 Neon overflow 1 -VQRSHRUN_N:82:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:83:vqrshrun_n_s64 Neon overflow 1 +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) cumulative saturation output: +VQRSHRUN_N:81:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:82:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:83:vqrshrun_n_s64 Neon cumulative saturation 1 -VQRSHRUN_N (check saturation/overflow: shift by max, negative input) output: +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) output: VQRSHRUN_N:84:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VQRSHRUN_N:85:result_int16x4 [] = { 3333, 3333, 3333, 3333, } VQRSHRUN_N:86:result_int32x2 [] = { 33333333, 33333333, } @@ -7265,10 +7265,10 @@ VQRSHRUN_N:105:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, VQRSHRUN_N:106:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, } VQRSHRUN_N:107:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } -VQRSHRUN_N overflow output: -VQRSHRUN_N:108:vqrshrun_n_s16 Neon overflow 0 -VQRSHRUN_N:109:vqrshrun_n_s32 Neon overflow 1 -VQRSHRUN_N:110:vqrshrun_n_s64 Neon overflow 0 +VQRSHRUN_N cumulative saturation output: +VQRSHRUN_N:108:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:109:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:110:vqrshrun_n_s64 Neon cumulative saturation 0 VQRSHRUN_N output: VQRSHRUN_N:111:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } diff --git a/ref_v_binary_sat_op.c b/ref_v_binary_sat_op.c index 7c72e15..b9a880c 100644 --- a/ref_v_binary_sat_op.c +++ b/ref_v_binary_sat_op.c @@ -40,15 +40,15 @@ FNNAME (INSN_NAME) { /* vector_res = OP(vector1,vector2), then store the result. */ -#define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ - VECT_VAR(vector2, T1, W, N)); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) #define TEST_BINARY_SAT_OP(INSN, Q, T1, T2, W, N) \ TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) @@ -83,7 +83,7 @@ FNNAME (INSN_NAME) TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2); diff --git a/ref_v_unary_sat_op.c b/ref_v_unary_sat_op.c index 37c1757..afdd9d1 100644 --- a/ref_v_unary_sat_op.c +++ b/ref_v_unary_sat_op.c @@ -39,14 +39,14 @@ THE SOFTWARE. FNNAME (INSN_NAME) { /* Basic test: y=OP(x), then store the result. */ -#define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) #define TEST_UNARY_SAT_OP(INSN, Q, T1, T2, W, N) \ TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) @@ -80,7 +80,7 @@ FNNAME (INSN_NAME) TEST_VLOAD(vector, buffer, q, int, s, 32, 4); /* Apply a unary operator named INSN_NAME */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); diff --git a/ref_vqabs.c b/ref_vqabs.c index 4a4d04e..a7a6466 100644 --- a/ref_vqabs.c +++ b/ref_vqabs.c @@ -61,7 +61,7 @@ void vqabs_extra() TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); /* Apply a unary operator named INSN_NAME */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); diff --git a/ref_vqadd.c b/ref_vqadd.c index 6c9b508..d8c38b4 100644 --- a/ref_vqadd.c +++ b/ref_vqadd.c @@ -49,7 +49,8 @@ void vqadd_64(void) TEST_VDUP(vector2, q, int, s, 64, 2, 0x0); TEST_VDUP(vector2, q, uint, u, 64, 2, 0x0); - fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); @@ -67,7 +68,8 @@ void vqadd_64(void) TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); - fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); @@ -87,7 +89,8 @@ void vqadd_64(void) TEST_VDUP(vector2, q, uint, u, 64, 2, 0x22); - fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); @@ -135,7 +138,8 @@ void vqadd_64(void) TEST_VDUP(vector2, q, uint, u, 16, 8, 0x20); TEST_VDUP(vector2, q, uint, u, 32, 4, 0x20); - fprintf(ref_file, "\n%s less than 64 bits saturation overflow output:\n", + fprintf(ref_file, + "\n%s less than 64 bits saturation cumulative saturation output:\n", TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); diff --git a/ref_vqdmlal.c b/ref_vqdmlal.c index c686c9d..9033b25 100644 --- a/ref_vqdmlal.c +++ b/ref_vqdmlal.c @@ -44,16 +44,16 @@ FNNAME (INSN_NAME) { /* vector_res = OP(vector, vector3, vector4), then store the result. */ -#define TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector3, T1, W2, N), \ - VECT_VAR(vector4, T1, W2, N)); \ - vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2), \ - xSTR(T1), W, N) +#define TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W2), \ + xSTR(T1), W, N) #define TEST_VQDMLXL(INSN, T1, T2, W, W2, N) \ TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) @@ -79,7 +79,7 @@ FNNAME (INSN_NAME) TEST_VDUP(vector3, , int, s, 32, 2, 0x55); TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4); TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2); dump_results_hex (TEST_MSG); @@ -90,9 +90,9 @@ FNNAME (INSN_NAME) TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); TEST_VDUP(vector4, , int, s, 32, 2, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4); TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); } diff --git a/ref_vqdmlal_lane.c b/ref_vqdmlal_lane.c index 903b822..b0a44c2 100644 --- a/ref_vqdmlal_lane.c +++ b/ref_vqdmlal_lane.c @@ -43,17 +43,17 @@ FNNAME (INSN_NAME) { /* vector_res = vqdmlxl_lane(vector, vector3, vector4, lane), then store the result. */ -#define TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector3, T1, W2, N), \ - VECT_VAR(vector4, T1, W2, N), \ - V); \ - vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2), \ - xSTR(T1), W, N) +#define TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W2), \ + xSTR(T1), W, N) #define TEST_VQDMLXL_LANE(INSN, T1, T2, W, W2, N, V) \ TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) @@ -80,14 +80,15 @@ FNNAME (INSN_NAME) TEST_VDUP(vector3, , int, s, 32, 2, 0x55); TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); dump_results_hex (TEST_MSG); TEST_VDUP(vector3, , int, s, 16, 4, 0); TEST_VDUP(vector3, , int, s, 32, 2, 0); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (mul with input=0)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (mul with input=0)"); TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); dump_results_hex2 (TEST_MSG, " (mul with input=0)"); @@ -96,9 +97,9 @@ FNNAME (INSN_NAME) TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); TEST_VDUP(vector4, , int, s, 16, 4, 0x8000); TEST_VDUP(vector4, , int, s, 32, 2, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); } diff --git a/ref_vqdmlal_n.c b/ref_vqdmlal_n.c index c9a3dc1..e7f97dd 100644 --- a/ref_vqdmlal_n.c +++ b/ref_vqdmlal_n.c @@ -43,16 +43,16 @@ FNNAME (INSN_NAME) { /* vector_res = vqdmlxl_n(vector, vector3, val), then store the result. */ -#define TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector3, T1, W2, N), \ - V); \ - vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2), \ - xSTR(T1), W, N) +#define TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W2), \ + xSTR(T1), W, N) #define TEST_VQDMLXL_N(INSN, T1, T2, W, W2, N, V) \ TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) @@ -76,7 +76,7 @@ FNNAME (INSN_NAME) TEST_VDUP(vector3, , int, s, 32, 2, 0x55); /* Choose val arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x22); TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x33); @@ -84,9 +84,9 @@ FNNAME (INSN_NAME) TEST_VDUP(vector3, , int, s, 16, 4, 0x8000); TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x8000); TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x80000000); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); } diff --git a/ref_vqdmulh.c b/ref_vqdmulh.c index a9ee93f..fd48fa1 100644 --- a/ref_vqdmulh.c +++ b/ref_vqdmulh.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* vector_res = vqdmulh(vector,vector2,lane), then store the result. */ -#define TEST_VQDMULH2(INSN, Q, T1, T2, W, N) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N)); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQDMULH2(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQDMULH1(INSN, Q, T1, T2, W, N) \ @@ -87,7 +87,7 @@ FNNAME (INSN) TEST_VDUP(vector2, q, int, s, 16, 8, 0x33); TEST_VDUP(vector2, q, int, s, 32, 4, 0x22); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMULH(, int, s, 16, 4); TEST_VQDMULH(, int, s, 32, 2); TEST_VQDMULH(q, int, s, 16, 8); @@ -106,7 +106,7 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMULH(, int, s, 16, 4); TEST_VQDMULH(, int, s, 32, 2); TEST_VQDMULH(q, int, s, 16, 8); diff --git a/ref_vqdmulh_lane.c b/ref_vqdmulh_lane.c index 2d6bb39..bc39f97 100644 --- a/ref_vqdmulh_lane.c +++ b/ref_vqdmulh_lane.c @@ -39,16 +39,16 @@ THE SOFTWARE. FNNAME (INSN) { /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result. */ -#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N2), \ - L); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ @@ -88,7 +88,7 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); /* Choose lane arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2); TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3); @@ -106,11 +106,11 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 3); TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 2); TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 1); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); } diff --git a/ref_vqdmulh_n.c b/ref_vqdmulh_n.c index 847edba..761c7c0 100644 --- a/ref_vqdmulh_n.c +++ b/ref_vqdmulh_n.c @@ -42,14 +42,14 @@ FNNAME (INSN) /* vector_res = vqdmulh_n(vector,val), then store the result. */ #define TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L) \ - Set_Neon_Overflow(0); \ + Set_Neon_Cumulative_Sat(0); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ L); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ - xSTR(T1), W, N) + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L) \ @@ -81,7 +81,7 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 32, 4, 0x100045); /* Choose multiplier arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMULH_N(, int, s, 16, 4, 0xCF); TEST_VQDMULH_N(, int, s, 32, 2, 0x2344); TEST_VQDMULH_N(q, int, s, 16, 8, 0x80); @@ -99,11 +99,11 @@ FNNAME (INSN) TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQDMULH_N(, int, s, 16, 4, 0x8000); TEST_VQDMULH_N(, int, s, 32, 2, 0x80000000); TEST_VQDMULH_N(q, int, s, 16, 8, 0x8000); TEST_VQDMULH_N(q, int, s, 32, 4, 0x80000000); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); } diff --git a/ref_vqdmull.c b/ref_vqdmull.c index 1d6bb6e..b9c194b 100644 --- a/ref_vqdmull.c +++ b/ref_vqdmull.c @@ -39,15 +39,15 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: y=vqdmull(x,x), then store the result. */ -#define TEST_VQDMULL2(INSN, T1, T2, W, W2, N) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W2, N) = \ - INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N)); \ - vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ - VECT_VAR(vector_res, T1, W2, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQDMULL2(INSN, T1, T2, W, W2, N) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQDMULL1(INSN, T1, T2, W, W2, N) \ @@ -73,7 +73,7 @@ FNNAME (INSN) TEST_VLOAD(vector2, buffer, , int, s, 16, 4); TEST_VLOAD(vector2, buffer, , int, s, 32, 2); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMULL(int, s, 16, 32, 4); TEST_VQDMULL(int, s, 32, 64, 2); @@ -85,9 +85,9 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQDMULL(int, s, 16, 32, 4); TEST_VQDMULL(int, s, 32, 64, 2); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); } diff --git a/ref_vqdmull_lane.c b/ref_vqdmull_lane.c index ae50a2e..c7e938a 100644 --- a/ref_vqdmull_lane.c +++ b/ref_vqdmull_lane.c @@ -41,16 +41,16 @@ FNNAME (INSN) int i; /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result. */ -#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W2, N) = \ - INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N), \ - L); \ - vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ - VECT_VAR(vector_res, T1, W2, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_lane_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_lane_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L) \ @@ -81,7 +81,7 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 32, 2, 0x2); /* Choose lane arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2); TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1); @@ -96,11 +96,13 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2); TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1); - fprintf (ref_file, "\n%s output:\n", TEST_MSG " (check mul overflow)"); + fprintf (ref_file, "\n%s output:\n", + TEST_MSG " (check mul cumulative saturation)"); DUMP(TEST_MSG, int, 32, 4, PRIx32); DUMP(TEST_MSG, int, 64, 2, PRIx64); } diff --git a/ref_vqdmull_n.c b/ref_vqdmull_n.c index e41593d..3e8c66a 100644 --- a/ref_vqdmull_n.c +++ b/ref_vqdmull_n.c @@ -41,15 +41,15 @@ FNNAME (INSN) int i; /* vector_res = vqdmull_n(vector,val), then store the result. */ -#define TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W2, N) = \ - INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ - L); \ - vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ - VECT_VAR(vector_res, T1, W2, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_n_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_n_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L) \ @@ -80,7 +80,7 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 32, 2, 0x2); /* Choose multiplier arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQDMULL_N(int, s, 16, 32, 4, 0x22); TEST_VQDMULL_N(int, s, 32, 64, 2, 0x55); @@ -91,12 +91,13 @@ FNNAME (INSN) TEST_VDUP(vector, , int, s, 16, 4, 0x8000); TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQDMULL_N(int, s, 16, 32, 4, 0x8000); TEST_VQDMULL_N(int, s, 32, 64, 2, 0x80000000); - fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s output:\n", + TEST_MSG " (check mul cumulative saturation)"); DUMP(TEST_MSG, int, 32, 4, PRIx32); DUMP(TEST_MSG, int, 64, 2, PRIx64); } diff --git a/ref_vqmovn.c b/ref_vqmovn.c index 303aba7..04bae5c 100644 --- a/ref_vqmovn.c +++ b/ref_vqmovn.c @@ -40,14 +40,14 @@ THE SOFTWARE. FNNAME (INSN_NAME) { /* Basic test: y=OP(x), then store the result. */ -#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ - vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2), \ - xSTR(T1), W, N) +#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W2), \ + xSTR(T1), W, N) #define TEST_UNARY_OP(INSN, T1, T2, W, W2, N) \ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ @@ -81,7 +81,7 @@ FNNAME (INSN_NAME) TEST_VDUP(vector, q, uint, u, 64, 2, 0x87654321); /* Apply a unary operator named INSN_NAME */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8); TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4); TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2); @@ -92,7 +92,8 @@ FNNAME (INSN_NAME) dump_results_hex (TEST_MSG); - /* Fill input vector with arbitrary values which cause an overflow */ + /* Fill input vector with arbitrary values which cause an cumulative + saturation. */ TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); TEST_VDUP(vector, q, int, s, 32, 4, 0x12345678); TEST_VDUP(vector, q, int, s, 64, 2, 0x1234567890ABLL); @@ -101,7 +102,7 @@ FNNAME (INSN_NAME) TEST_VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL); /* Apply a unary operator named INSN_NAME */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8); TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4); TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2); diff --git a/ref_vqmovun.c b/ref_vqmovun.c index 292d116..92c220e 100644 --- a/ref_vqmovun.c +++ b/ref_vqmovun.c @@ -40,14 +40,14 @@ THE SOFTWARE. FNNAME (INSN_NAME) { /* Basic test: y=OP(x), then store the result. */ -#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ - vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_s##W2), \ - xSTR(T1), W, N) +#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_s##W2), \ + xSTR(T1), W, N) #define TEST_UNARY_OP(INSN, T1, T2, W, W2, N) \ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ @@ -72,7 +72,7 @@ FNNAME (INSN_NAME) TEST_VDUP(vector, q, int, s, 64, 2, 0x12345678); /* Apply a unary operator named INSN_NAME */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); @@ -85,7 +85,8 @@ FNNAME (INSN_NAME) TEST_VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL); /* Apply a unary operator named INSN_NAME */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative input)"); TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); diff --git a/ref_vqneg.c b/ref_vqneg.c index 4ae0af8..b1c9402 100644 --- a/ref_vqneg.c +++ b/ref_vqneg.c @@ -61,7 +61,7 @@ void vqneg_extra() TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); /* Apply a unary operator named INSN_NAME */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); diff --git a/ref_vqrdmulh.c b/ref_vqrdmulh.c index 4d9ced3..db38f96 100644 --- a/ref_vqrdmulh.c +++ b/ref_vqrdmulh.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* vector_res = vqrdmulh(vector,vector2), then store the result. */ -#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N)); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N) \ @@ -88,7 +88,7 @@ FNNAME (INSN) TEST_VDUP(vector2, q, int, s, 16, 8, 0x33); TEST_VDUP(vector2, q, int, s, 32, 4, 0x22); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQRDMULH(, int, s, 16, 4); TEST_VQRDMULH(, int, s, 32, 2); TEST_VQRDMULH(q, int, s, 16, 8); @@ -107,13 +107,13 @@ FNNAME (INSN) TEST_VDUP(vector2, q, int, s, 16, 8, 0x8000); TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQRDMULH(, int, s, 16, 4); TEST_VQRDMULH(, int, s, 32, 2); TEST_VQRDMULH(q, int, s, 16, 8); TEST_VQRDMULH(q, int, s, 32, 4); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); TEST_VDUP(vector, , int, s, 16, 4, 0x8000); @@ -125,11 +125,11 @@ FNNAME (INSN) TEST_VDUP(vector2, q, int, s, 16, 8, 0x8001); TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000001); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check rounding overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check rounding cumulative saturation)"); TEST_VQRDMULH(, int, s, 16, 4); TEST_VQRDMULH(, int, s, 32, 2); TEST_VQRDMULH(q, int, s, 16, 8); TEST_VQRDMULH(q, int, s, 32, 4); - dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); + dump_results_hex2 (TEST_MSG, " (check rounding cumulative saturation)"); } diff --git a/ref_vqrdmulh_lane.c b/ref_vqrdmulh_lane.c index 93890c4..3e4ef64 100644 --- a/ref_vqrdmulh_lane.c +++ b/ref_vqrdmulh_lane.c @@ -40,16 +40,16 @@ THE SOFTWARE. FNNAME (INSN) { /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */ -#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N2), \ - L); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ @@ -90,7 +90,7 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); /* Choose lane arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); @@ -108,13 +108,13 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); TEST_VDUP(vector, , int, s, 16, 4, 0x8000); @@ -124,11 +124,11 @@ FNNAME (INSN) TEST_VDUP(vector2, , int, s, 16, 4, 0x8001); TEST_VDUP(vector2, , int, s, 32, 2, 0x80000001); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check rounding overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check rounding cumulative saturation)"); TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); - dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); + dump_results_hex2 (TEST_MSG, " (check rounding cumulative saturation)"); } diff --git a/ref_vqrdmulh_n.c b/ref_vqrdmulh_n.c index 3b7e8d8..cd6a481 100644 --- a/ref_vqrdmulh_n.c +++ b/ref_vqrdmulh_n.c @@ -43,14 +43,14 @@ FNNAME (INSN) /* vector_res = vqrdmulh_n(vector,val), then store the result. */ #define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) \ - Set_Neon_Overflow(0); \ + Set_Neon_Cumulative_Sat(0); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ L); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ - xSTR(T1), W, N) + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L) \ @@ -81,7 +81,7 @@ FNNAME (INSN) TEST_VLOAD(vector, buffer, q, int, s, 32, 4); /* Choose multiplier arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233); TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678); TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12); @@ -99,13 +99,13 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check mul overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); TEST_VQRDMULH_N(, int, s, 16, 4, 0x8000); TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000); TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000); TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000); - dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); TEST_VDUP(vector, , int, s, 16, 4, 0x8000); @@ -113,11 +113,11 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check rounding overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check rounding cumulative saturation)"); TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001); TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001); TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001); TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001); - dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); + dump_results_hex2 (TEST_MSG, " (check rounding cumulative saturation)"); } diff --git a/ref_vqrshl.c b/ref_vqrshl.c index 9e09619..27640f6 100644 --- a/ref_vqrshl.c +++ b/ref_vqrshl.c @@ -41,14 +41,14 @@ FNNAME (INSN) { /* Basic test: v3=vqrshl(v1,v2), then store the result. */ #define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N) \ - Set_Neon_Overflow(0); \ + Set_Neon_Cumulative_Sat(0); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector_shift, T3, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ - xSTR(T1), W, N) + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N) \ @@ -97,7 +97,8 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (with input = 0)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (with input = 0)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); dump_results_hex2 (TEST_MSG, " (with input = 0)"); @@ -111,7 +112,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (input 0 and negative shift amount)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); @@ -129,7 +130,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, 31); TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); dump_results_hex (TEST_MSG); @@ -143,7 +144,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (negative shift amount)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); dump_results_hex2 (TEST_MSG, " (negative shift amount)"); @@ -167,7 +168,7 @@ FNNAME (INSN) TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); - /* Use -1 shift amount to check overflow with round_const */ + /* Use -1 shift amount to check cumulative saturation with round_const */ TEST_VDUP(vector_shift, , int, s, 8, 8, -1); TEST_VDUP(vector_shift, , int, s, 16, 4, -1); TEST_VDUP(vector_shift, , int, s, 32, 2, -1); @@ -177,13 +178,14 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -1); TEST_VDUP(vector_shift, q, int, s, 64, 2, -1); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (checking overflow: shift by -1)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: shift by -1)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); - dump_results_hex2 (TEST_MSG, " (checking overflow: shift by -1)"); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: shift by -1)"); - /* Use -3 shift amount to check overflow with round_const */ + /* Use -3 shift amount to check cumulative saturation with round_const */ TEST_VDUP(vector_shift, , int, s, 8, 8, -3); TEST_VDUP(vector_shift, , int, s, 16, 4, -3); TEST_VDUP(vector_shift, , int, s, 32, 2, -3); @@ -193,10 +195,11 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -3); TEST_VDUP(vector_shift, q, int, s, 64, 2, -3); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (checking overflow: shift by -3)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: shift by -3)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); - dump_results_hex2 (TEST_MSG, " (checking overflow: shift by -3)"); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: shift by -3)"); /* Use large shift amount */ @@ -209,10 +212,11 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, 40); TEST_VDUP(vector_shift, q, int, s, 64, 2, 70); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (checking overflow: large shift amount)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: large shift amount)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); - dump_results_hex2 (TEST_MSG, " (checking overflow: large shift amount)"); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: large shift amount)"); /* Fill input vector with negative values, to check saturation on limits */ @@ -235,10 +239,11 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, 40); TEST_VDUP(vector_shift, q, int, s, 64, 2, 70); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (checking overflow: large shift amount with negative input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: large shift amount with negative input)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); - dump_results_hex2 (TEST_MSG, " (checking overflow: large shift amount with negative input)"); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: large shift amount with negative input)"); /* Fill input vector with negative and positive values, to check @@ -262,10 +267,11 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -40); TEST_VDUP(vector_shift, q, int, s, 64, 2, -70); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (checking overflow: large negative shift amount)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: large negative shift amount)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); - dump_results_hex2 (TEST_MSG, " (checking overflow: large negative shift amount)"); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: large negative shift amount)"); /* Fill input vector with 0, to check saturation in case of large @@ -289,8 +295,9 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -40); TEST_VDUP(vector_shift, q, int, s, 64, 2, -70); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (checking overflow: large shift amount with 0 input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: large shift amount with 0 input)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); - dump_results_hex2 (TEST_MSG, " (checking overflow: large shift amount with 0 input)"); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: large shift amount with 0 input)"); } diff --git a/ref_vqrshrn_n.c b/ref_vqrshrn_n.c index c73da4c..f4438b6 100644 --- a/ref_vqrshrn_n.c +++ b/ref_vqrshrn_n.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: y=vqrshrn_n(x,v), then store the result. */ -#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W2, N) = \ - INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ - V); \ - vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ - VECT_VAR(vector_res, T1, W2, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V) \ @@ -86,7 +86,7 @@ FNNAME (INSN) TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); /* Choose shift amount arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQRSHRN_N(int, s, 16, 8, 8, 1); TEST_VQRSHRN_N(int, s, 32, 16, 4, 1); TEST_VQRSHRN_N(int, s, 64, 32, 2, 2); @@ -106,7 +106,7 @@ FNNAME (INSN) TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); /* shift by 3 to exercise saturation code in the lib */ - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (check saturation: shift by 3)"); TEST_VQRSHRN_N(int, s, 16, 8, 8, 3); TEST_VQRSHRN_N(int, s, 32, 16, 4, 3); @@ -120,7 +120,7 @@ FNNAME (INSN) /* shift by max to exercise saturation code in the lib */ - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (check saturation: shift by max)"); TEST_VQRSHRN_N(int, s, 16, 8, 8, 8); TEST_VQRSHRN_N(int, s, 32, 16, 4, 16); diff --git a/ref_vqrshrun_n.c b/ref_vqrshrun_n.c index c525f4a..abd8890 100644 --- a/ref_vqrshrun_n.c +++ b/ref_vqrshrun_n.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: y=vqrshrun_n(x,v), then store the result. */ -#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, uint, W2, N) = \ - INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ - V); \ - vst1_u##W2(VECT_VAR(result, uint, W2, N), \ - VECT_VAR(vector_res, uint, W2, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ @@ -78,7 +78,8 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, -4); /* Choose shift amount arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative input)"); TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3); TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4); TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2); @@ -92,22 +93,23 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); /* shift by 1 */ - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check saturation/overflow: shift by 1)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by 1)"); TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1); TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1); TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1); - dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 1)"); + dump_results_hex2 (TEST_MSG, " (check cumulative saturation: shift by 1)"); /* shift by max */ - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check saturation/overflow: shift by max, positive input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by max, positive input)"); TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8); TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16); TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32); - dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by max, positive input)"); + dump_results_hex2 (TEST_MSG, + " (check cumulative saturation: shift by max, positive input)"); /* Fill input vector with min value, to check saturation on limits */ @@ -116,13 +118,14 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); /* shift by max */ - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check saturation/overflow: shift by max, negative input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by max, negative input)"); TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8); TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16); TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32); - dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by max, negative input)"); + dump_results_hex2 (TEST_MSG, + " (check cumulative saturation: shift by max, negative input)"); /* Fill input vector with positive values, to check normal case */ TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); @@ -130,7 +133,7 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); /* shift arbitrary amount */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6); TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7); TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8); diff --git a/ref_vqshl.c b/ref_vqshl.c index 7f9c02b..edbd796 100644 --- a/ref_vqshl.c +++ b/ref_vqshl.c @@ -41,14 +41,14 @@ FNNAME (INSN) { /* Basic test: v3=vqshl(v1,v2), then store the result. */ #define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) \ - Set_Neon_Overflow(0); \ + Set_Neon_Cumulative_Sat(0); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector_shift, T3, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ - xSTR(T1), W, N) + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N) \ @@ -113,7 +113,8 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); TEST_VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (with input = 0)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (with input = 0)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); dump_results_hex2 (TEST_MSG, " (with input = 0)"); @@ -128,7 +129,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (input 0 and negative shift amount)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); @@ -146,7 +147,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); dump_results_hex (TEST_MSG); @@ -160,7 +161,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (negative shift amount)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); dump_results_hex2 (TEST_MSG, " (negative shift amount)"); @@ -175,7 +176,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (large shift amount, negative input)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); dump_results_hex2 (TEST_MSG, " (large shift amount, negative input)"); @@ -208,10 +209,10 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, -1); TEST_VDUP(vector_shift, q, int, s, 64, 2, -1); - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check saturation/overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); - dump_results_hex2 (TEST_MSG, " (check saturation/overflow)"); + dump_results_hex2 (TEST_MSG, " (check cumulative saturation)"); /* Use large shift amounts */ TEST_VDUP(vector_shift, , int, s, 8, 8, 8); @@ -223,7 +224,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (large shift amount, positive input)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); dump_results_hex2 (TEST_MSG, " (large shift amount, positive input)"); @@ -233,7 +234,7 @@ FNNAME (INSN) TEST_VDUP(vector_shift, , int, s, 64, 1, 64); TEST_VDUP(vector, q, int, s, 64, 2, 10); TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (check saturation on 64 bits)"); TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); dump_results_hex2 (TEST_MSG, " (check saturation on 64 bits)"); diff --git a/ref_vqshl_n.c b/ref_vqshl_n.c index 731c4ca..490938a 100644 --- a/ref_vqshl_n.c +++ b/ref_vqshl_n.c @@ -41,14 +41,14 @@ FNNAME (INSN) { /* Basic test: v2=vqshl_n(v1,v), then store the result. */ #define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V) \ - Set_Neon_Overflow(0); \ + Set_Neon_Cumulative_Sat(0); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ V); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ - xSTR(T1), W, N) + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N) \ @@ -67,7 +67,7 @@ FNNAME (INSN) TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); /* Choose shift amount arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQSHL_N(, int, s, 8, 8, 2); TEST_VQSHL_N(, int, s, 16, 4, 1); TEST_VQSHL_N(, int, s, 32, 2, 1); @@ -108,7 +108,8 @@ FNNAME (INSN) TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (check saturation with large positive input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check saturation with large positive input)"); TEST_VQSHL_N(, int, s, 8, 8, 2); TEST_VQSHL_N(, int, s, 16, 4, 1); TEST_VQSHL_N(, int, s, 32, 2, 1); diff --git a/ref_vqshlu_n.c b/ref_vqshlu_n.c index c8b14a6..29a7865 100644 --- a/ref_vqshlu_n.c +++ b/ref_vqshlu_n.c @@ -41,14 +41,14 @@ FNNAME (INSN) { /* Basic test: v2=vqshlu_n(v1,v), then store the result. */ #define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) \ - Set_Neon_Overflow(0); \ + Set_Neon_Cumulative_Sat(0); \ VECT_VAR(vector_res, T3, W, N) = \ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ V); \ vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ VECT_VAR(vector_res, T3, W, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ - xSTR(T1), W, N) + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) \ @@ -76,7 +76,8 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, -4); /* Choose shift amount arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative input)"); TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2); TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1); TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1); @@ -101,8 +102,8 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL); /* shift by 1 */ - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check saturation/overflow: shift by 1)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by 1)"); TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1); TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1); TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1); @@ -113,11 +114,11 @@ FNNAME (INSN) TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1); TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1); - dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 1)"); + dump_results_hex2 (TEST_MSG, " (check cumulative saturation: shift by 1)"); /* shift by 2 to force saturation */ - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check saturation/overflow: shift by 2)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by 2)"); TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2); TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2); TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2); @@ -128,7 +129,7 @@ FNNAME (INSN) TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2); TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2); - dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 2)"); + dump_results_hex2 (TEST_MSG, " (check cumulative saturation: shift by 2)"); /* Fill input vector with positive values, to check normal case */ TEST_VDUP(vector, , int, s, 8, 8, 1); @@ -141,7 +142,7 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, 8); /* shift arbitrary amount */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1); TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2); TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3); diff --git a/ref_vqshrn_n.c b/ref_vqshrn_n.c index 21c080c..3f629d3 100644 --- a/ref_vqshrn_n.c +++ b/ref_vqshrn_n.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: y=vqshrn_n(x,v), then store the result. */ -#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, T1, W2, N) = \ - INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ - V); \ - vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ - VECT_VAR(vector_res, T1, W2, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V) \ @@ -86,7 +86,7 @@ FNNAME (INSN) TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); /* Choose shift amount arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQSHRN_N(int, s, 16, 8, 8, 1); TEST_VQSHRN_N(int, s, 32, 16, 4, 1); TEST_VQSHRN_N(int, s, 64, 32, 2, 2); @@ -107,7 +107,7 @@ FNNAME (INSN) TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); /* shift by 3 to exercise saturation code in the lib */ - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (check saturation: shift by 3)"); TEST_VQSHRN_N(int, s, 16, 8, 8, 3); TEST_VQSHRN_N(int, s, 32, 16, 4, 3); @@ -121,7 +121,7 @@ FNNAME (INSN) /* shift by max to exercise saturation code in the lib */ - fprintf(ref_file, "\n%s overflow output:\n", + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG " (check saturation: shift by max)"); TEST_VQSHRN_N(int, s, 16, 8, 8, 8); TEST_VQSHRN_N(int, s, 32, 16, 4, 16); diff --git a/ref_vqshrun_n.c b/ref_vqshrun_n.c index bdf47bb..d746af6 100644 --- a/ref_vqshrun_n.c +++ b/ref_vqshrun_n.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: y=vqshrun_n(x,v), then store the result. */ -#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Overflow(0); \ - VECT_VAR(vector_res, uint, W2, N) = \ - INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ - V); \ - vst1_u##W2(VECT_VAR(result, uint, W2, N), \ - VECT_VAR(vector_res, uint, W2, N)); \ - dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0); \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ @@ -78,7 +78,8 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, -4); /* Choose shift amount arbitrarily */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative input)"); TEST_VQSHRUN_N(int, s, 16, 8, 8, 3); TEST_VQSHRUN_N(int, s, 32, 16, 4, 4); TEST_VQSHRUN_N(int, s, 64, 32, 2, 2); @@ -92,13 +93,13 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); /* shift by 1 */ - fprintf(ref_file, "\n%s overflow output:\n", - TEST_MSG " (check saturation/overflow)"); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation)"); TEST_VQSHRUN_N(int, s, 16, 8, 8, 1); TEST_VQSHRUN_N(int, s, 32, 16, 4, 1); TEST_VQSHRUN_N(int, s, 64, 32, 2, 1); - dump_results_hex2 (TEST_MSG, " (check saturation/overflow)"); + dump_results_hex2 (TEST_MSG, " (check cumulative saturation)"); /* Fill input vector with positive values, to check normal case */ TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); @@ -106,7 +107,7 @@ FNNAME (INSN) TEST_VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); /* shift arbitrary amount */ - fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); TEST_VQSHRUN_N(int, s, 16, 8, 8, 6); TEST_VQSHRUN_N(int, s, 32, 16, 4, 7); TEST_VQSHRUN_N(int, s, 64, 32, 2, 8); diff --git a/ref_vqsub.c b/ref_vqsub.c index 3cd28ae..f765b27 100644 --- a/ref_vqsub.c +++ b/ref_vqsub.c @@ -49,7 +49,9 @@ void vqsub_64(void) TEST_VDUP(vector2, q, int, s, 64, 2, 0x0); TEST_VDUP(vector2, q, uint, u, 64, 2, 0x0); - fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", + TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); @@ -67,7 +69,9 @@ void vqsub_64(void) TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); - fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", + TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); @@ -89,7 +93,9 @@ void vqsub_64(void) TEST_VDUP(vector2, q, uint, u, 64, 2, 0xffffffffffffffffULL); - fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", + TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); @@ -138,7 +144,8 @@ void vqsub_64(void) TEST_VDUP(vector2, q, uint, u, 16, 8, 0x20); TEST_VDUP(vector2, q, uint, u, 32, 4, 0x20); - fprintf(ref_file, "\n%s less than 64 bits saturation overflow output:\n", + fprintf(ref_file, + "\n%s less than 64 bits saturation cumulative saturation output:\n", TEST_MSG); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h index ba29f10..43d2ce7 100644 --- a/stm-arm-neon-ref.h +++ b/stm-arm-neon-ref.h @@ -562,36 +562,37 @@ typedef union { #ifdef __ARMCC_VERSION register _ARM_FPSCR _afpscr_for_qc __asm("fpscr"); -#define Neon_Overflow _afpscr_for_qc.b.QC -#define Set_Neon_Overflow(x) {Neon_Overflow = (x);} +#define Neon_Cumulative_Sat _afpscr_for_qc.b.QC +#define Set_Neon_Cumulative_Sat(x) {Neon_Cumulative_Sat = (x);} #else /* GCC/ARM does not know this register */ -#define Neon_Overflow __read_neon_overflow() -static int __read_neon_overflow() { - _ARM_FPSCR _afpscr_for_qc; - asm("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); - return _afpscr_for_qc.b.QC; +#define Neon_Cumulative_Sat __read_neon_cumulative_sat() +static int __read_neon_cumulative_sat() { + _ARM_FPSCR _afpscr_for_qc; + asm("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); + return _afpscr_for_qc.b.QC; } -#define Set_Neon_Overflow(x) __set_neon_overflow((x)) -static void __set_neon_overflow(int x) { - _ARM_FPSCR _afpscr_for_qc; - asm("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); - _afpscr_for_qc.b.QC = x; - asm("vmsr fpscr,%0" : : "r" (_afpscr_for_qc)); - return; +#define Set_Neon_Cumulative_Sat(x) __set_neon_cumulative_sat((x)) +static void __set_neon_cumulative_sat(int x) { + _ARM_FPSCR _afpscr_for_qc; + asm("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); + _afpscr_for_qc.b.QC = x; + asm("vmsr fpscr,%0" : : "r" (_afpscr_for_qc)); + return; } #endif #endif /* STM_ARM_NEON_MODELS */ -static void dump_neon_overflow(const char* msg, const char *name, - const char* t1, int w, int n) +static void dump_neon_cumulative_sat(const char* msg, const char *name, + const char* t1, int w, int n) { - fprintf(ref_file, "%s:%d:%s Neon overflow %d\n", msg, result_idx++, - name, Neon_Overflow); - fprintf(gcc_tests_file, "int VECT_VAR(expected_overflow,%s,%d,%d) = %d;\n", \ - t1, w, n, Neon_Overflow); + fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++, + name, Neon_Cumulative_Sat); + fprintf(gcc_tests_file, + "int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n", + t1, w, n, Neon_Cumulative_Sat); } /* Clean output buffers before execution */ |