diff options
author | Victor Khimenko <khim@google.com> | 2016-10-10 17:38:17 +0200 |
---|---|---|
committer | Victor Khimenko <khim@google.com> | 2016-10-10 19:29:09 +0200 |
commit | 2481784ced1961fde6c6605f31fa1134cfdd3e5f (patch) | |
tree | 55ed738873e970ac1f115e87fb151fb50399f300 | |
parent | 230c1021ca6cef9677c4e8c87a92016048183b6f (diff) | |
parent | d98beba6c8fa7b38fb62cb799c9c7de355937cf3 (diff) | |
download | platform_external_arm-neon-tests-2481784ced1961fde6c6605f31fa1134cfdd3e5f.tar.gz platform_external_arm-neon-tests-2481784ced1961fde6c6605f31fa1134cfdd3e5f.tar.bz2 platform_external_arm-neon-tests-2481784ced1961fde6c6605f31fa1134cfdd3e5f.zip |
Initial import of upstream sources
BUG=30471686
Change-Id: I1270bfb2ee8f7f43683104e92f131dd42222ffd9
-rw-r--r-- | Init.s | 259 | ||||
-rw-r--r-- | InitCache.s | 52 | ||||
-rw-r--r-- | LICENSE | 10 | ||||
-rw-r--r-- | Makefile | 166 | ||||
-rw-r--r-- | Makefile.gcc | 124 | ||||
-rw-r--r-- | README | 60 | ||||
-rw-r--r-- | armscript.inc | 14 | ||||
-rw-r--r-- | compute_ref.axf | bin | 0 -> 3551484 bytes | |||
-rw-r--r-- | compute_ref.c | 370 | ||||
-rwxr-xr-x | compute_ref.gccarm | bin | 0 -> 4524595 bytes | |||
-rw-r--r-- | compute_ref.gccarm-rvct | bin | 0 -> 4818960 bytes | |||
-rw-r--r-- | compute_ref_data.c | 566 | ||||
-rw-r--r-- | expected_input4gcc-nofp16.txt | 7087 | ||||
-rw-r--r-- | expected_input4gcc.txt | 7627 | ||||
-rw-r--r-- | license.html | 456 | ||||
-rw-r--r-- | ref-rvct-all.txt | 8833 | ||||
-rw-r--r-- | ref-rvct-neon-nofp16.txt | 7356 | ||||
-rw-r--r-- | ref-rvct-neon.txt | 7900 | ||||
-rw-r--r-- | ref_dsp.c | 421 | ||||
-rw-r--r-- | ref_dspfns.c | 1502 | ||||
-rw-r--r-- | ref_integer.c | 290 | ||||
-rw-r--r-- | ref_v_binary_op.c | 88 | ||||
-rw-r--r-- | ref_v_binary_sat_op.c | 110 | ||||
-rw-r--r-- | ref_v_comp_f_op.c | 89 | ||||
-rw-r--r-- | ref_v_comp_op.c | 221 | ||||
-rw-r--r-- | ref_v_unary_op.c | 91 | ||||
-rw-r--r-- | ref_v_unary_sat_op.c | 96 | ||||
-rw-r--r-- | ref_vaba.c | 125 | ||||
-rw-r--r-- | ref_vabal.c | 128 | ||||
-rw-r--r-- | ref_vabd.c | 133 | ||||
-rw-r--r-- | ref_vabdl.c | 93 | ||||
-rw-r--r-- | ref_vabs.c | 54 | ||||
-rw-r--r-- | ref_vadd.c | 60 | ||||
-rw-r--r-- | ref_vaddhn.c | 94 | ||||
-rw-r--r-- | ref_vaddl.c | 104 | ||||
-rw-r--r-- | ref_vaddw.c | 104 | ||||
-rw-r--r-- | ref_vand.c | 29 | ||||
-rw-r--r-- | ref_vbic.c | 29 | ||||
-rw-r--r-- | ref_vbsl.c | 104 | ||||
-rw-r--r-- | ref_vcage.c | 29 | ||||
-rw-r--r-- | ref_vcagt.c | 29 | ||||
-rw-r--r-- | ref_vcale.c | 29 | ||||
-rw-r--r-- | ref_vcalt.c | 29 | ||||
-rw-r--r-- | ref_vceq.c | 63 | ||||
-rw-r--r-- | ref_vcge.c | 29 | ||||
-rw-r--r-- | ref_vcgt.c | 29 | ||||
-rw-r--r-- | ref_vcle.c | 29 | ||||
-rw-r--r-- | ref_vcls.c | 107 | ||||
-rw-r--r-- | ref_vclt.c | 29 | ||||
-rw-r--r-- | ref_vclz.c | 142 | ||||
-rw-r--r-- | ref_vcnt.c | 88 | ||||
-rw-r--r-- | ref_vcombine.c | 100 | ||||
-rw-r--r-- | ref_vcreate.c | 119 | ||||
-rw-r--r-- | ref_vcvt.c | 236 | ||||
-rw-r--r-- | ref_vdup.c | 116 | ||||
-rw-r--r-- | ref_vdup_lane.c | 81 | ||||
-rw-r--r-- | ref_veor.c | 29 | ||||
-rw-r--r-- | ref_vext.c | 108 | ||||
-rw-r--r-- | ref_vget_high.c | 76 | ||||
-rw-r--r-- | ref_vget_lane.c | 109 | ||||
-rw-r--r-- | ref_vget_low.c | 76 | ||||
-rw-r--r-- | ref_vhadd.c | 31 | ||||
-rw-r--r-- | ref_vhsub.c | 31 | ||||
-rw-r--r-- | ref_vld1.c | 64 | ||||
-rw-r--r-- | ref_vld1_dup.c | 69 | ||||
-rw-r--r-- | ref_vld1_lane.c | 130 | ||||
-rw-r--r-- | ref_vldX.c | 221 | ||||
-rw-r--r-- | ref_vldX_dup.c | 187 | ||||
-rw-r--r-- | ref_vldX_lane.c | 243 | ||||
-rw-r--r-- | ref_vmax.c | 153 | ||||
-rw-r--r-- | ref_vmin.c | 29 | ||||
-rw-r--r-- | ref_vmla.c | 144 | ||||
-rw-r--r-- | ref_vmla_lane.c | 125 | ||||
-rw-r--r-- | ref_vmla_n.c | 112 | ||||
-rw-r--r-- | ref_vmlal.c | 119 | ||||
-rw-r--r-- | ref_vmlal_lane.c | 101 | ||||
-rw-r--r-- | ref_vmlal_n.c | 92 | ||||
-rw-r--r-- | ref_vmls.c | 29 | ||||
-rw-r--r-- | ref_vmls_lane.c | 29 | ||||
-rw-r--r-- | ref_vmls_n.c | 29 | ||||
-rw-r--r-- | ref_vmlsl.c | 29 | ||||
-rw-r--r-- | ref_vmlsl_lane.c | 29 | ||||
-rw-r--r-- | ref_vmlsl_n.c | 29 | ||||
-rw-r--r-- | ref_vmovl.c | 60 | ||||
-rw-r--r-- | ref_vmovn.c | 60 | ||||
-rw-r--r-- | ref_vmul.c | 134 | ||||
-rw-r--r-- | ref_vmul_lane.c | 105 | ||||
-rw-r--r-- | ref_vmul_n.c | 91 | ||||
-rw-r--r-- | ref_vmull.c | 81 | ||||
-rw-r--r-- | ref_vmull_lane.c | 84 | ||||
-rw-r--r-- | ref_vmull_n.c | 82 | ||||
-rw-r--r-- | ref_vmvn.c | 120 | ||||
-rw-r--r-- | ref_vneg.c | 54 | ||||
-rw-r--r-- | ref_vorn.c | 29 | ||||
-rw-r--r-- | ref_vorr.c | 29 | ||||
-rw-r--r-- | ref_vpadal.c | 140 | ||||
-rw-r--r-- | ref_vpadd.c | 96 | ||||
-rw-r--r-- | ref_vpaddl.c | 113 | ||||
-rw-r--r-- | ref_vpmax.c | 29 | ||||
-rw-r--r-- | ref_vpmin.c | 29 | ||||
-rw-r--r-- | ref_vqabs.c | 73 | ||||
-rw-r--r-- | ref_vqadd.c | 157 | ||||
-rw-r--r-- | ref_vqdmlal.c | 98 | ||||
-rw-r--r-- | ref_vqdmlal_lane.c | 105 | ||||
-rw-r--r-- | ref_vqdmlal_n.c | 92 | ||||
-rw-r--r-- | ref_vqdmlsl.c | 29 | ||||
-rw-r--r-- | ref_vqdmlsl_lane.c | 29 | ||||
-rw-r--r-- | ref_vqdmlsl_n.c | 29 | ||||
-rw-r--r-- | ref_vqdmulh.c | 115 | ||||
-rw-r--r-- | ref_vqdmulh_lane.c | 116 | ||||
-rw-r--r-- | ref_vqdmulh_n.c | 110 | ||||
-rw-r--r-- | ref_vqdmull.c | 93 | ||||
-rw-r--r-- | ref_vqdmull_lane.c | 109 | ||||
-rw-r--r-- | ref_vqdmull_n.c | 104 | ||||
-rw-r--r-- | ref_vqmovn.c | 114 | ||||
-rw-r--r-- | ref_vqmovun.c | 95 | ||||
-rw-r--r-- | ref_vqneg.c | 73 | ||||
-rw-r--r-- | ref_vqrdmulh.c | 135 | ||||
-rw-r--r-- | ref_vqrdmulh_lane.c | 134 | ||||
-rw-r--r-- | ref_vqrdmulh_n.c | 124 | ||||
-rw-r--r-- | ref_vqrshl.c | 303 | ||||
-rw-r--r-- | ref_vqrshrn_n.c | 134 | ||||
-rw-r--r-- | ref_vqrshrun_n.c | 142 | ||||
-rw-r--r-- | ref_vqshl.c | 241 | ||||
-rw-r--r-- | ref_vqshl_n.c | 132 | ||||
-rw-r--r-- | ref_vqshlu_n.c | 157 | ||||
-rw-r--r-- | ref_vqshrn_n.c | 135 | ||||
-rw-r--r-- | ref_vqshrun_n.c | 116 | ||||
-rw-r--r-- | ref_vqsub.c | 163 | ||||
-rw-r--r-- | ref_vraddhn.c | 29 | ||||
-rw-r--r-- | ref_vrecpe.c | 143 | ||||
-rw-r--r-- | ref_vrecps.c | 120 | ||||
-rw-r--r-- | ref_vreinterpret.c | 398 | ||||
-rw-r--r-- | ref_vrev.c | 106 | ||||
-rw-r--r-- | ref_vrhadd.c | 31 | ||||
-rw-r--r-- | ref_vrshl.c | 220 | ||||
-rw-r--r-- | ref_vrshr_n.c | 217 | ||||
-rw-r--r-- | ref_vrshrn_n.c | 119 | ||||
-rw-r--r-- | ref_vrsqrte.c | 143 | ||||
-rw-r--r-- | ref_vrsqrts.c | 120 | ||||
-rw-r--r-- | ref_vrsra_n.c | 238 | ||||
-rw-r--r-- | ref_vrsubhn.c | 29 | ||||
-rw-r--r-- | ref_vsXi_n.c | 116 | ||||
-rw-r--r-- | ref_vset_lane.c | 82 | ||||
-rw-r--r-- | ref_vshl.c | 98 | ||||
-rw-r--r-- | ref_vshl_n.c | 75 | ||||
-rw-r--r-- | ref_vshll_n.c | 64 | ||||
-rw-r--r-- | ref_vshr_n.c | 76 | ||||
-rw-r--r-- | ref_vshrn_n.c | 81 | ||||
-rw-r--r-- | ref_vsli_n.c | 96 | ||||
-rw-r--r-- | ref_vsra_n.c | 97 | ||||
-rw-r--r-- | ref_vsri_n.c | 96 | ||||
-rw-r--r-- | ref_vst1_lane.c | 85 | ||||
-rw-r--r-- | ref_vstX_lane.c | 243 | ||||
-rw-r--r-- | ref_vsub.c | 60 | ||||
-rw-r--r-- | ref_vsubhn.c | 29 | ||||
-rw-r--r-- | ref_vsubl.c | 29 | ||||
-rw-r--r-- | ref_vsubw.c | 29 | ||||
-rw-r--r-- | ref_vtbX.c | 227 | ||||
-rw-r--r-- | ref_vtrn.c | 29 | ||||
-rw-r--r-- | ref_vtst.c | 99 | ||||
-rw-r--r-- | ref_vuzp.c | 171 | ||||
-rw-r--r-- | ref_vzip.c | 29 | ||||
-rw-r--r-- | retarget.c | 46 | ||||
-rw-r--r-- | scatter.scat | 29 | ||||
-rw-r--r-- | stm-arm-neon-ref.h | 815 |
166 files changed, 58059 insertions, 0 deletions
@@ -0,0 +1,259 @@ +;================================================================== +; Copyright ARM Ltd 2005. All rights reserved. +; +; Cortex-A8 Dhrystone example - Startup Code +;================================================================== + + PRESERVE8 + AREA CORTEXA8, CODE, READONLY + + ENTRY + +; Standard definitions of mode bits and interrupt (I & F) flags in PSRs + +Mode_USR EQU 0x10 +Mode_FIQ EQU 0x11 +Mode_IRQ EQU 0x12 +Mode_SVC EQU 0x13 +Mode_ABT EQU 0x17 +Mode_UNDEF EQU 0x1B +Mode_SYS EQU 0x1F + +I_Bit EQU 0x80 ; when I bit is set, IRQ is disabled +F_Bit EQU 0x40 ; when F bit is set, FIQ is disabled + +;================================================================== +; Disable Cortex-A8 MMU if enabled +;================================================================== + + EXPORT Start + +Start + + MRC p15, 0, r0, c1, c0, 0 ; Read CP15 Control Register into r0 + TST r0, #0x1 ; Is the MMU enabled? + BICNE r0, r0, #0x1 ; Clear bit 0 + MCRNE p15, 0, r0, c1, c0, 0 ; Write value back + +;================================================================== +; Initialise Supervisor Mode Stack +; Note stack must be 8 byte aligned. +;================================================================== + + IMPORT ||Image$$STACK$$ZI$$Limit|| ; Linker symbol from scatter file + LDR SP, =||Image$$STACK$$ZI$$Limit|| + +;================================================================== +; TLB maintenance, Invalidate Data and Instruction TLB's +;================================================================== + + MOV r0,#0 + MCR p15, 0, r0, c8, c7, 0 ; Cortex-A8 I-TLB and D-TLB invalidation + +;================================================================== +; Cache Invalidation code for Cortex-A8 +;================================================================== + + ; Invalidate L1 Instruction Cache + + MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR + TST r0, #0x3 ; Harvard Cache? 
+ MOV r0, #0 + MCRNE p15, 0, r0, c7, c5, 0 ; Invalidate Instruction Cache + + ; Invalidate Data/Unified Caches + + MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR + ANDS r3, r0, #&7000000 + MOV r3, r3, LSR #23 ; Total cache levels << 1 + BEQ Finished + + MOV r10, #0 ; R10 holds current cache level << 1 +Loop1 ADD r2, r10, r10, LSR #1 ; R2 holds cache "Set" position + MOV r1, r0, LSR r2 ; Bottom 3 bits are the Cache-type for this level + AND r1, R1, #7 ; Get those 3 bits alone + CMP r1, #2 + BLT Skip ; No cache or only instruction cache at this level + + MCR p15, 2, r10, c0, c0, 0 ; Write the Cache Size selection register + MOV r1, #0 + MCR p15, 0, r1, c7, c5, 4 ; PrefetchFlush to sync the change to the CacheSizeID reg + MRC p15, 1, r1, c0, c0, 0 ; Reads current Cache Size ID register + AND r2, r1, #&7 ; Extract the line length field + ADD r2, r2, #4 ; Add 4 for the line length offset (log2 16 bytes) + LDR r4, =0x3FF + ANDS r4, r4, r1, LSR #3 ; R4 is the max number on the way size (right aligned) + CLZ r5, r4 ; R5 is the bit position of the way size increment + LDR r7, =0x00007FFF + ANDS r7, r7, r1, LSR #13 ; R7 is the max number of the index size (right aligned) + +Loop2 MOV r9, r4 ; R9 working copy of the max way size (right aligned) + +Loop3 ORR r11, r10, r9, LSL r5 ; Factor in the Way number and cache number into R11 + ORR r11, r11, r7, LSL r2 ; Factor in the Set number + MCR p15, 0, r11, c7, c14, 2 ; Clean and Invalidate by set/way + SUBS r9, r9, #1 ; Decrement the Way number + BGE Loop3 + SUBS r7, r7, #1 ; Decrement the Set number + BGE Loop2 +Skip ADD r10, r10, #2 ; increment the cache number + CMP r3, r10 + BGT Loop1 + +Finished + + +;=================================================================== +; Cortex-A8 MMU Configuration +; Set translation table base +;=================================================================== + + + IMPORT ||Image$$TTB$$ZI$$Base|| ; from scatter file.; + + ; Cortex-A8 supports two translation tables + ; Configure translation table 
base (TTB) control register cp15,c2 + ; to a value of all zeros, indicates we are using TTB register 0. + + MOV r0,#0x0 + MCR p15, 0, r0, c2, c0, 2 + + ; write the address of our page table base to TTB register 0.; + ; We are setting to outer-noncachable [4:3] is zero + + LDR r0,=||Image$$TTB$$ZI$$Base|| + MCR p15, 0, r0, c2, c0, 0 + + +;=================================================================== +; Cortex-A8 PAGE TABLE generation, using standard Arch v6 tables +; +; AP[11:10] - Access Permissions = b11, Read/Write Access +; Domain[8:5] - Domain = b1111, Domain 15 +; Type[1:0] - Descriptor Type = b10, 1Mb descriptors +; +; TEX C B +; 000 0 0 Strongly Ordered +; 001 1 1 Outer and inner write back, write allocate Normal +;=================================================================== + + LDR r1,=0xfff ; loop counter + LDR r2,=2_00000000000000000000110111100010 + + ; r0 contains the address of the translation table base + ; r1 is loop counter + ; r2 is level1 descriptor (bits 19:0) + + ; use loop counter to create 4096 individual table entries + ; this writes from address 0x7FFC down to 0x4000 in word steps (4bytes). + +init_ttb_1 + + ORR r3, r2, r1, LSL#20 ; r3 now contains full level1 descriptor to write + STR r3, [r0, r1, LSL#2] ; str table entry at TTB base + loopcount*4 + SUBS r1, r1, #1 ; decrement loop counter + BPL init_ttb_1 + + ; In this example we will change the cacheable attribute in the first descriptor. + ; Virtual memory from 0 to 1MB will be cacheable (write back mode). + ; TEX[14:12]=001 and CB[3:2]= 11, Outer and inner write back, write allocate. 
+ + ORR r3,r3,#2_0000000001100 ; Set C and B bits (bits 3:2) + ORR r3,r3,#2_1000000000000 ; Set TEX[0] (bit 12) + STR r3,[r0] ; store the cacheable descriptor at [r0] (first table entry) + + ADD r2, r3, #0x100000 ; r2 = copy of r3 with the base address of the 2nd section (flat mapping) + STR r2, [r0, #4] ; store the new descriptor at r0 + 0x4 (overwrite 2nd section descriptor) + + ADD r2, r3, #0x200000 ; r2 = copy of r3 with the base address of the 3rd section (flat mapping) + STR r2, [r0, #8] ; store the new descriptor at r0 + 0x8 (overwrite 3rd section descriptor) + + ADD r2, r3, #0x300000 ; r2 = copy of r3 with the base address of the 4th section (flat mapping) + STR r2, [r0, #0xc] ; store the new descriptor at r0 + 0xc (overwrite 4th section descriptor) + + ADD r2, r3, #0x400000 ; r2 = copy of r3 with the base address of the 5th section (flat mapping) + STR r2, [r0, #0x10] ; store the new descriptor at r0 + 0x10 (overwrite 5th section descriptor) + + ADD r2, r3, #0x500000 ; r2 = copy of r3 with the base address of the 6th section (flat mapping) + STR r2, [r0, #0x14] ; store the new descriptor at r0 + 0x14 (overwrite 6th section descriptor) + + ADD r2, r3, #0x600000 ; r2 = copy of r3 with the base address of the 7th section (flat mapping) + STR r2, [r0, #0x18] ; store the new descriptor at r0 + 0x18 (overwrite 7th section descriptor) + + ADD r2, r3, #0x700000 ; r2 = copy of r3 with the base address of the 8th section (flat mapping) + STR r2, [r0, #0x1c] ; store the new descriptor at r0 + 0x1c (overwrite 8th section descriptor) + + ADD r2, r3, #0x800000 ; r2 = copy of r3 with the base address of the 9th section (flat mapping) + STR r2, [r0, #0x20] ; store the new descriptor at r0 + 0x20 (overwrite 9th section descriptor) + + ADD r2, r3, #0x900000 ; r2 = copy of r3 with the base address of the 10th section (flat mapping) + STR r2, [r0, #0x24] ; store the new descriptor at r0 + 0x24 (overwrite 10th section descriptor) + + ADD r2, r3, #0xa00000 ; r2 = copy of r3 with the base address of the 11th section (flat mapping) + STR r2, [r0, #0x28] ; store the new descriptor at r0 + 0x28 (overwrite 11th section descriptor)
+ + ADD r2, r3, #0xb00000 ; r2 = copy of r3 with the base address of the 12th section (flat mapping) + STR r2, [r0, #0x2c] ; store the new descriptor at r0 + 0x2c (overwrite 12th section descriptor) + + ADD r2, r3, #0xc00000 ; r2 = copy of r3 with the base address of the 13th section (flat mapping) + STR r2, [r0, #0x30] ; store the new descriptor at r0 + 0x30 (overwrite 13th section descriptor) + +;=================================================================== +; Setup domain control register - Enable all domains to client mode +;=================================================================== + + MRC p15, 0, r0, c3, c0, 0 ; Read Domain Access Control Register (value is discarded: r0 is reloaded by the next instruction) + LDR r0, =0x55555555 ; Initialize every domain entry to b01 (client) + MCR p15, 0, r0, c3, c0, 0 ; Write Domain Access Control Register + +;=================================================================== +; Setup L2 Cache - L2 Cache Auxiliary Control +;=================================================================== + + MOV r0, #0 ; r0 = 0, only consumed by the disabled MCR below + ;MCR p15, 1, r0, c9, c0, 2 ; Write L2 Auxiliary Control Register (disabled) + +;================================================================== +; Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11. +; Enables Full Access i.e. in both priv and non priv modes +;================================================================== + + MRC p15, 0, r0, c1, c0, 2 ; read CP access register + ORR r0, r0, #(0x3 <<20) ; enable access CP 10 + ORR r0, r0, #(0x3 <<22) ; enable access CP 11 + MCR p15, 0, r0, c1, c0, 2 ; write CP access register back + +;================================================================== +; Switch on the VFP and Neon Hardware +;================================================================= + + MOV r0, #0 ; Set up a register + ORR r0, r0, #(0x1 << 30) ; set bit 30, the EN bit written to FPEXC below + FMXR FPEXC, r0 ; Write FPEXC register, EN bit set. 
+ +;=================================================================== +; Enable MMU and Branch to __main +;=================================================================== + + IMPORT __main ; before MMU enabled import label to __main + LDR r12,=__main ; save this in register for possible long jump + + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #0x1 ; enable MMU before scatter loading + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + + +; Now the MMU is enabled, virtual to physical address translations will occur. +; This will affect the next instruction fetches. +; +; The two instructions currently in the ARM pipeline will have been fetched +; before the MMU was enabled. This property is useful because the next two +; instructions are safe even if new instruction fetches fail. If this routine +; was mapped out of the new virtual memory map, the branch to __main would +; still succeed. + + BX r12 ; branch to __main C library entry point + + END ; mark the end of this file + diff --git a/InitCache.s b/InitCache.s new file mode 100644 index 0000000..250652a --- /dev/null +++ b/InitCache.s @@ -0,0 +1,52 @@ +; Copyright ARM Ltd 2005. All rights reserved. 
+ +;================================================================== +; This code provides basic global enable for a Cortex-A8 cache +; and program flow prediction +; This code must be run from a privileged mode +;================================================================== + + AREA CORTEXA8CACHE, CODE, READONLY + EXPORT core_init + +core_init ; entry point: uses r0 as scratch and returns to the caller with BX lr + +;================================================================== +; Global Enable for Cortex-A8 Instruction and Data Caching +;================================================================== + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #(0x1 <<12) ; enable I Cache (set bit 12) + ;BIC r0, r0, #(0x1 <<12) ; alternative: clear bit 12 to disable I Cache + ORR r0, r0, #(0x1 <<2) ; enable D Cache (set bit 2) + ;BIC r0, r0, #(0x1 << 2) ; alternative: clear bit 2 to disable D Cache + ORR r0, r0, #0x1 ; enable MMU (set bit 0) + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + +;================================================================== +; Enable Cortex-A8 Level2 Unified Cache +;================================================================== + + MRC p15, 0, r0, c1, c0, 1 ; Read Auxiliary Control Register + ORR r0, r0, #2 ; L2EN bit, enable L2 cache + ;BIC r0, r0, #(0x1 << 1) ; alternative: L2EN bit, disable L2 cache + ;ORR r0, r0, #(0x1 << 4) ; disabled duplicate of the next instruction + ORR r0, r0, #(0x1 << 4) ; Enables speculative accesses on AXI + ORR r0, r0, #(0x1 << 5) ; Enables caching NEON data within the L1 data cache + MCR p15, 0, r0, c1, c0, 1 ; Write Auxiliary Control Register + +;================================================================== +; Cortex-A8 program flow prediction +;================================================================== + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #(0x1 <<11) ; Enable all forms of branch prediction (set bit 11) + ;BIC r0, r0, #(0x1 << 11) ; alternative: disable all forms of branch prediction + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + +;================================================================== + + BX lr + + END ; 
mark the end of this file + @@ -0,0 +1,10 @@ + +License: +-------- +All files are covered by the MIT license, except for: +retarget.c +Init.s +InitCache.s +scatter.scat +which are covered by ARM's EULA regarding "EXAMPLE CODE", as described +in license.html. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6f57790 --- /dev/null +++ b/Makefile @@ -0,0 +1,166 @@ +# Copyright (c) 2009, 2010, 2011 STMicroelectronics +# Written by Christophe Lyon + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +CPU=cortex-a9 +# ARM RVCT +CC.rvct := armcc +CFLAGS.rvct = -g --cpu=$(CPU) --fp16_format=ieee -Ono_special_regs_postregalloc -I. 
+LD.rvct := armlink +LDFLAGS.rvct := --cpu=$(CPU) --entry 0x2000 + +# GCC/ARM cross compiler +CC.gccarm := arm-none-eabi-gcc +CFLAGS.gccarm := -g -Wall -mcpu=$(CPU) -mfloat-abi=softfp -mfpu=neon -Wno-unused-variable -Wno-unused-function -ffast-math +#LD.gccarm := $(LD.rvct) +#LDFLAGS.gccarm := $(LDFLAGS.rvct) +LD.gccarm := $(CC.gccarm) +LDFLAGS.gccarm := $(CFLAGS.gccarm) -Tmop500-pm.ld + +# List of validated intrinsics +REFNAMES = vld1 vadd vld1_lane vld1_dup vdup vget_high vget_low \ + vqdmlal_lane vqdmlsl_lane vext vshrn_n vset_lane vget_lane \ + vqsub vqdmulh_lane vqdmull vqdmlal vqdmlsl vceq vcge vcle \ + vcgt vclt vbsl vshl vldX vdup_lane vrshrn_n vqdmull_lane \ + vst1_lane vqshl vqshl_n vqrshrn_n vsub vqadd vabs vqabs \ + vcombine vmax vmin vneg vqneg vmlal vmlal_lane vmlsl \ + vmlsl_lane vmovl vmovn vmull vmull_lane vrev vrshl vshl_n \ + vshr_n vsra_n vtrn vuzp vzip vreinterpret vqdmulh vqrdmulh \ + vqrdmulh_lane vqrshl vaba vabal vabd vabdl vand vorr vorn \ + veor vbic vcreate vldX_lane vldX_dup vmla vmls vmul \ + vmul_lane vmul_n vmull_n vqdmulh_n vqdmull_n vqrdmulh_n \ + vmla_lane vmls_lane vmla_n vmls_n vmlal_n vmlsl_n vqdmlal_n \ + vqdmlsl_n vsri_n vsli_n vtst vaddhn vraddhn vaddl vaddw \ + vhadd vrhadd vhsub vsubl vsubw vsubhn vrsubhn vmvn vqmovn \ + vqmovun vrshr_n vrsra_n vshll_n vpaddl vpadd vpadal \ + vqshlu_n vclz vcls vcnt vqshrn_n vpmax vpmin vqshrun_n \ + vqrshrun_n vstX_lane vtbX vrecpe vrsqrte vcage vcagt vcale \ + vcalt vrecps vrsqrts vcvt +REFLIST = $(addprefix ref_, $(REFNAMES)) + +REFNAMES_INT = integer dsp dspfns +REFLIST_INT = $(addprefix ref_, $(REFNAMES_INT)) + +all: ref-rvct.qemu + +check: check-rvct check-gccarm check-gccarm-rvct + +# Building reference files with RVCT +REFOBJS.rvct = $(addsuffix .rvct.o, $(REFLIST) $(REFLIST_INT)) +REFRVCT=stm-arm-neon.refrvct +ref-rvct: $(REFRVCT) +ref-rvct.qemu: $(REFRVCT).qemu + +check-rvct: $(REFRVCT) + diff $(REFRVCT) ref-rvct-all.txt + +$(REFRVCT): compute_ref.axf + rvdebug 
-stdiolog=stdio.log -jou=journal.log -log=log.log -nologo -cmd -init @coretile.core.cpu0@RTSM -inc armscript.inc -exec $^ + +$(REFRVCT).qemu: compute_ref.axf + qemu-system-arm -cpu $(CPU) -semihosting -nographic -kernel $^ +# Avoid rebuilding compute_ref.axf if already present, for users who +# don't have rvct +.PRECIOUS .INTERMEDIATE: compute_ref.rvct.o retarget.rvct.o \ + InitCache.o Init.o $(REFOBJS.rvct) + +compute_ref.axf: scatter.scat compute_ref.rvct.o retarget.rvct.o \ + InitCache.o Init.o $(REFOBJS.rvct) + $(LD.rvct) $(LDFLAGS.rvct) --scatter $^ -o $@ + +compute_ref.rvct.o retarget.rvct.o: %.rvct.o: %.c + $(CC.rvct) $(CFLAGS.rvct) -c $^ -o $@ -DREFFILE=\"$(REFRVCT)\" -DGCCTESTS_FILE=\"expected_input4gcc.txt\" + +ref_%.rvct.o: ref_%.c stm-arm-neon-ref.h + $(CC.rvct) $(CFLAGS.rvct) -c $< -o $@ + +InitCache.o Init.o: %.o: %.s + $(CC.rvct) $(CFLAGS.rvct) -c $^ -o $@ + + +# Building reference files with GCC/ARM. Link with GCC/ld. +REFOBJS.gccarm = $(addsuffix .gccarm.o, $(REFLIST)) +REFGCCARM=stm-arm-neon.gccarm +ref-gccarm: $(REFGCCARM) + +check-gccarm: $(REFGCCARM) + diff $(REFGCCARM) ref-rvct-neon-nofp16.txt + +$(REFGCCARM): compute_ref.gccarm + rvdebug -stdiolog=stdio.log -jou=journal.log -log=log.log -nologo -cmd -init @coretile.core.cpu0@RTSM -inc armscript.inc -exec $^ + +compute_ref.gccarm: compute_ref.gccarm.o $(REFOBJS.gccarm) + $(LD.gccarm) $(LDFLAGS.gccarm) $^ -o $@ + +compute_ref.gccarm.o: %.gccarm.o: %.c + $(CC.gccarm) $(CFLAGS.gccarm) -c $^ -o $@ -DREFFILE=\"$(REFGCCARM)\" + +# Building reference files with GCC/ARM. Link with armlink. 
+REFGCCARM_RVCT=stm-arm-neon.gccarm-rvct +ref-gccarm-rvct: $(REFGCCARM_RVCT) + +check-gccarm-rvct: $(REFGCCARM_RVCT) + diff $(REFGCCARM_RVCT) ref-rvct-neon-nofp16.txt + +$(REFGCCARM_RVCT): compute_ref.gccarm-rvct + rvdebug -stdiolog=stdio.log -jou=journal.log -log=log.log -nologo -cmd -init @coretile.core.cpu0@RTSM -inc armscript.inc -exec $^ + +compute_ref.gccarm-rvct: scatter.scat compute_ref.gccarm-rvct.o \ + retarget.rvct.o InitCache.o Init.o $(REFOBJS.gccarm) + $(LD.rvct) $(LDFLAGS.rvct) --no_strict_wchar_size --scatter $^ -o $@ + +compute_ref.gccarm-rvct.o: %.gccarm-rvct.o: %.c + $(CC.gccarm) $(CFLAGS.gccarm) -c $^ -o $@ -DREFFILE=\"$(REFGCCARM_RVCT)\" + +ref_%.gccarm.o: ref_%.c stm-arm-neon-ref.h + $(CC.gccarm) $(CFLAGS.gccarm) -c $< -o $@ + +# Use '*' rather than '%' in these rules: +# - using '%' does not make them add to the implicit rules above (they +# are different rules, only the 1st one matches) +# - they are needed only when the target already exists, so the +# wildcard matches when needed. +# - if the target does not already exist, the implicit rules apply. 
+ref_vadd.*.o ref_vsub.*.o ref_vand.*.o ref_vbic.*.o ref_veor.*.o ref_vorn.*.o ref_vorr.*.o: ref_v_binary_op.c +ref_vqadd.*.o ref_vqsub.*.o: ref_v_binary_sat_op.c +ref_vabs.*.o ref_vneg.*.o ref_vmvn.*.o: ref_v_unary_op.c +ref_vqabs.*.o ref_vqneg.*.o: ref_v_unary_sat_op.c +ref_vceq.*.o ref_vcge.*.o ref_vcle.*.o ref_vcgt.*.o ref_vclt.*.o: ref_v_comp_op.c +ref_vhadd.*.o ref_vrhadd.*.o ref_vhsub.*.o ref_vmin.*.o: ref_vmax.c +ref_vmls.*.o: ref_vmla.c +ref_vmls_lane.*.o: ref_vmla_lane.c +ref_vmls_n.*.o: ref_vmla_n.c +ref_vmlsl.*.o: ref_vmlal.c +ref_vmlsl_lane.*.o: ref_vmlal_lane.c +ref_vmlsl_n.*.o: ref_vmlal_n.c +ref_vqdmlsl.*.o: ref_vqdmlal.c +ref_vqdmlsl_lane.*.o: ref_vqdmlal_lane.c +ref_vqdmlsl_n.*.o: ref_vqdmlal_n.c +ref_vtrn.*.o ref_vzip.*.o: ref_vuzp.c +ref_vsli_n.*.o ref_vsri_n.*.o: ref_vsXi_n.c +ref_vsli_n.*.o: ref_vsli_n.c +ref_vsri_n.*.o: ref_vsri_n.c +ref_vraddhn.*.o ref_vsubhn.*.o ref_vrsubhn.*.o: ref_vaddhn.c +ref_vsubl.*.o: ref_vaddl.c +ref_vsubw.*.o: ref_vaddw.c +ref_vcage.*.o ref_vcale.*.o ref_vcagt.*.o ref_vcalt.*.o: ref_v_comp_f_op.c + +clean: + rm -f *.o *.log stm-arm-neon.refrvct expected_input4gcc.txt diff --git a/Makefile.gcc b/Makefile.gcc new file mode 100644 index 0000000..660dcdc --- /dev/null +++ b/Makefile.gcc @@ -0,0 +1,124 @@ +# Copyright (c) 2014 STMicroelectronics +# Written by Christophe Lyon + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# This Makefile is aimed at helping GCC validation, for ARM and +# AArch64 targets. +# For example: +# $ make -f Makefile.gcc clean +# $ make -f Makefile.gcc CC.gccarm=/path/to/gcc check +# Note that this will use qemu-system-arm as execution engine which +# may not be what you want. +# Alternatively: +# $ make -f Makefile.gcc clean +# $ make -f Makefile.gcc CC.gccarm=/path/to/gcc compute_ref.gccarm +# $ /path/to/qemu -L /path/to/sysroot/lib compute_ref.gccarm +# $ diff stm-arm-neon.gccarm ref-rvct-neon.txt + +CPU=cortex-a9 + +# GCC/ARM cross compiler +CC.gccarm := arm-none-eabi-gcc +CFLAGS.gccarm := -g -Wall -mcpu=$(CPU) -mfloat-abi=hard -mfpu=neon -Wno-unused-variable -Wno-unused-function -ffast-math +LD.gccarm := $(CC.gccarm) +LDFLAGS.gccarm := $(CFLAGS.gccarm) + +# List of validated intrinsics +REFNAMES = vld1 vadd vld1_lane vld1_dup vdup vget_high vget_low \ + vqdmlal_lane vqdmlsl_lane vext vshrn_n vset_lane vget_lane \ + vqsub vqdmulh_lane vqdmull vqdmlal vqdmlsl vceq vcge vcle \ + vcgt vclt vbsl vshl vldX vdup_lane vrshrn_n vqdmull_lane \ + vst1_lane vqshl vqshl_n vqrshrn_n vsub vqadd vabs vqabs \ + vcombine vmax vmin vneg vqneg vmlal vmlal_lane vmlsl \ + vmlsl_lane vmovl vmovn vmull vmull_lane vrev vrshl vshl_n \ + vshr_n vsra_n vtrn vuzp vzip vreinterpret vqdmulh vqrdmulh \ + vqrdmulh_lane vqrshl vaba vabal vabd vabdl vand vorr vorn \ + veor vbic vcreate vldX_lane vldX_dup vmla vmls vmul \ + vmul_lane vmul_n vmull_n vqdmulh_n vqdmull_n vqrdmulh_n \ + vmla_lane vmls_lane vmla_n 
vmls_n vmlal_n vmlsl_n vqdmlal_n \ + vqdmlsl_n vsri_n vsli_n vtst vaddhn vraddhn vaddl vaddw \ + vhadd vrhadd vhsub vsubl vsubw vsubhn vrsubhn vmvn vqmovn \ + vqmovun vrshr_n vrsra_n vshll_n vpaddl vpadd vpadal \ + vqshlu_n vclz vcls vcnt vqshrn_n vpmax vpmin vqshrun_n \ + vqrshrun_n vstX_lane vtbX vrecpe vrsqrte vcage vcagt vcale \ + vcalt vrecps vrsqrts vcvt +REFLIST = $(addprefix ref_, $(REFNAMES)) + +REFNAMES_INT = integer dsp dspfns +REFLIST_INT = $(addprefix ref_, $(REFNAMES_INT)) + +all: ref-gccarm + +check: check-gccarm + +# Building reference files with GCC/ARM. Link with GCC/ld. +REFOBJS.gccarm = $(addsuffix .gccarm.o, $(REFLIST)) +REFGCCARM=stm-arm-neon.gccarm +ref-gccarm: $(REFGCCARM) + +check-gccarm: $(REFGCCARM) + diff $(REFGCCARM) ref-rvct-neon.txt + +SIM=qemu-system-arm +SIMFLAGS=-cpu $(CPU) -semihosting -nographic -kernel +$(REFGCCARM): compute_ref.gccarm + $(SIM) $(SIMFLAGS) $^ + +compute_ref.gccarm: compute_ref.gccarm.o $(REFOBJS.gccarm) + $(LD.gccarm) $(LDFLAGS.gccarm) $^ -o $@ + +compute_ref.gccarm.o: %.gccarm.o: %.c + $(CC.gccarm) $(CFLAGS.gccarm) -c $^ -o $@ -DREFFILE=\"$(REFGCCARM)\" -DGCCTESTS_FILE=\"expected_input4gcc.txt\" + +ref_%.gccarm.o: ref_%.c stm-arm-neon-ref.h + $(CC.gccarm) $(CFLAGS.gccarm) -c $< -o $@ + +# Use '*' rather than '%' in these rules: +# - using '%' does not make them add to the implicit rules above (they +# are different rules, only the 1st one matches) +# - they are needed only when the target already exists, so the +# wildcard matches when needed. +# - if the target does not already exist, the implicit rules apply. 
+ref_vadd.*.o ref_vsub.*.o ref_vand.*.o ref_vbic.*.o ref_veor.*.o ref_vorn.*.o ref_vorr.*.o: ref_v_binary_op.c +ref_vqadd.*.o ref_vqsub.*.o: ref_v_binary_sat_op.c +ref_vabs.*.o ref_vneg.*.o ref_vmvn.*.o: ref_v_unary_op.c +ref_vqabs.*.o ref_vqneg.*.o: ref_v_unary_sat_op.c +ref_vceq.*.o ref_vcge.*.o ref_vcle.*.o ref_vcgt.*.o ref_vclt.*.o: ref_v_comp_op.c +ref_vhadd.*.o ref_vrhadd.*.o ref_vhsub.*.o ref_vmin.*.o: ref_vmax.c +ref_vmls.*.o: ref_vmla.c +ref_vmls_lane.*.o: ref_vmla_lane.c +ref_vmls_n.*.o: ref_vmla_n.c +ref_vmlsl.*.o: ref_vmlal.c +ref_vmlsl_lane.*.o: ref_vmlal_lane.c +ref_vmlsl_n.*.o: ref_vmlal_n.c +ref_vqdmlsl.*.o: ref_vqdmlal.c +ref_vqdmlsl_lane.*.o: ref_vqdmlal_lane.c +ref_vqdmlsl_n.*.o: ref_vqdmlal_n.c +ref_vtrn.*.o ref_vzip.*.o: ref_vuzp.c +ref_vsli_n.*.o ref_vsri_n.*.o: ref_vsXi_n.c +ref_vsli_n.*.o: ref_vsli_n.c +ref_vsri_n.*.o: ref_vsri_n.c +ref_vraddhn.*.o ref_vsubhn.*.o ref_vrsubhn.*.o: ref_vaddhn.c +ref_vsubl.*.o: ref_vaddl.c +ref_vsubw.*.o: ref_vaddw.c +ref_vcage.*.o ref_vcale.*.o ref_vcagt.*.o ref_vcalt.*.o: ref_v_comp_f_op.c + +clean: + rm -f *.o *.log @@ -0,0 +1,60 @@ +ARM Neon reference tests +======================== +This package contains extensive tests for the ARM/Neon instructions. + +It works by building a program which uses all of them, and then +executing it on an actual target or a simulator. + +It can be used to validate the simulator against an actual HW target, +or to validate C compilers in presence of Neon intrinsics calls. + +The supplied Makefile enables to build with both ARM RVCT compiler and +GNU GCC (for the ARM target), and supports execution with ARM RVDEBUG +on an ARM simulator and with QEMU. + +For convenience, the ARM ELF binary file (as compiled with RVCT) is +supplied (compute_ref.axf), as well as expected output (ref-rvct.txt). + +A second file containing expected output is also supplied: +ref-rvct-neon.txt, which contains only the results of the Neon +instrinsics tests. 
It is intended for checking GCC's results, +since this compiler does not support the integer & dsp builtins whose +results are also present in ref-rvct.txt. + +Typical usage when used to debug QEMU: +$ make all # to build the test program with ARM RVCT and execute with QEMU +$ make check # to compare the results with the expected output + + +Known issues: +------------- +Some tests currently fail to build with GCC/ARM: +- missing include files: dspfns.h, armdsp.h + +As GCC/ARM provides no support for the +Neon_Cumulative_Saturation/FPSCR register, auxiliary accessor +functions have been implemented in stm-arm-neon-ref.h. + +Engineering: +------------ +In order to cover all the Neon instructions extensively, these tests +make intensive use of the C preprocessor to reduce maintenance effort. + +Most tests (the more regular ones) share a common basic structure. In +general, variable names are suffixed by their type name, so as to +differentiate variables with the same purpose but of different types. +Hence vector1_int8x8, vector1_int16x4 etc... + +For instance in ref_vmul.c the layout of the code is as follows: + +- declare input and output vectors (named 'vector1', 'vector2' and + 'vector_res') of each possible type (s/u, 8/16/32/64 bits). + +- clean the result buffers. + +- initialize input vectors 'vector1' and 'vector2'. + +- call each variant of the intrinsic and store the result in a buffer + named 'buffer', whose contents are printed after execution. + +One can then compare the actual result with the expected one. 
diff --git a/armscript.inc b/armscript.inc new file mode 100644 index 0000000..ad53a5c --- /dev/null +++ b/armscript.inc @@ -0,0 +1,14 @@ +ERROR=ABORT // Abort if error occurs when processing the script +WAIT=ON // Wait for each command to finish + +GO + +STATS + + //STDIOLOG OFF // Close the log file + + //UNLOAD 1 // Unload the image + //DELFILE 1 // Remove the symbol definitions + //DISCONNECT // Disconnect from the target + //WAIT=OFF +QUIT Y diff --git a/compute_ref.axf b/compute_ref.axf Binary files differnew file mode 100644 index 0000000..e7035b9 --- /dev/null +++ b/compute_ref.axf diff --git a/compute_ref.c b/compute_ref.c new file mode 100644 index 0000000..5839d36 --- /dev/null +++ b/compute_ref.c @@ -0,0 +1,370 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define _AEABI_PORTABILITY_LEVEL 1 +#undef _AEABI_PORTABLE +#if defined(__cplusplus) +#include <cstdio> +#include <cstdlib> +#else +#include <stdio.h> +#include <stdlib.h> +#endif + +FILE* log_file = NULL; +FILE* ref_file = NULL; +FILE* gcc_tests_file = NULL; + +#define LOGFILE "stm-arm-neon-ref.log" + +void cleanup (void) +{ + if (log_file) fclose (log_file); + if (ref_file) fclose (ref_file); + if (gcc_tests_file) fclose (gcc_tests_file); + exit (1); +} + +extern void exec_vld1(void); +extern void exec_vadd(void); +extern void exec_vld1_lane(void); +extern void exec_vld1_dup(void); +extern void exec_vdup(void); +extern void exec_vget_high(void); +extern void exec_vget_low(void); +extern void exec_vqdmlal_lane(void); +extern void exec_vqdmlsl_lane(void); +extern void exec_vqdmlal_n(void); +extern void exec_vqdmlsl_n(void); +extern void exec_vext(void); +extern void exec_vshr_n(void); +extern void exec_vshrn_n(void); +extern void exec_vrshrn_n(void); +extern void exec_vqrshrn_n(void); +extern void exec_vset_lane(void); +extern void exec_vget_lane(void); +extern void exec_vqsub(void); +extern void exec_vqdmulh(void); +extern void exec_vqdmulh_lane(void); +extern void exec_vqdmulh_n(void); +extern void exec_vqdmull(void); +extern void exec_vqdmlal(void); +extern void exec_vqdmlsl(void); +extern void exec_vceq(void); +extern void exec_vcge(void); +extern void exec_vcle(void); +extern void exec_vcgt(void); +extern void exec_vclt(void); +extern void exec_vbsl(void); +extern void exec_vshl(void); +extern void exec_vqshl(void); +extern void exec_vqshl_n(void); +extern void exec_vrshl(void); +extern void exec_vshl_n(void); +extern void exec_vldX(void); +extern void exec_vdup_lane(void); +extern void exec_vqdmull_lane(void); +extern void exec_vqdmull_n(void); +extern void exec_vst1_lane(void); +extern void exec_vsub(void); +extern void exec_vqadd(void); +extern void exec_vabs(void); +extern void exec_vqabs(void); +extern void exec_vcombine(void); +extern void 
exec_vmax(void); +extern void exec_vmin(void); +extern void exec_vneg(void); +extern void exec_vqneg(void); +extern void exec_vmlal(void); +extern void exec_vmlal_lane(void); +extern void exec_vmlal_n(void); +extern void exec_vmlsl(void); +extern void exec_vmlsl_lane(void); +extern void exec_vmlsl_n(void); +extern void exec_vmovl(void); +extern void exec_vmovn(void); +extern void exec_vmull(void); +extern void exec_vmull_lane(void); +extern void exec_vrev(void); +extern void exec_vsra_n(void); +extern void exec_vtrn(void); +extern void exec_vuzp(void); +extern void exec_vzip(void); +extern void exec_vreinterpret(void); +extern void exec_vqrdmulh(void); +extern void exec_vqrdmulh_lane(void); +extern void exec_vqrdmulh_n(void); +extern void exec_vqrshl(void); +extern void exec_vaba(void); +extern void exec_vabal(void); +extern void exec_vabd(void); +extern void exec_vabdl(void); +extern void exec_vand(void); +extern void exec_vorr(void); +extern void exec_vorn(void); +extern void exec_veor(void); +extern void exec_vbic(void); +extern void exec_vcreate(void); +extern void exec_vldX_lane(void); +extern void exec_vldX_dup(void); +extern void exec_vmla(void); +extern void exec_vmls(void); +extern void exec_vmul(void); +extern void exec_vmul_lane(void); +extern void exec_vmul_n(void); +extern void exec_vmull_n(void); +extern void exec_vmla_lane(void); +extern void exec_vmls_lane(void); +extern void exec_vmla_n(void); +extern void exec_vmls_n(void); +extern void exec_vsli_n(void); +extern void exec_vsri_n(void); +extern void exec_vtst(void); +extern void exec_vaddhn(void); +extern void exec_vraddhn(void); +extern void exec_vaddl(void); +extern void exec_vaddw(void); +extern void exec_vhadd(void); +extern void exec_vrhadd(void); +extern void exec_vhsub(void); +extern void exec_vsubl(void); +extern void exec_vsubw(void); +extern void exec_vsubhn(void); +extern void exec_vrsubhn(void); +extern void exec_vmvn(void); +extern void exec_vqmovn(void); +extern void 
exec_vqmovun(void); +extern void exec_vrshr_n(void); +extern void exec_vrsra_n(void); +extern void exec_vshll_n(void); +extern void exec_vpaddl(void); +extern void exec_vpadd(void); +extern void exec_vpadal(void); +extern void exec_vqshlu_n(void); +extern void exec_vclz(void); +extern void exec_vcls(void); +extern void exec_vcnt(void); +extern void exec_vqshrn_n(void); +extern void exec_vpmax(void); +extern void exec_vpmin(void); +extern void exec_vqshrun_n(void); +extern void exec_vqrshrun_n(void); +extern void exec_vstX_lane(void); +extern void exec_vtbX(void); +extern void exec_vrecpe(void); +extern void exec_vrsqrte(void); + +extern void exec_vcage(void); +extern void exec_vcagt(void); +extern void exec_vcale(void); +extern void exec_vcalt(void); +extern void exec_vcvt(void); +extern void exec_vrecps(void); +extern void exec_vrsqrts(void); + +#if defined(__ARMCC_VERSION) || !defined(__arm__) +extern void exec_integer(void); /* Integer (non-NEON) intrinsics */ +extern void exec_dsp(void); /* DSP (non-NEON) intrinsics */ +extern void exec_dspfns(void); /* DSP FNS (non-NEON/ITU) intrinsics */ +#endif + +#include "compute_ref_data.c" + +int main (void) +{ +#if defined(_MSC_VER) + /* When compiled with MSVC, force output of FP numbers with only 2 + * digits for the exponent, for easier comparison of the results + * with GCC. 
*/ + _set_output_format(_TWO_DIGIT_EXPONENT); +#endif + + log_file = fopen (LOGFILE, "w"); + if (log_file == NULL) { + fprintf (stderr, "Error opening log file " LOGFILE "\n"); + cleanup (); + } + + ref_file = fopen (REFFILE, "w"); + if (ref_file == NULL) { + fprintf (log_file, "Error opening ref file %s\n", REFFILE); + cleanup (); + } + + gcc_tests_file = fopen (GCCTESTS_FILE, "w"); + if (gcc_tests_file == NULL) { + fprintf (log_file, "Error opening GCC ref file %s\n", GCCTESTS_FILE); + cleanup (); + } + + fprintf (log_file, "Computing refs....\n"); + + exec_vld1 (); + exec_vadd (); + exec_vld1_lane (); + exec_vld1_dup (); + exec_vdup (); + exec_vget_high (); + exec_vget_low (); + exec_vqdmlal_lane (); + exec_vqdmlsl_lane (); + exec_vqdmlal_n (); + exec_vqdmlsl_n (); + exec_vext (); + exec_vshr_n (); + exec_vshrn_n (); + exec_vrshrn_n (); + exec_vqrshrn_n (); + exec_vset_lane (); + exec_vget_lane (); + exec_vqsub (); + exec_vqdmulh (); + exec_vqdmulh_lane (); + exec_vqdmulh_n (); + exec_vqdmull (); + exec_vqdmlal (); + exec_vqdmlsl (); + exec_vceq (); + exec_vcge (); + exec_vcle (); + exec_vcgt (); + exec_vclt (); + exec_vbsl (); + exec_vshl (); + exec_vshl_n (); + exec_vqshl (); + exec_vqshl_n (); + exec_vrshl (); + exec_vldX (); + exec_vdup_lane (); + exec_vqdmull_lane (); + exec_vqdmull_n (); + exec_vst1_lane (); + exec_vsub (); + exec_vqadd (); + exec_vabs (); + exec_vqabs (); + exec_vcombine (); + exec_vmax (); + exec_vmin (); + exec_vneg (); + exec_vqneg (); + exec_vmlal (); + exec_vmlsl (); + exec_vmlal_lane (); + exec_vmlsl_lane (); + exec_vmlal_n (); + exec_vmlsl_n (); + exec_vmovl (); + exec_vmovn (); + exec_vmull (); + exec_vmull_lane (); + exec_vrev (); + exec_vsra_n (); + exec_vtrn (); + exec_vuzp (); + exec_vzip (); + exec_vreinterpret (); + exec_vqrdmulh (); + exec_vqrdmulh_lane (); + exec_vqrdmulh_n (); + exec_vqrshl (); + exec_vaba (); + exec_vabal (); + exec_vabd (); + exec_vabdl (); + exec_vand (); + exec_vorr (); + exec_vorn (); + exec_veor (); 
+ exec_vbic (); + exec_vcreate (); + exec_vldX_lane (); + exec_vldX_dup (); + exec_vmla (); + exec_vmls (); + exec_vmul (); + exec_vmul_lane (); + exec_vmul_n (); + exec_vmull_n (); + exec_vmla_lane (); + exec_vmls_lane (); + exec_vmla_n (); + exec_vmls_n (); + exec_vsli_n (); + exec_vsri_n (); + exec_vtst (); + exec_vaddhn (); + exec_vraddhn (); + exec_vaddl (); + exec_vaddw (); + exec_vhadd (); + exec_vrhadd (); + exec_vhsub (); + exec_vsubl (); + exec_vsubw (); + exec_vsubhn (); + exec_vrsubhn (); + exec_vmvn (); + exec_vqmovn (); + exec_vqmovun (); + exec_vrshr_n (); + exec_vrsra_n (); + exec_vshll_n (); + exec_vpaddl (); + exec_vpadd (); + exec_vpadal (); + exec_vqshlu_n (); + exec_vclz (); + exec_vcls (); + exec_vcnt (); + exec_vqshrn_n (); + exec_vpmax (); + exec_vpmin (); + exec_vqshrun_n (); + exec_vqrshrun_n (); + exec_vstX_lane (); + exec_vtbX (); + exec_vrecpe (); + exec_vrsqrte (); + + exec_vcage (); + exec_vcale (); + exec_vcagt (); + exec_vcalt (); + exec_vcvt (); + exec_vrecps (); + exec_vrsqrts (); + +#if defined(__ARMCC_VERSION) || (!defined(__arm__) && !defined(__aarch64__)) + exec_integer (); + exec_dsp (); + exec_dspfns (); +#endif + + fprintf (log_file, "Finished\n"); + + return 0; +} diff --git a/compute_ref.gccarm b/compute_ref.gccarm Binary files differnew file mode 100755 index 0000000..664ef29 --- /dev/null +++ b/compute_ref.gccarm diff --git a/compute_ref.gccarm-rvct b/compute_ref.gccarm-rvct Binary files differnew file mode 100644 index 0000000..0e319a3 --- /dev/null +++ b/compute_ref.gccarm-rvct diff --git a/compute_ref_data.c b/compute_ref_data.c new file mode 100644 index 0000000..8dbf727 --- /dev/null +++ b/compute_ref_data.c @@ -0,0 +1,566 @@ +/* + +Copyright (c) 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without 
limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif +#include "stm-arm-neon-ref.h" + +/* Initialization helpers; 4 slices are needed for vld2, vld3 and + vld4. */ +#define MY_INIT_TAB(T,W,N) xNAME(INIT_TAB,N)(T##W##_t) +#define MY_INIT_TAB2(T,W,N) xNAME(INIT_TAB2,N)(T##W##_t) +#define MY_INIT_TAB3(T,W,N) xNAME(INIT_TAB3,N)(T##W##_t) +#define MY_INIT_TAB4(T,W,N) xNAME(INIT_TAB4,N)(T##W##_t) + +/* Initialized input buffers. */ +#define VECT_VAR_DECL_INIT(V, T, W, N) \ + VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,N) }; + +/* Specialized initializer with 4 entries, as used by vldX_dup and + vdup tests, which iterate 4 times on input buffers. */ +#define VECT_VAR_DECL_INIT4(V, T, W, N) \ + VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,4) }; + +/* Initializers for arrays of vectors. 
*/ +#define VECT_ARRAY_INIT2(V, T, W, N) \ + T##W##_t VECT_ARRAY_VAR(V,T,W,N,2)[] = \ + { MY_INIT_TAB(T,W,N) \ + MY_INIT_TAB2(T,W,N) }; + +#define VECT_ARRAY_INIT3(V, T, W, N) \ + T##W##_t VECT_ARRAY_VAR(V,T,W,N,3)[] = \ + { MY_INIT_TAB(T,W,N) \ + MY_INIT_TAB2(T,W,N) \ + MY_INIT_TAB3(T,W,N) }; + +#define VECT_ARRAY_INIT4(V, T, W, N) \ + T##W##_t VECT_ARRAY_VAR(V,T,W,N,4)[] = \ + { MY_INIT_TAB(T,W,N) \ + MY_INIT_TAB2(T,W,N) \ + MY_INIT_TAB3(T,W,N) \ + MY_INIT_TAB4(T,W,N) }; + +/* Sample initialization vectors. */ +#define INIT_TAB_1(T) \ + (T)-16, +#define INIT_TAB2_1(T) \ + (T)-15, +#define INIT_TAB3_1(T) \ + (T)-14, +#define INIT_TAB4_1(T) \ + (T)-13, + +#define INIT_TAB_2(T) \ + (T)-16, (T)-15, +#define INIT_TAB2_2(T) \ + (T)-14, (T)-13, +#define INIT_TAB3_2(T) \ + (T)-12, (T)-11, +#define INIT_TAB4_2(T) \ + (T)-10, (T)-9, + +/* Initializer for vld3_lane tests. */ +#define INIT_TAB_3(T) \ + (T)-16, (T)-15, (T)-14, + +#define INIT_TAB_4(T) \ + (T)-16, (T)-15, (T)-14, (T)-13, +#define INIT_TAB2_4(T) \ + (T)-12, (T)-11, (T)-10, (T)-9, +#define INIT_TAB3_4(T) \ + (T)-8, (T)-7, (T)-6, (T)-5, +#define INIT_TAB4_4(T) \ + (T)-4, (T)-3, (T)-2, (T)-1, + +#define INIT_TAB_8(T) \ + (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9, +#define INIT_TAB2_8(T) \ + (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1, +#define INIT_TAB3_8(T) \ + (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7, +#define INIT_TAB4_8(T) \ + (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15, + +#define INIT_TAB_16(T) \ + (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9, \ + (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1, +#define INIT_TAB2_16(T) \ + (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7, \ + (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15, +#define INIT_TAB3_16(T) \ + (T)16, (T)17, (T)18, (T)19, (T)20, (T)21, (T)22, (T)23, \ + (T)24, (T)25, (T)26, (T)27, (T)28, (T)29, (T)30, (T)31, +#define INIT_TAB4_16(T) \ + (T)32, (T)33, (T)34, (T)35, (T)36, (T)37, 
(T)38, (T)39, \ + (T)40, (T)41, (T)42, (T)43, (T)44, (T)45, (T)46, (T)47, + +/* Input buffers, one of each size. */ +/* Insert some padding to try to exhibit out of bounds accesses. */ +VECT_VAR_DECL_INIT(buffer, int, 8, 8); +PAD(buffer_pad, int, 8, 8); +VECT_VAR_DECL_INIT(buffer, int, 16, 4); +PAD(buffer_pad, int, 16, 4); +VECT_VAR_DECL_INIT(buffer, int, 32, 2); +PAD(buffer_pad, int, 32, 2); +VECT_VAR_DECL_INIT(buffer, int, 64, 1); +PAD(buffer_pad, int, 64, 1); +VECT_VAR_DECL_INIT(buffer, uint, 8, 8); +PAD(buffer_pad, uint, 8, 8); +VECT_VAR_DECL_INIT(buffer, poly, 8, 8); +PAD(buffer_pad, poly, 8, 8); +VECT_VAR_DECL_INIT(buffer, poly, 16, 4); +PAD(buffer_pad, poly, 16, 4); +VECT_VAR_DECL_INIT(buffer, uint, 16, 4); +PAD(buffer_pad, uint, 16, 4); +VECT_VAR_DECL_INIT(buffer, uint, 32, 2); +PAD(buffer_pad, uint, 32, 2); +VECT_VAR_DECL_INIT(buffer, uint, 64, 1); +PAD(buffer_pad, uint, 64, 1); +VECT_VAR_DECL_INIT(buffer, float, 32, 2); +PAD(buffer_pad, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +/* We need a different initialization for ARMCC, because the compiler + performs the conversion to half-precision internal + representation. 
*/ +#ifdef __ARMCC_VERSION +__fp16 buffer_float16x4[4] = {-16, -15, -14, -13}; +#else +VECT_VAR_DECL(buffer, float, 16, 4) [] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */}; +#endif +PAD(buffer_pad, float, 16, 4); +#endif +VECT_VAR_DECL_INIT(buffer, int, 8, 16); +PAD(buffer_pad, int, 8, 16); +VECT_VAR_DECL_INIT(buffer, int, 16, 8); +PAD(buffer_pad, int, 16, 8); +VECT_VAR_DECL_INIT(buffer, int, 32, 4); +PAD(buffer_pad, int, 32, 4); +VECT_VAR_DECL_INIT(buffer, int, 64, 2); +PAD(buffer_pad, int, 64, 2); +VECT_VAR_DECL_INIT(buffer, uint, 8, 16); +PAD(buffer_pad, uint, 8, 16); +VECT_VAR_DECL_INIT(buffer, uint, 16, 8); +PAD(buffer_pad, uint, 16, 8); +VECT_VAR_DECL_INIT(buffer, uint, 32, 4); +PAD(buffer_pad, uint, 32, 4); +VECT_VAR_DECL_INIT(buffer, uint, 64, 2); +PAD(buffer_pad, uint, 64, 2); +VECT_VAR_DECL_INIT(buffer, poly, 8, 16); +PAD(buffer_pad, poly, 8, 16); +VECT_VAR_DECL_INIT(buffer, poly, 16, 8); +PAD(buffer_pad, poly, 16, 8); +VECT_VAR_DECL_INIT(buffer, float, 32, 4); +PAD(buffer_pad, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_float16x8[8] = {-16, -15, -14, -13, -12, -11, -10, -9}; +#else +VECT_VAR_DECL(buffer, float, 16, 8) [] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */, + 0xca00 /* -12 */, 0xc980 /* -11 */, + 0xc900 /* -10 */, 0xc880 /* -9 */}; +#endif +PAD(buffer_pad, float, 16, 8); +#endif + +/* The tests for vld1_dup and vdup expect at least 4 entries in the + input buffer, so force 1- and 2-elements initializers to have 4 + entries. 
*/ +VECT_VAR_DECL_INIT(buffer_dup, int, 8, 8); +VECT_VAR_DECL(buffer_dup_pad, int, 8, 8); +VECT_VAR_DECL_INIT(buffer_dup, int, 16, 4); +VECT_VAR_DECL(buffer_dup_pad, int, 16, 4); +VECT_VAR_DECL_INIT4(buffer_dup, int, 32, 2); +VECT_VAR_DECL(buffer_dup_pad, int, 32, 2); +VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 1); +VECT_VAR_DECL(buffer_dup_pad, int, 64, 1); +VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 8); +VECT_VAR_DECL(buffer_dup_pad, uint, 8, 8); +VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 4); +VECT_VAR_DECL(buffer_dup_pad, uint, 16, 4); +VECT_VAR_DECL_INIT4(buffer_dup, uint, 32, 2); +VECT_VAR_DECL(buffer_dup_pad, uint, 32, 2); +VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 1); +VECT_VAR_DECL(buffer_dup_pad, uint, 64, 1); +VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8); +VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8); +VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4); +VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4); +VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2); +VECT_VAR_DECL(buffer_dup_pad, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_dup_float16x4[4] = {-16, -15, -14, -13}; +#else +VECT_VAR_DECL(buffer_dup, float, 16, 4)[] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */}; +#endif +PAD(buffer_dup_pad, float, 16, 4); +#endif +VECT_VAR_DECL_INIT(buffer_dup, int, 8, 16); +VECT_VAR_DECL(buffer_dup_pad, int, 8, 16); +VECT_VAR_DECL_INIT(buffer_dup, int, 16, 8); +VECT_VAR_DECL(buffer_dup_pad, int, 16, 8); +VECT_VAR_DECL_INIT(buffer_dup, int, 32, 4); +VECT_VAR_DECL(buffer_dup_pad, int, 32, 4); +VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 2); +VECT_VAR_DECL(buffer_dup_pad, int, 64, 2); +VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 16); +VECT_VAR_DECL(buffer_dup_pad, uint, 8, 16); +VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 8); +VECT_VAR_DECL(buffer_dup_pad, uint, 16, 8); +VECT_VAR_DECL_INIT(buffer_dup, uint, 32, 4); +VECT_VAR_DECL(buffer_dup_pad, uint, 32, 4); 
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 2); +VECT_VAR_DECL(buffer_dup_pad, uint, 64, 2); +VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16); +VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16); +VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8); +VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8); +VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4); +VECT_VAR_DECL(buffer_dup_pad, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_dup_float16x8[8] = {-16, -15, -14, -13, -12, -11, -10, -9}; +#else +VECT_VAR_DECL(buffer_dup, float, 16, 8)[] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */, + 0xca00 /* -12 */, 0xc980 /* -11 */, + 0xc900 /* -10 */, 0xc880 /* -9 */}; +#endif +PAD(buffer_dup_pad, float, 16, 8); +#endif + +/* Input buffers for vld2, 1 of each size */ +VECT_ARRAY_INIT2(buffer_vld2, int, 8, 8); +PAD(buffer_vld2_pad, int, 8, 8); +VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4); +PAD(buffer_vld2_pad, int, 16, 4); +VECT_ARRAY_INIT2(buffer_vld2, int, 32, 2); +PAD(buffer_vld2_pad, int, 32, 2); +VECT_ARRAY_INIT2(buffer_vld2, int, 64, 1); +PAD(buffer_vld2_pad, int, 64, 1); +VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 8); +PAD(buffer_vld2_pad, uint, 8, 8); +VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 4); +PAD(buffer_vld2_pad, uint, 16, 4); +VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 2); +PAD(buffer_vld2_pad, uint, 32, 2); +VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 1); +PAD(buffer_vld2_pad, uint, 64, 1); +VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 8); +PAD(buffer_vld2_pad, poly, 8, 8); +VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4); +PAD(buffer_vld2_pad, poly, 16, 4); +VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2); +PAD(buffer_vld2_pad, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld2_float16x4x2[4*2] = {-16, -15, -14, -13, -12, -11, -10, -9}; +#else +float16_t 
buffer_vld2_float16x4x2[4*2] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */, + 0xca00 /* -12 */, 0xc980 /* -11 */, + 0xc900 /* -10 */, 0xc880 /* -9 */}; +#endif +PAD(buffer_vld2_pad, float, 16, 4); +#endif +VECT_ARRAY_INIT2(buffer_vld2, int, 8, 16); +PAD(buffer_vld2_pad, int, 8, 16); +VECT_ARRAY_INIT2(buffer_vld2, int, 16, 8); +PAD(buffer_vld2_pad, int, 16, 8); +VECT_ARRAY_INIT2(buffer_vld2, int, 32, 4); +PAD(buffer_vld2_pad, int, 32, 4); +VECT_ARRAY_INIT2(buffer_vld2, int, 64, 2); +PAD(buffer_vld2_pad, int, 64, 2); +VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 16); +PAD(buffer_vld2_pad, uint, 8, 16); +VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 8); +PAD(buffer_vld2_pad, uint, 16, 8); +VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 4); +PAD(buffer_vld2_pad, uint, 32, 4); +VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 2); +PAD(buffer_vld2_pad, uint, 64, 2); +VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 16); +PAD(buffer_vld2_pad, poly, 8, 16); +VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8); +PAD(buffer_vld2_pad, poly, 16, 8); +VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4); +PAD(buffer_vld2_pad, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld2_float16x8x2[8*2] = {-16, -15, -14, -13, -12, -11, -10, -9, + -8, -7, -6, -5, -4, -3, -2, -1}; +#else +float16_t buffer_vld2_float16x8x2[8*2] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */, + 0xca00 /* -12 */, 0xc980 /* -11 */, + 0xc900 /* -10 */, 0xc880 /* -9 */, + 0xc800 /* -8 */, 0xc700 /* -7 */, + 0xc600 /* -6 */, 0xc500 /* -5 */, + 0xc400 /* -4 */, 0xc200 /* -3 */, + 0xc000 /* -2 */, 0xbc00 /* -1 */}; +#endif +PAD(buffer_vld2_pad, float, 16, 8); +#endif + +/* Input buffers for vld3, 1 of each size */ +VECT_ARRAY_INIT3(buffer_vld3, int, 8, 8); +PAD(buffer_vld3_pad, int, 8, 8); +VECT_ARRAY_INIT3(buffer_vld3, int, 16, 4); +PAD(buffer_vld3_pad, int, 16, 4); 
+VECT_ARRAY_INIT3(buffer_vld3, int, 32, 2); +PAD(buffer_vld3_pad, int, 32, 2); +VECT_ARRAY_INIT3(buffer_vld3, int, 64, 1); +PAD(buffer_vld3_pad, int, 64, 1); +VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 8); +PAD(buffer_vld3_pad, uint, 8, 8); +VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 4); +PAD(buffer_vld3_pad, uint, 16, 4); +VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 2); +PAD(buffer_vld3_pad, uint, 32, 2); +VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 1); +PAD(buffer_vld3_pad, uint, 64, 1); +VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 8); +PAD(buffer_vld3_pad, poly, 8, 8); +VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4); +PAD(buffer_vld3_pad, poly, 16, 4); +VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2); +PAD(buffer_vld3_pad, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld3_float16x4x3[4*3] = {-16, -15, -14, -13, -12, -11, -10, -9, + -8, -7, -6, -5}; +#else +float16_t buffer_vld3_float16x4x3[4*3] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */, + 0xca00 /* -12 */, 0xc980 /* -11 */, + 0xc900 /* -10 */, 0xc880 /* -9 */, + 0xc800 /* -8 */, 0xc700 /* -7 */, + 0xc600 /* -6 */, 0xc500 /* -5 */}; +#endif +PAD(buffer_vld3_pad, float, 16, 4); +#endif +VECT_ARRAY_INIT3(buffer_vld3, int, 8, 16); +PAD(buffer_vld3_pad, int, 8, 16); +VECT_ARRAY_INIT3(buffer_vld3, int, 16, 8); +PAD(buffer_vld3_pad, int, 16, 8); +VECT_ARRAY_INIT3(buffer_vld3, int, 32, 4); +PAD(buffer_vld3_pad, int, 32, 4); +VECT_ARRAY_INIT3(buffer_vld3, int, 64, 2); +PAD(buffer_vld3_pad, int, 64, 2); +VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 16); +PAD(buffer_vld3_pad, uint, 8, 16); +VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 8); +PAD(buffer_vld3_pad, uint, 16, 8); +VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 4); +PAD(buffer_vld3_pad, uint, 32, 4); +VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 2); +PAD(buffer_vld3_pad, uint, 64, 2); +VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 16); +PAD(buffer_vld3_pad, poly, 8, 
16); +VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8); +PAD(buffer_vld3_pad, poly, 16, 8); +VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4); +PAD(buffer_vld3_pad, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld3_float16x8x3[8*3] = {-16, -15, -14, -13, -12, -11, -10, -9, + -8, -7, -6, -5, -4, -3, -2, -1, + 0, 1, 2, 3, 4, 5, 6, 7}; +#else +float16_t buffer_vld3_float16x8x3[8*3] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */, + 0xca00 /* -12 */, 0xc980 /* -11 */, + 0xc900 /* -10 */, 0xc880 /* -9 */, + 0xc800 /* -8 */, 0xc700 /* -7 */, + 0xc600 /* -6 */, 0xc500 /* -5 */, + 0xc400 /* -4 */, 0xc200 /* -3 */, + 0xc000 /* -2 */, 0xbc00 /* -1 */, + 0, 0x3c00 /* 1 */, + 0x4000 /* 2 */, 0x4200 /* 3 */, + 0x4400 /* 4 */, 0x4500 /* 5 */, + 0x4600 /* 6 */, 0x4700 /* 7 */}; +#endif +PAD(buffer_vld3_pad, float, 16, 8); +#endif + +/* Input buffers for vld4, 1 of each size */ +VECT_ARRAY_INIT4(buffer_vld4, int, 8, 8); +PAD(buffer_vld4_pad, int, 8, 8); +VECT_ARRAY_INIT4(buffer_vld4, int, 16, 4); +PAD(buffer_vld4_pad, int, 16, 4); +VECT_ARRAY_INIT4(buffer_vld4, int, 32, 2); +PAD(buffer_vld4_pad, int, 32, 2); +VECT_ARRAY_INIT4(buffer_vld4, int, 64, 1); +PAD(buffer_vld4_pad, int, 64, 1); +VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 8); +PAD(buffer_vld4_pad, uint, 8, 8); +VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 4); +PAD(buffer_vld4_pad, uint, 16, 4); +VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 2); +PAD(buffer_vld4_pad, uint, 32, 2); +VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 1); +PAD(buffer_vld4_pad, uint, 64, 1); +VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 8); +PAD(buffer_vld4_pad, poly, 8, 8); +VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4); +PAD(buffer_vld4_pad, poly, 16, 4); +VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2); +PAD(buffer_vld4_pad, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) 
) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld4_float16x4x4[4*4] = {-16, -15, -14, -13, -12, -11, -10, -9, + -8, -7, -6, -5, -4, -3, -2, -1}; +#else +float16_t buffer_vld4_float16x4x4[4*4] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */, + 0xca00 /* -12 */, 0xc980 /* -11 */, + 0xc900 /* -10 */, 0xc880 /* -9 */, + 0xc800 /* -8 */, 0xc700 /* -7 */, + 0xc600 /* -6 */, 0xc500 /* -5 */, + 0xc400 /* -4 */, 0xc200 /* -3 */, + 0xc000 /* -2 */, 0xbc00 /* -1 */}; +#endif +PAD(buffer_vld4_pad, float, 16, 4); +#endif +VECT_ARRAY_INIT4(buffer_vld4, int, 8, 16); +PAD(buffer_vld4_pad, int, 8, 16); +VECT_ARRAY_INIT4(buffer_vld4, int, 16, 8); +PAD(buffer_vld4_pad, int, 16, 8); +VECT_ARRAY_INIT4(buffer_vld4, int, 32, 4); +PAD(buffer_vld4_pad, int, 32, 4); +VECT_ARRAY_INIT4(buffer_vld4, int, 64, 2); +PAD(buffer_vld4_pad, int, 64, 2); +VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 16); +PAD(buffer_vld4_pad, uint, 8, 16); +VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 8); +PAD(buffer_vld4_pad, uint, 16, 8); +VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 4); +PAD(buffer_vld4_pad, uint, 32, 4); +VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 2); +PAD(buffer_vld4_pad, uint, 64, 2); +VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 16); +PAD(buffer_vld4_pad, poly, 8, 16); +VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8); +PAD(buffer_vld4_pad, poly, 16, 8); +VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4); +PAD(buffer_vld4_pad, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld4_float16x8x4[8*4] = {-16, -15, -14, -13, -12, -11, -10, -9, + -8, -7, -6, -5, -4, -3, -2, -1, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; +#else +float16_t buffer_vld4_float16x8x4[8*4] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, + 0xcb00 /* -14 */, 0xca80 /* -13 */, + 0xca00 /* -12 */, 0xc980 /* -11 */, + 0xc900 /* -10 */, 0xc880 /* -9 */, + 0xc800 /* -8 */, 0xc700 /* -7 */, + 0xc600 /* -6 */, 0xc500 /* -5 */, 
+ 0xc400 /* -4 */, 0xc200 /* -3 */, + 0xc000 /* -2 */, 0xbc00 /* -1 */, + 0, 0x3c00 /* 1 */, + 0x4000 /* 2 */, 0x4200 /* 3 */, + 0x4400 /* 4 */, 0x4500 /* 5 */, + 0x4600 /* 6 */, 0x4700 /* 7 */, + 0x4800 /* 8 */, 0x4880 /* 9 */, + 0x4900 /* 10 */, 0x4980 /* 11 */, + 0x4a00 /* 12 */, 0x4a80 /* 13 */, + 0x4b00 /* 14 */, 0x04b80 /* 15 */}; +#endif +PAD(buffer_vld4_pad, float, 16, 8); +#endif + +/* Input buffers for vld2_lane */ +VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 8, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 16, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 32, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 64, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 8, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 16, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2); +VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld2_lane_float16x2[2] = {-16, -15}; +#else +VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2) [] = {0xcc00 /* -16 */, + 0xcb80 /* -15 */}; +#endif +#endif + +/* Input buffers for vld3_lane */ +VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 8, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 16, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 32, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 64, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 8, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 16, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3); +VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 
1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld3_lane_float16x3[3] = {-16, -15, -14}; +#else +VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3) [] = {0xcc00 /* -16 */, + 0xcb80 /* -15 */, + 0xcb00 /* -14 */}; +#endif +#endif + +/* Input buffers for vld4_lane */ +VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 8, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 16, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 32, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 64, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 8, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 16, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4); +VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#ifdef __ARMCC_VERSION +__fp16 buffer_vld4_lane_float16x4[4] = {-16, -15, -14, -13}; +#else +VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4) [] = {0xcc00 /* -16 */, + 0xcb80 /* -15 */, + 0xcb00 /* -14 */, + 0xca80 /* -13 */}; +#endif +#endif diff --git a/expected_input4gcc-nofp16.txt b/expected_input4gcc-nofp16.txt new file mode 100644 index 0000000..fc2d581 --- /dev/null +++ b/expected_input4gcc-nofp16.txt @@ -0,0 +1,7087 @@ + +VLD1/VLD1Q output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; + +VADD/VADDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffec, 0xffed, 0xffee, 0xffef }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff4 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x54 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xe, 0xf, 0x10, 0x11 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x19 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffdc, 0xffdd, 0xffde, 0xffdf, 0xffe0, 0xffe1, 0xffe2, 0xffe3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffd2, 0xffffffd3, 0xffffffd4, 0xffffffd5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8, 0x9 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7, 0xfff8, 0xfff9, 0xfffa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x27, 0x28, 0x29, 0x2a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, 0xfffffffffffffff4 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x40d9999a, 0x40d9999a }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x41100000, 0x41100000, 0x41100000, 0x41100000 }; + +VLD1_LANE/VLD1_LANEQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 
0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xc1800000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xfffffff0, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xaaaaaaaaaaaaaaaa, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xfffffff0, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xaaaaaaaaaaaaaaaa }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xc1800000, 0xaaaaaaaa }; + +VLD1_DUP/VLD1_DUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; + +VLD1_DUP/VLD1_DUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 
0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; + +VLD1_DUP/VLD1_DUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 
0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 0xc1600000, 0xc1600000 }; + +VDUP/VDUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; + +VDUP/VDUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; + +VDUP/VDUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) 
[] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 0xc1600000, 0xc1600000 }; + +VMOV/VMOVQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 
0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; + +VMOV/VMOVQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 
0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; + +VMOV/VMOVQ output: 
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 
0xc1600000, 0xc1600000 }; + +VGET_HIGH output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VGET_LOW output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLAL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7c1e, 0x7c1f, 0x7c20, 0x7c21 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7c1e, 0x7c1f }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLAL_LANE (mul with input=0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLAL_LANE (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffef, 0x7ffffff0, 0x7ffffff1, 0x7ffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffef, 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLSL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffff83c2, 0xffff83c3, 0xffff83c4, 0xffff83c5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffff83c2, 
0xffffffffffff83c3 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLSL_LANE (mul with input=0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLSL_LANE (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLAL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 
0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x1684, 0x1685, 0x1686, 0x1687 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x21ce, 0x21cf }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLAL_N (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 
0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffef, 0x7ffffff0, 0x7ffffff1, 0x7ffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffef, 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLSL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 
0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffe95c, 0xffffe95d, 0xffffe95e, 0xffffe95f }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffde12, 0xffffffffffffde13 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLSL_N (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VEXT/VEXTQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) 
[] = { 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff7, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff3, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0x77, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd, 0x4204cccd, 0x4204cccd }; + +VSHR_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 
0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x7ffffff, 0x7ffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x7fffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffc, 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3f, 0x3f, 0x3f, 0x3f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7ffffff, 0x7ffffff, 0x7ffffff, 0x7ffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSHRN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) 
[] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfc, 0xfc, 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHRN_N (with input = 0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHRN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc 
}; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHRN_N (with large shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHRN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHRN_N (check saturation: shift by 3) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHRN_N (check saturation: shift by max) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSET_LANE/VSET_LANEQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x44 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0x55, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x88 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0x55, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x4204cccd }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x99 
}; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xaa, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xbb }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xcc }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xdd, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xee, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xff, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x11 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xdd, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xee, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0x41333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSUB/VQSUBQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffce, 0xffcf, 0xffd0, 0xffd1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffbd, 0xffffffbe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffac }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff8a, 0xff8b, 0xff8c, 0xff8d }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff79, 0xffffff7a }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff68 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffce, 0xffcf, 0xffd0, 0xffd1, 0xffd2, 0xffd3, 0xffd4, 0xffd5 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffbd, 0xffffffbe, 0xffffffbf, 0xffffffc0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffac, 0xffffffffffffffad }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffff79, 0xffffff7a, 0xffffff7b, 0xffffff7c }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffff68, 0xffffffffffffff69 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffac }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff68 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffac, 0xffffffffffffffad }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffff68, 0xffffffffffffff69 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +VECT_VAR_DECL(expected,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000000, 0x80000000 }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQDMULH output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQDMULH output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQDMULH_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 
0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int 
VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQDMULH_LANE (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQDMULH_N output: +VECT_VAR_DECL(expected,int,16,4) [] = { 0x19, 0x19, 0x19, 0x19 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x4, 0x4 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xa, 0xa, 0xa, 0xa }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQDMULH_N (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +VQDMULL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x200, 0x1c2, 0x188, 0x152 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x200, 0x1c2 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; + +VQDMULL (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 
0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLAL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 
}; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7c1e, 0x7c1f, 0x7c20, 0x7c21 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7c1e, 0x7c1f }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLAL (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 
0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffef, 0x7ffffff0, 0x7ffffff1, 0x7ffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffef, 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLSL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffff83c2, 0xffff83c3, 0xffff83c4, 0xffff83c5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffff83c2, 0xffffffffffff83c3 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLSL (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VCEQ/VCEQQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0xff, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCGE/VCGEQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] 
= { 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + +VCLE/VCLEQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 
0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + +VCGT/VCGTQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; + +VCLT/VCLTQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; + +VBSL/VBSLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf6, 0xf6, 0xf6, 0xf6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffd }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 
0xf6, 0xf6, 0xf6, 0xf6, 0xf2, 0xf2, 0xf2, 0xf2, 0xf6, 0xf6, 0xf6, 0xf6 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffffd, 0xfffffffffffffffd }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7, 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7, 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001, 0xc1600001, 0xc1500001 }; + +VSHL/VSHLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff80 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 
0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x40000000, 0x80000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x40000000, 0x80000000, 0xc0000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSHL/VSHLQ (large shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSHL/VSHLQ (negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7ff8, 0x7ff8, 0x7ff9, 0x7ff9 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3ffffffc, 0x3ffffffc }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3f, 0x3f, 0x3f, 0x3f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x7ff, 0x7ff, 0x7ff, 0x7ff, 0x7ff, 0x7ff, 0x7ff, 0x7ff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffffffe, 0x1ffffffe, 0x1ffffffe, 0x1ffffffe }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffffffffffff, 0x7ffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSHL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffff80, 0xffffff88 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff00, 0xff10, 0xff20, 0xff30 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff80, 0xffffff88 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffe0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, 0xffe8, 
0xffea, 0xffec, 0xffee }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffc0, 0xffffffc4, 0xffffffc8, 0xffffffcc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xff80, 0xff88, 0xff90, 0xff98, 0xffa0, 0xffa8, 0xffb0, 0xffb8 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc4, 0xffffffc8, 0xffffffcc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHL/VQSHLQ (with input = 0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 
0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHL/VQSHLQ (input 0 and negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL/VQSHLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHL/VQSHLQ (negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7ffff, 0x7ffff, 0x7ffff, 0x7ffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL/VQSHLQ (large shift amount, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHL/VQSHLQ (check cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3fff, 0x3fff, 0x3fff, 0x3fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x3fffffff, 0x3fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3fff, 0x3fff, 0x3fff, 0x3fff, 0x3fff, 0x3fff, 0x3fff, 0x3fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3fffffffffffffff, 0x3fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int 
VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL/VQSHLQ (large shift amount, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL/VQSHLQ (check saturation on 64 bits) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 
0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int 
VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL_N/VQSHLQ_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, 0xffe8, 0xffea, 0xffec, 0xffee }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, 0xffffffe4, 0xffffffe6 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 
0xffffffffffffffc0, 0xffffffffffffffc4 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ (with input = 0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ (input 0 and negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 
0x1ffffffffffffffe }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ (negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b, 0x7c }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] 
= { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000, 0x80000, 0x80000, 0x80000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ (checking round_const overflow: shift by -1) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000000000000000, 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ (checking round_const overflow: shift by -3) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10000000, 0x10000000, 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x1000000000000000, 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ (checking negative shift amount as large as input vector width) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 
0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ (large shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 
0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHL/VRSHLQ (large negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD2/VLD2Q chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; 
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; + +VLD2/VLD2Q chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff4, 
0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, 0xc1200000, 0xc1100000 }; + +VLD3/VLD3Q chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] 
= { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; + +VLD3/VLD3Q chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, 0xc1200000, 0xc1100000 }; + +VLD3/VLD3Q chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000, 0xc0c00000, 0xc0a00000 }; + +VLD4/VLD4Q chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; + +VLD4/VLD4Q chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 
0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, 0xc1200000, 0xc1100000 }; + +VLD4/VLD4Q chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 
}; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000, 0xc0c00000, 0xc0a00000 }; + +VLD4/VLD4Q chunk 3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,32,4) [] = { 
0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0800000, 0xc0400000, 0xc0000000, 0xbf800000 }; + +VDUP_LANE/VDUP_LANEQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,4) [] = 
{ 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +VQDMULL_LANE output: +VECT_VAR_DECL(expected,int,32,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000, 0x4000 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +VQDMULL_N output: +VECT_VAR_DECL(expected,int,32,4) [] = { 0x44000, 0x44000, 0x44000, 0x44000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xaa000, 0xaa000 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; + 
+VST1_LANE/VST1_LANEQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff5, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 
0xc1700000, 0x33333333, 0x33333333, 0x33333333 }; + +VSUB/VSUBQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffed, 0xffffffee }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff8c }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffd2, 0xffd3, 0xffd4, 0xffd5 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffc8, 0xffffffc9 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffee }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xe, 0xf, 0x10, 0x11 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffd8, 0xffffffffffffffd9 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffed, 0xffee, 0xffef, 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffb9, 0xffffffba, 0xffffffbb, 0xffffffbc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffed, 0xffffffffffffffee }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 
0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc00ccccd, 0xc00ccccd }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc00ccccc, 0xc00ccccc, 0xc00ccccc, 0xc00ccccc }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQADD/VQADDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x12, 0x13, 0x14, 0x15 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x23, 0x24 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x34 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x23, 0x24, 0x25, 0x26 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x34, 0x35 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x34 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x34, 0x35 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +int 
VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +VECT_VAR_DECL(expected,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +VABS/VABSQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x40133333, 0x40133333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x4059999a, 0x4059999a, 0x4059999a, 0x4059999a }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQABS/VQABSQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQABS/VQABSQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VCOMBINE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x11, 0x11, 
0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x40533333, 0x40533333 }; + +VMAX/VMAXQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff 
}; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VMIN/VMINQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf3, 0xf3, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf3, 0xf3, 0xf3, 0xf3 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1680000, 0xc1680000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xff800000, 0xff800000, 0xff800000, 
0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + +VNEG/VNEGQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc0133333, 0xc0133333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc059999a, 0xc059999a, 0xc059999a, 0xc059999a }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQNEG/VQNEGQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQNEG/VQNEGQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff 
}; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMLAL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xe907, 0xe908, 0xe909, 
0xe90a, 0xe90b, 0xe90c, 0xe90d, 0xe90e }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMLSL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x16d9, 0x16da, 0x16db, 0x16dc, 0x16dd, 0x16de, 0x16df, 0x16e0 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffc1d9, 0xffffffffffffc1da }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc, 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffc1d9, 0xffffffffffffc1da }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMLAL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMLSL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 
}; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffc1d9, 0xffffffffffffc1da }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffc1d9, 0xffffffffffffc1da }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMLAL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x595, 0x596, 0x597, 0x598 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xb3a, 0xb3b }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x10df, 0x10e0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMLSL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffa4b, 0xfffffa4c, 0xfffffa4d, 0xfffffa4e }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff4a6, 0xfffffffffffff4a7 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffef01, 0xffffef02, 0xffffef03, 0xffffef04 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffef01, 0xffffffffffffef02 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMOVL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 
0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMOVN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMULL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 
0x100, 0xe1, 0xc4, 0xa9, 0x90, 0x79, 0x64, 0x51 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x100, 0xe1, 0xc4, 0xa9 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x100, 0xe1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe100, 0xe2e1, 0xe4c4, 0xe6a9, 0xe890, 0xea79, 0xec64, 0xee51 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe00100, 0xffe200e1, 0xffe400c4, 0xffe600a9 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe000000100, 0xffffffe2000000e1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMULL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VREV16 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6, 0xf9, 0xf8, 0xfb, 0xfa, 0xfd, 0xfc, 0xff, 0xfe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6, 0xf9, 0xf8, 0xfb, 0xfa, 0xfd, 0xfc, 0xff, 0xfe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6, 0xf9, 0xf8, 0xfb, 0xfa, 0xfd, 0xfc, 0xff, 0xfe }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VREV32 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8, 0xff, 0xfe, 0xfd, 0xfc }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 
0xfff2, 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8, 0xff, 0xfe, 0xfd, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8, 0xff, 0xfe, 0xfd, 0xfc }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VREV64 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff0, 0xfffffff3, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff0, 0xfffffff3, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1800000, 0xc1500000, 0xc1600000 }; + +VSRA_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffd }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff3, 0xfffffff4 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 
0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0xfffffff4, 0xfffffff5, 0xfffffff6 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTRN/VTRNQ chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0x11, 0x11, 0xf2, 0xf3, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11, 0xf2, 0xf3, 0x11, 0x11, 0xf4, 0xf5, 
0x11, 0x11, 0xf6, 0xf7, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0x22, 0x22, 0xfff2, 0xfff3, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55, 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55, 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x42073333, 0x42073333 }; + +VTRN/VTRNQ chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf4, 0xf5, 0x11, 0x11, 0xf6, 0xf7, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff3, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11, 0xfa, 0xfb, 0x11, 0x11, 0xfc, 0xfd, 0x11, 0x11, 0xfe, 0xff, 0x11, 0x11 
}; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff4, 0xfff5, 0x22, 0x22, 0xfff6, 0xfff7, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, 0xfa, 0xfb, 0x55, 0x55, 0xfc, 0xfd, 0x55, 0x55, 0xfe, 0xff, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, 0xfff6, 0xfff7, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, 0xfa, 0xfb, 0x55, 0x55, 0xfc, 0xfd, 0x55, 0x55, 0xfe, 0xff, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, 0xfff6, 0xfff7, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xc1500000, 0x42073333, 0x42073333 }; + +VUZP/VUZPQ chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff 
}; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; + +VUZP/VUZPQ chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 
}; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x77, 0x77, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42073333, 0x42073333, 0x42073333, 0x42073333 }; + +VZIP/VZIPQ chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf4, 0x11, 0x11, 0xf1, 0xf5, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff2, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, 0xf1, 0xf5, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, 0xf1, 0xf5, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11, 0xf1, 0xf9, 0x11, 0x11, 0xf2, 0xfa, 0x11, 0x11, 0xf3, 0xfb, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff4, 
0x22, 0x22, 0xfff1, 0xfff5, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff2, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, 0xf1, 0xf9, 0x55, 0x55, 0xf2, 0xfa, 0x55, 0x55, 0xf3, 0xfb, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, 0xfff1, 0xfff5, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff2, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, 0xf1, 0xf9, 0x55, 0x55, 0xf2, 0xfa, 0x55, 0x55, 0xf3, 0xfb, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, 0xfff1, 0xfff5, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, 0x42073333, 0x42073333 }; + +VZIP/VZIPQ chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf6, 0x11, 0x11, 0xf3, 0xf7, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff3, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, 0xf3, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, 0xf3, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11, 0xf5, 0xfd, 0x11, 0x11, 0xf6, 0xfe, 0x11, 0x11, 0xf7, 0xff, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff6, 0x22, 0x22, 0xfff3, 0xfff7, 0x22, 
0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff3, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, 0xf5, 0xfd, 0x55, 0x55, 0xf6, 0xfe, 0x55, 0x55, 0xf7, 0xff, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, 0xfff3, 0xfff7, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff3, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, 0xf5, 0xfd, 0x55, 0x55, 0xf6, 0xfe, 0x55, 0x55, 0xf7, 0xff, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, 0xfff3, 0xfff7, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, 0x42073333, 0x42073333 }; + +VREINTERPRET/VREINTERPRETQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 
0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xf2, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xf2, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffff1fffffff0, 0xfffffff3fffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffff1fffffff0, 0xfffffff3fffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff1fffffff0, 0xfffffff3fffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; 
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff1fffffff0, 0xfffffff3fffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xf2, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xf2, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 
}; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xc1700000c1800000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xc1700000c1800000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1, 0x0, 0x0, 0x60, 0xc1, 0x0, 
0x0, 0x50, 0xc1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, 0x0, 0xc160, 0x0, 0xc150 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xc1700000c1800000, 0xc1500000c1600000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1, 0x0, 0x0, 0x60, 0xc1, 0x0, 0x0, 0x50, 0xc1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, 0x0, 0xc160, 0x0, 0xc150 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xc1700000c1800000, 0xc1500000c1600000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1, 0x0, 0x0, 0x60, 0xc1, 0x0, 0x0, 0x50, 0xc1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, 0x0, 0xc160, 0x0, 0xc150 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff5, 0xfff6, 0xfff7, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQRDMULH (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH (check rounding cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) 
= 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQRDMULH_LANE (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH_LANE (check rounding cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH_N output: +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x6, 0x6, 0x6, 0x5, 0x5, 0x4, 0x4, 0x4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xfffffffe }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQRDMULH_N (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH_N (check rounding cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (with input = 0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHL/VQRSHLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int 
VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x2, 0x2, 
0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000, 0x80000, 0x80000, 0x80000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000000000000000, 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 
0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10000000, 0x10000000, 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x1000000000000000, 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 
0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int 
VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VABA/VABAQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 
0xf9, 0xfa, 0xfb, 0xfc, 0xfd }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f, 0xba0, 0xba1, 0xba2, 0xba3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VABAL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff6, 0xfff7, 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x16, 0x17, 0x18, 0x19 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x20, 0x21 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x907, 0x908, 0x909, 0x90a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe7, 0xffffffe8 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VABAL test intermediate overflow output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 
0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffef, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffef, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe2, 0xffe3, 0xffe4, 0xffe5 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe7, 0xffffffe8 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VABD/VABDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 
0x18, 0x17 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe8, 0xffffffe9 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x41c26666, 0x41ba6666 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x30, 0x2f, 0x2e, 0x2d }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffe4, 0xffe5, 0xffe6, 0xffe7, 0xffe8, 0xffe9, 0xffea, 0xffeb }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffd0, 0xffffffd1, 0xffffffd2, 0xffffffd3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42407ae1, 0x423c7ae1, 0x42387ae1, 0x42347ae1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VABDL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x18, 0x17 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe8, 0xffffffe9 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VAND/VANDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x2, 0x2, 0x0, 0x0, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x60 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x14, 0x14, 0x14, 0x14 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10, 0x10, 0x12, 0x12 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf2, 0xf2, 0xf4, 0xf4, 0xf6, 0xf6, 0xf0, 0xf0, 0xf2, 0xf2, 0xf4, 0xf4, 0xf6, 0xf6 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe0, 0xffe0, 0xffe0, 0xffe4, 0xffe4, 0xffe4, 0xffe4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe0, 0xffffffe2, 0xffffffe2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x10, 0x10 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x4, 0x4, 0x4, 0x4, 0x8, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x0, 0x1, 0x2, 0x3 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x30, 0x31, 0x32, 0x33 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VORR/VORRQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf2, 0xf3, 0xf6, 0xf7, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff4 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf4, 0xf5, 0xf6, 0xf7, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xffff, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff8, 0xfffffff9 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf6, 0xf7, 0xf6, 0xf7, 0xf6, 0xf7, 0xf6, 0xf7, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff3, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff8, 0xfffffffffffffff9 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0xfc, 0xfd, 0xfe, 0xff, 0xfc, 0xfd, 0xfe, 0xff, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff7, 0xfff7, 0xfff7, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff7, 0xfffffff7, 0xfffffff7, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VORN/VORNQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfd, 0xfd, 0xff, 0xff, 0xfd, 0xfd, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffd }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffb }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfb, 0xfb, 0xfb, 0xfb, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff7, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffffd }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xf9, 0xfb, 0xfb, 0xfd, 0xfd, 0xff, 0xff, 0xf9, 0xf9, 0xfb, 0xfb, 0xfd, 0xfd, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff7, 0xfff7, 0xfff7, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffd, 0xfffffffd, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff7, 0xfffffffffffffff7 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7, 0xfb, 0xfb, 0xfb, 0xfb, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffffc, 0xfffffffffffffffd }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VEOR/VEORQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf0, 0xf1, 0xf6, 0xf7, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,32,2) [] = 
{ 0xfffffff3, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff94 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe4, 0xe5, 0xe6, 0xe7, 0xe0, 0xe1, 0xe2, 0xe3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffee, 0xffef, 0xffec, 0xffed }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd8, 0xffffffd9 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x6, 0x7, 0x4, 0x5, 0x2, 0x3, 0x0, 0x1, 0xe, 0xf, 0xc, 0xd, 0xa, 0xb, 0x8, 0x9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1c, 0x1d, 0x1e, 0x1f, 0x18, 0x19, 0x1a, 0x1b }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x12, 0x13, 0x10, 0x11 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe8, 0xffffffffffffffe9 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0xf8, 0xf9, 0xfa, 0xfb, 0xf4, 0xf5, 0xf6, 0xf7, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc7, 0xffffffc6, 0xffffffc5, 0xffffffc4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VBIC/VBICQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf4, 0xf5, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x1, 0x2, 0x3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff90 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe1, 0xe2, 0xe3, 0xe0, 0xe1, 0xe2, 0xe3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe0, 0xffe1, 0xffe0, 0xffe1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd0, 0xffffffd1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x8, 0x9, 0x8, 0x9, 0x8, 0x9, 0x8, 0x9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x11, 0x12, 0x13, 0x10, 0x11, 0x12, 0x13 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0x11, 0x10, 0x11 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff4, 0xfff4, 0xfff4, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc0, 0xffffffc0, 0xffffffc0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VCREATE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x9abcdef0, 0x12345678 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x123456789abcdef0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x9abcdef0, 0x12345678 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD2_LANE/VLD2Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; + +VLD2_LANE/VLD2Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xaaaaaaaa, 0xaaaaaaaa }; + +VLD3_LANE/VLD3Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) 
[] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; + 
+VLD3_LANE/VLD3Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) 
[] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xc1800000, 0xc1700000 }; + +VLD3_LANE/VLD3Q_LANE chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa, 
0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; + +VLD4_LANE/VLD4Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; + +VLD4_LANE/VLD4Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; + +VLD4_LANE/VLD4Q_LANE chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; + +VLD4_LANE/VLD4Q_LANE chunk 3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; 
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; + +VLD2_DUP/VLD2Q_DUP chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) 
[] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD2_DUP/VLD2Q_DUP chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD3_DUP/VLD3Q_DUP chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD3_DUP/VLD3Q_DUP chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff2, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff2, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff2, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD3_DUP/VLD3Q_DUP chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD4_DUP/VLD4Q_DUP chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD4_DUP/VLD4Q_DUP chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) 
[] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD4_DUP/VLD4Q_DUP chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VLD4_DUP/VLD4Q_DUP chunk 3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMLA output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x2bf7, 0x2bf8 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x43ac, 0x43ad }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x43a14e76, 0x43a1ce76 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 
0x1c, 0x1d, 0x1e }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4830, 0x4831, 0x4832, 0x4833, 0x4834, 0x4835, 0x4836, 0x4837 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x470f, 0x4710, 0x4711, 0x4712 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3620, 0x3621, 0x3622, 0x3623 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x45f0ae15, 0x45f0b615, 0x45f0be15, 0x45f0c615 }; + +VMLS output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xe054, 0xe055, 0xe056, 0xe057 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffd3e9, 0xffffd3ea }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffbc34, 0xffffbc35 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b14e76, 0xc3b0ce76 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 
0xdf, 0xe0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xb7b0, 0xb7b1, 0xb7b2, 0xb7b3, 0xb7b4, 0xb7b5, 0xb7b6, 0xb7b7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffb8d1, 0xffffb8d2, 0xffffb8d3, 0xffffb8d4 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc, 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc9c0, 0xffffc9c1, 0xffffc9c2, 0xffffc9c3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc5f1ae15, 0xc5f1a615, 0xc5f19e15, 0xc5f19615 }; + +VMUL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x1, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfde0, 0xfe02, 0xfe24, 0xfe46 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffcd0, 0xfffffd03 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0x4, 0x48, 0x8c, 0xd0, 0x14, 0x58, 0x9c }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff9a0, 0xfffffa06 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc0, 0x84, 0x48, 0xc, 0xd0, 0x94, 0x58, 0x1c }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc4053333, 0xc3f9c000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x90, 0x7, 0x7e, 0xf5, 0x6c, 0xe3, 0x5a, 0xd1, 0x48, 
0xbf, 0x36, 0xad, 0x24, 0x9b, 0x12, 0x89 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf780, 0xf808, 0xf890, 0xf918, 0xf9a0, 0xfa28, 0xfab0, 0xfb38 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff670, 0xfffff709, 0xfffff7a2, 0xfffff83b }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0xa, 0xb4, 0x5e, 0x8, 0xb2, 0x5c, 0x6, 0xb0, 0x5a, 0x4, 0xae, 0x58, 0x2, 0xac, 0x56 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf450, 0xf50b, 0xf5c6, 0xf681, 0xf73c, 0xf7f7, 0xf8b2, 0xf96d }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff340, 0xfffff40c, 0xfffff4d8, 0xfffff5a4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0xca, 0x34, 0x9e, 0xc8, 0x62, 0x9c, 0x36, 0x30, 0x9a, 0x64, 0xce, 0x98, 0x32, 0xcc, 0x66 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4c73333, 0xc4bac000, 0xc4ae4ccd, 0xc4a1d999 }; + +VMUL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffface0, 0xffffb212 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b66666, 0xc3ab0000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc, 0xffd0, 0xffd4, 0xffd8, 0xffdc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffde0, 0xfffffe02, 0xfffffe24, 0xfffffe46 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c, 0xccd0, 0xd114, 0xd558, 0xd99c }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffface0, 0xffffb212, 0xffffb744, 0xffffbc76 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc3b66666, 0xc3ab0000, 0xc39f9999, 0xc3943333 }; + +VMUL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfef0, 0xff01, 0xff12, 0xff23 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfcd0, 0xfd03, 0xfd36, 0xfd69 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffbc0, 0xfffffc04 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b26666, 0xc3a74000 }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf, 0xfc04, 0xfc59, 0xfcae, 0xfd03 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff9a0, 0xfffffa06, 0xfffffa6c, 0xfffffad2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf890, 0xf907, 0xf97e, 0xf9f5, 0xfa6c, 0xfae3, 0xfb5a, 0xfbd1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff780, 0xfffff808, 0xfffff890, 0xfffff918 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4b1cccd, 0xc4a6b000, 0xc49b9333, 0xc4907667 }; + +VMULL_N output: +VECT_VAR_DECL(expected,int,32,4) [] = { 0x11000, 0x11000, 0x11000, 0x11000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x22000, 0x22000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33000, 0x33000, 0x33000, 0x33000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x44000, 0x44000 }; + +VMLA_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,32,2) [] 
= { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4418c687, 0x44190687 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x441a3168, 0x441a7168, 0x441ab168, 0x441af168 }; + +VMLS_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffc1d9, 0xffffc1da }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc }; +VECT_VAR_DECL(expected,uint,32,2) [] = 
{ 0xffffc1d9, 0xffffc1da }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc420c687, 0xc4208687 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc, 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc, 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4223168, 0xc421f168, 0xc421b168, 0xc4217168 }; + +VMLA_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x595, 0x596, 0x597, 0x598 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xb3a, 0xb3b }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1684, 0x1685 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4497deb8, 0x4497feb8 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1c29, 0x1c2a, 0x1c2b, 0x1c2c, 0x1c2d, 0x1c2e, 0x1c2f, 0x1c30 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x21ce, 0x21cf, 0x21d0, 0x21d1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2773, 0x2774, 0x2775, 0x2776, 0x2777, 0x2778, 0x2779, 0x277a }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x2d18, 0x2d19, 0x2d1a, 0x2d1b }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x4568087b, 0x4568187b, 0x4568287b, 0x4568387b }; + +VMLS_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfa4b, 0xfa4c, 0xfa4d, 0xfa4e }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff4a6, 0xfffff4a7 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xef01, 0xef02, 0xef03, 0xef04 }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffe95c, 0xffffe95d }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc49bdeb8, 0xc49bbeb8 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xe3b7, 0xe3b8, 0xe3b9, 0xe3ba, 0xe3bb, 0xe3bc, 0xe3bd, 0xe3be }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffde12, 0xffffde13, 0xffffde14, 0xffffde15 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd86d, 0xd86e, 0xd86f, 0xd870, 0xd871, 0xd872, 0xd873, 0xd874 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffd2c8, 0xffffd2c9, 0xffffd2ca, 0xffffd2cb }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc56a087b, 0xc569f87b, 0xc569e87b, 0xc569d87b }; + +VSLI_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe1, 0xffe2, 0xffe3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x64fffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x50, 0x51, 0x52, 0x53, 0x50, 0x51, 0x52, 0x53 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7bf0, 0x7bf1, 
0x7bf2, 0x7bf3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3ffffff0, 0x3ffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x10 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x50, 0x51, 0x52, 0x53, 0x50, 0x51, 0x52, 0x53 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfe2ffff0, 0xfe2ffff1, 0xfe2ffff2, 0xfe2ffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x18fff0, 0x18fff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3ff0, 0x3ff1, 0x3ff2, 0x3ff3, 0x3ff4, 0x3ff5, 0x3ff6, 0x3ff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1bfffff0, 0x1bfffff1, 0x1bfffff2, 0x1bfffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffffffffff0, 0x7ffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3ff0, 0x3ff1, 0x3ff2, 0x3ff3, 0x3ff4, 0x3ff5, 0x3ff6, 0x3ff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSLI_Nmax shift amount output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 
0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x7ffffff0, 0x7ffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff5, 0x7ff6, 0x7ff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7ffffff0, 0x7ffffff1, 0x7ffffff2, 0x7ffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7ffffffffffffff0, 0x7ffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSRI_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000001, 0x80000001 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffff00000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffc0, 0xffc0, 0xffc0, 0xffc0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xe000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xffc0, 0xffc0, 0xffc0, 0xffc0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffff000000000000, 0xffff000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffe00, 0xfffffe00, 0xfffffe00, 0xfffffe00 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffff800, 0xfffffffffffff800 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSRI_N max shift amount output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = 
{ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTST/VTSTQ (signed input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 
0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0xffff, 0x0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0xffffffff, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTST/VTSTQ (unsigned input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0xffff, 0x0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0xffffffff, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VADDHN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x32, 0x32, 0x32, 0x32 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x18 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x37, 0x37, 0x37, 0x37 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3, 0x3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRADDHN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x19, 0x19 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x38, 0x38, 0x38, 0x38 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x4, 0x4 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VADDL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 
0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe7, 0xffe8, 0xffe9, 0xffea }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3, 0xffffffe4, 0xffffffe5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e3, 0x1e4, 0x1e5, 0x1e6, 0x1e7, 0x1e8, 0x1e9, 0x1ea }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe2, 0x1ffe3, 0x1ffe4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe0, 0x1ffffffe1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VADDW output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 
0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe7, 0xffe8, 0xffe9, 0xffea }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3, 0xffffffe4, 0xffffffe5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe1, 0xffe2, 0xffe3, 0xffe4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe0, 0xffffffe1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VHADD/VHADDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 
0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3, 0xfff3, 0xfff4, 0xfff4, 0xfff5 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2, 0xfff3, 0xfff3, 0xfff4, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRHADD/VRHADDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff3, 0xfff3, 0xfff4, 0xfff4, 0xfff5, 0xfff5 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3, 0xfff3, 0xfff4, 0xfff4, 0xfff5 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VHSUB/VHSUBQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfe, 0xff, 0xff, 0x0, 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,int,16,4) [] = 
{ 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfe, 0xff, 0xff, 0x0, 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xfe, 0xff, 0xff, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffe, 0xffff, 0xffff, 0x0, 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, 0xff, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSUBL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3, 0x4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3, 0x4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x1, 0x2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSUBW output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3, 0x4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfefd, 0xfefe, 0xfeff, 0xff00, 0xff01, 0xff02, 0xff03, 0xff04 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffeffff, 0xffff0000, 0xffff0001, 0xffff0002 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffff00000000, 0xffffffff00000001 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSUBHN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x31, 0x31, 0x31, 0x31 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x17, 0x17 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 
0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x36, 0x36, 0x36, 0x36 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSUBHN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x31, 0x31, 0x31, 0x31 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x17, 0x17 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 
0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x36, 0x36, 0x36, 0x36 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VMVN/VMVNQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf, 0xe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 
0x8 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf, 0xe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; + +VQMOVN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1278, 0x1278, 0x1278, 0x1278 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 
0x12345678, 0x12345678 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8765, 0x8765, 0x8765, 0x8765 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x87654321, 0x87654321 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int 
VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; + +VQMOVN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; + +VQMOVUN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x5678, 0x5678, 0x5678, 0x5678 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x12345678, 0x12345678 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; + +VQMOVUN (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHR_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x8000000, 0x8000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x80000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, 0xff, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffc, 0xfffffffd, 0xfffffffd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3f, 0x3f, 0x3f, 0x3f, 0x40, 0x40 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1ffe, 
0x1ffe, 0x1ffe, 0x1ffe, 0x1fff, 0x1fff, 0x1fff, 0x1fff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x8000000, 0x8000000, 0x8000000, 0x8000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHR_N (overflow test: max shift amount, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 
0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000000000000000, 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 
0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHR_N (overflow test: shift by 3, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10000000, 0x10000000, 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x1000000000000000, 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 
}; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xc000, 0xc000, 0xc000, 0xc000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xc0000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xc000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xc000000000000000, 0xc000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSHR_N (overflow test: shift by 3, with negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf000, 0xf000, 0xf000, 0xf000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf0000000, 0xf0000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf0000000, 0xf0000000, 0xf0000000, 0xf0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf000000000000000, 0xf000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSRA_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffd, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffd, 0xfffe, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffd, 0xfffffffe, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 
0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3, 0x4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSRA_N (checking overflow: shift by 1, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000000000000000, 
0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSRA_N (checking overflow: shift by 3, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10000000, 0x10000000, 
0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x1000000000000000, 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSRA_N (checking overflow: shift by max, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSRA_N (checking overflow: shift by 1, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xc000, 0xc000, 0xc000, 0xc000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xc0000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xc000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xc000000000000000, 0xc000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 
0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSRA_N (checking overflow: shift by max, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf000, 0xf000, 0xf000, 0xf000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf0000000, 0xf0000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf0000000, 0xf0000000, 0xf0000000, 0xf0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf000000000000000, 0xf000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 
}; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VRSRA_N (checking overflow: shift by max, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; 
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VSHLL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, 0xffe8, 0xffea, 0xffec, 0xffee }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, 0xffffffe4, 0xffffffe6 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffff80, 0xffffffffffffff88 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3c0, 0x3c4, 0x3c8, 0x3cc, 0x3d0, 0x3d4, 0x3d8, 0x3dc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff00, 0xfff10, 0xfff20, 0xfff30 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffff80, 0x7ffffff88 
}; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VPADDL/VPADDLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe1, 0xffe5, 0xffe9, 0xffed }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe1, 0xffffffe5 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffe1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1e1, 0x1e5, 0x1e9, 0x1ed }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffe1, 0x1ffe5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffe1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe1, 0xffe5, 0xffe9, 0xffed, 0xfff1, 0xfff5, 0xfff9, 0xfffd }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe1, 0xffffffe5, 0xffffffe9, 0xffffffed }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe1, 0xffffffffffffffe5 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e1, 0x1e5, 0x1e9, 0x1ed, 0x1f1, 0x1f5, 0x1f9, 0x1fd }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe5, 0x1ffe9, 0x1ffed }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe1, 0x1ffffffe5 }; 
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VPADD output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed, 0xe1, 0xe5, 0xe9, 0xed }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe1, 0xffffffe1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed, 0xe1, 0xe5, 0xe9, 0xed }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe1, 0xffffffe1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1f80000, 0xc1f80000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 
0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VPADAL/VPADALQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffd1, 0xffd6, 0xffdb, 0xffe0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffd1, 0xffffffd6 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffd1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1d1, 0x1d6, 0x1db, 0x1e0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffd1, 0x1ffd6 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffd1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffd1, 0xffd6, 0xffdb, 0xffe0, 0xffe5, 0xffea, 0xffef, 0xfff4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffd1, 0xffffffd6, 0xffffffdb, 0xffffffe0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffd1, 0xffffffffffffffd6 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1d1, 0x1d6, 0x1db, 0x1e0, 0x1e5, 0x1ea, 0x1ef, 0x1f4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffd1, 0x1ffd6, 0x1ffdb, 0x1ffe0 }; +VECT_VAR_DECL(expected,uint,64,2) 
[] = { 0x1ffffffd1, 0x1ffffffd6 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQSHLU_N/VQSHLUQ_N (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) 
[] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffffe }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffffe, 0xfffffffffffffffe }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; 
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQSHLU_N/VQSHLUQ_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 0x180, 0x180, 0x180, 0x180, 0x180, 0x180, 0x180 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VCLZ/VCLZQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x5, 0x5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1f, 0x1f, 0x1f, 0x1f }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 
}; + +VCLZ/VCLZQ (input=0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x20 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VCLS/VCLSQ (positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 
0x6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x19, 0x19 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x14, 0x14, 0x14, 0x14 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VCLS/VCLSQ (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VCNT/VCNTQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 
0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHRN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHRN_N (check saturation: shift by 3) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) 
[] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHRN_N (check saturation: shift by max) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VPMAX output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, 0xf1, 0xf3, 0xf5, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, 0xf1, 0xf3, 0xf5, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VPMIN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, 0xf0, 0xf2, 0xf4, 0xf6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff2, 0xfff0, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, 0xf0, 0xf2, 0xf4, 0xf6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0xfff0, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQSHRUN_N (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 
0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQSHRUN_N (check cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 
0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQSHRUN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbe, 0xdeadbe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQRSHRUN_N (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; 
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQRSHRUN_N (check cumulative saturation: shift by 1) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQRSHRUN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbf, 0xdeadbf }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VST2_LANE/VST2Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x0, 0x0 }; + +VST2_LANE/VST2Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VST3_LANE/VST3Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0x0 }; + +VST3_LANE/VST3Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) 
[] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VST3_LANE/VST3Q_LANE chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VST4_LANE/VST4Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; + +VST4_LANE/VST4Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VST4_LANE/VST4Q_LANE chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VST4_LANE/VST4Q_LANE chunk 3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VTBL1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0xf2, 0xf2, 0xf2, 0x0, 0x0, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, 0x0, 0x0, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, 0x0, 0x0, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTBL2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, 0x0, 0x0, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] 
= { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0x0, 0x0, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0x0, 0x0, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTBL3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, 0xff, 0x0, 0xf4, 0xf4 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0x0, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0x0, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTBL4 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, 0x3, 0x0, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] 
= { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0x0, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0x0, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTBX1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0xf2, 0xf2, 0xf2, 0x33, 0x33, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, 0xcc, 0xcc, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, 0xcc, 0xcc, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTBX2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, 0x33, 0x33, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 
0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0xcc, 0xcc, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0xcc, 0xcc, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTBX3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, 0xff, 0x33, 0xf4, 0xf4 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0xcc, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0xcc, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; + +VTBX4 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfa, 0xf5, 
0xf5, 0xf5, 0x3, 0x33, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0xcc, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0xcc, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,32,2) [] 
= { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3f068000, 0x3f068000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3c030000, 0x3c030000, 0x3c030000, 0x3c030000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xee800000, 0xee800000, 0xee800000, 0xee800000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xbdcc8000, 0xbdcc8000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xbc030000, 0xbc030000, 0xbc030000, 0xbc030000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7f800000, 0x7f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x80000000, 0x80000000 }; + +VRSQRTE/VRSQRTEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x9c800000, 0x9c800000, 0x9c800000, 0x9c800000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3e498000, 0x3e498000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3e700000, 0x3e700000, 0x3e700000, 0x3e700000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xae800000, 0xae800000, 0xae800000, 0xae800000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xb4800000, 0xb4800000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xed000000, 0xed000000, 0xed000000, 0xed000000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 
0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; + +VCAGE/VCAGEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; + +VCAGE/VCAGEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +VCALE/VCALEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; + +VCALE/VCALEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VCAGT/VCAGTQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; + +VCAGT/VCAGTQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +VCALT/VCALTQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; + +VCALT/VCALTQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VCVT/VCVTQ output: +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4f800000, 0x4f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x5 }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x5 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0xf, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xf, 0x0 }; + +VCVT_N/VCVTQ_N output: +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc0800000, 0xc0700000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4c000000, 0x4c000000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xb2800000, 0xb2700000, 0xb2600000, 0xb2500000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x49800000, 0x49800000, 0x49800000, 0x49800000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xff0b3333, 0x54cccd }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x15 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x1e3d7, 0xfffe1c29 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x1e, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xa, 0xa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xa, 0xa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xa66666, 0xa66666 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xa66666, 0xa66666 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfbccc, 0xfbccc, 0xfbccc, 0xfbccc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfbccc, 0xfbccc, 0xfbccc, 0xfbccc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; + +VRECPS/VRECPSQ output: +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2e19eb7, 0xc2e19eb7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1db851f, 0xc1db851f, 0xc1db851f, 0xc1db851f }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 
0x40000000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; + +VRSQRTS/VRSQRTSQ output: +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2796b84, 0xc2796b84 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0e4a3d8, 0xc0e4a3d8, 0xc0e4a3d8, 0xc0e4a3d8 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3fc00000, 0x3fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000 }; diff --git a/expected_input4gcc.txt b/expected_input4gcc.txt new file mode 100644 index 0000000..df294b9 --- /dev/null +++ b/expected_input4gcc.txt @@ -0,0 +1,7627 @@ + +VLD1/VLD1Q output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = 
{ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; + +VADD/VADDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffec, 0xffed, 0xffee, 0xffef }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff4 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x54 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xe, 0xf, 0x10, 0x11 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x19 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffdc, 0xffdd, 0xffde, 0xffdf, 0xffe0, 0xffe1, 0xffe2, 0xffe3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffd2, 0xffffffd3, 0xffffffd4, 0xffffffd5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8, 0x9 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7, 0xfff8, 0xfff9, 0xfffa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x27, 0x28, 0x29, 0x2a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, 0xfffffffffffffff4 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x40d9999a, 0x40d9999a }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x41100000, 0x41100000, 0x41100000, 0x41100000 }; + +VLD1_LANE/VLD1_LANEQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xfffffff0, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xaaaaaaaaaaaaaaaa, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xfffffff0, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xaaaaaaaaaaaaaaaa }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xc1800000, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xcc00, 0xaaaa, 0xaaaa }; + +VLD1_DUP/VLD1_DUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 
0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00, 0xcc00, 0xcc00, 0xcc00, 0xcc00 }; + 
+VLD1_DUP/VLD1_DUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 
0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80, 0xcb80, 0xcb80, 0xcb80, 0xcb80 }; + +VLD1_DUP/VLD1_DUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff2, 
0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00, 0xcb00, 0xcb00, 0xcb00, 0xcb00 }; + +VDUP/VDUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 
0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VDUP/VDUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 
0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VDUP/VDUPQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 
0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMOV/VMOVQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMOV/VMOVQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; 
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMOV/VMOVQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff2, 0xfffffff2, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff2, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 0xc1600000, 0xc1600000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VGET_HIGH output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = 
{ 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VGET_LOW output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 
0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 
0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLAL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7c1e, 0x7c1f, 0x7c20, 0x7c21 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7c1e, 0x7c1f }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLAL_LANE (mul with input=0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLAL_LANE (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffef, 0x7ffffff0, 0x7ffffff1, 0x7ffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffef, 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLSL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 
0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffff83c2, 0xffff83c3, 0xffff83c4, 0xffff83c5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffff83c2, 0xffffffffffff83c3 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLSL_LANE (mul with input=0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) 
[] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLSL_LANE (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 
}; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLAL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x1684, 0x1685, 0x1686, 0x1687 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x21ce, 0x21cf }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLAL_N (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffef, 0x7ffffff0, 0x7ffffff1, 0x7ffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffef, 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 
0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLSL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffe95c, 0xffffe95d, 0xffffe95e, 0xffffe95f }; 
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffde12, 0xffffffffffffde13 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLSL_N (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VEXT/VEXTQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 
0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff7, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff3, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0x77, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd, 0x4204cccd, 0x4204cccd }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSHR_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x7ffffff, 0x7ffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x7fffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffc, 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3f, 0x3f, 0x3f, 0x3f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7ffffff, 0x7ffffff, 0x7ffffff, 0x7ffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSHRN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfc, 0xfc, 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHRN_N (with input = 0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 
0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHRN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHRN_N (with large shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHRN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHRN_N (check saturation: shift by 3) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 
0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHRN_N (check saturation: shift by max) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) 
[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSET_LANE/VSET_LANEQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] 
= { 0xfff0, 0xfff1, 0xfff2, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x44 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0x55, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x88 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0x55, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x4204cccd }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0x99 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xaa, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xbb }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xcc }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xdd, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xee, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xff, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x11 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xdd, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xee, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0x41333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 
0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSUB/VQSUBQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffce, 0xffcf, 0xffd0, 0xffd1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffbd, 0xffffffbe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffac }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff8a, 0xff8b, 0xff8c, 0xff8d }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff79, 0xffffff7a }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff68 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffce, 0xffcf, 0xffd0, 0xffd1, 0xffd2, 0xffd3, 0xffd4, 0xffd5 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffbd, 0xffffffbe, 0xffffffbf, 0xffffffc0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffac, 0xffffffffffffffad }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffff79, 0xffffff7a, 0xffffff7b, 0xffffff7c }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffff68, 0xffffffffffffff69 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffac }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff68 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffac, 0xffffffffffffffad }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 
0xffffffffffffff68, 0xffffffffffffff69 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +VECT_VAR_DECL(expected,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQDMULH output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQDMULH output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQDMULH_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 
0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQDMULH_LANE (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int 
VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQDMULH_N output: +VECT_VAR_DECL(expected,int,16,4) [] = { 0x19, 0x19, 0x19, 0x19 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x4, 0x4 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xa, 0xa, 0xa, 0xa }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQDMULH_N (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +VQDMULL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x200, 0x1c2, 0x188, 0x152 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x200, 0x1c2 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; + +VQDMULL (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 
0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLAL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 
0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7c1e, 0x7c1f, 0x7c20, 0x7c21 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7c1e, 0x7c1f }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLAL (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 
0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffef, 0x7ffffff0, 0x7ffffff1, 0x7ffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffef, 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQDMLSL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffff83c2, 0xffff83c3, 0xffff83c4, 0xffff83c5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffff83c2, 0xffffffffffff83c3 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQDMLSL (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCEQ/VCEQQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCGE/VCGEQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff 
}; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + +VCLE/VCLEQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; + +VCGT/VCGTQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; + +VCLT/VCLTQ output: +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; + +VBSL/VBSLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf6, 0xf6, 0xf6, 0xf6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 
0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffd }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf6, 0xf6, 0xf6, 0xf6, 0xf2, 0xf2, 0xf2, 0xf2, 0xf6, 0xf6, 0xf6, 0xf6 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffffd, 0xfffffffffffffffd }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7, 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7, 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001, 0xc1600001, 0xc1500001 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + 
+VSHL/VSHLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff80 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x40000000, 0x80000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x40000000, 0x80000000, 0xc0000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 
0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSHL/VSHLQ (large shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSHL/VSHLQ (negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7ff8, 0x7ff8, 0x7ff9, 0x7ff9 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3ffffffc, 0x3ffffffc }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3f, 0x3f, 0x3f, 0x3f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x7ff, 0x7ff, 0x7ff, 0x7ff, 0x7ff, 0x7ff, 0x7ff, 0x7ff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffffffe, 0x1ffffffe, 0x1ffffffe, 0x1ffffffe }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffffffffffff, 0x7ffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 
}; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSHL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffff80, 0xffffff88 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff00, 0xff10, 0xff20, 0xff30 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff80, 0xffffff88 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffe0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, 0xffe8, 0xffea, 0xffec, 0xffee }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffc0, 0xffffffc4, 0xffffffc8, 0xffffffcc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xff80, 0xff88, 0xff90, 0xff98, 0xffa0, 0xffa8, 0xffb0, 0xffb8 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc4, 0xffffffc8, 0xffffffcc }; +VECT_VAR_DECL(expected,uint,64,2) 
[] = { 0xffffffffffffffe0, 0xffffffffffffffe2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHL/VQSHLQ (with input = 0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = 
{ 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHL/VQSHLQ (input 0 and negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL/VQSHLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHL/VQSHLQ (negative shift amount) output: 
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7ffff, 0x7ffff, 0x7ffff, 0x7ffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 
0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL/VQSHLQ (large shift amount, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int 
VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHL/VQSHLQ (check cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3fff, 0x3fff, 0x3fff, 0x3fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x3fffffff, 0x3fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3fff, 0x3fff, 0x3fff, 0x3fff, 0x3fff, 0x3fff, 0x3fff, 0x3fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3fffffffffffffff, 0x3fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL/VQSHLQ (large shift amount, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 
}; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; 
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL/VQSHLQ (check saturation on 64 bits) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; 
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL_N/VQSHLQ_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, 0xffe8, 0xffea, 0xffec, 0xffee }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, 0xffffffe4, 0xffffffe6 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 
1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] 
= { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ (with input = 0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = 
{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ (input 0 and negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 
0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ (negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b, 0x7c }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 
0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000, 0x80000, 0x80000, 0x80000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ (checking round_const overflow: shift by -1) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; 
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000000000000000, 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ (checking round_const overflow: shift by -3) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10000000, 0x10000000, 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x1000000000000000, 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ (checking negative shift amount as large as input vector width) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ (large shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHL/VRSHLQ (large negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD2/VLD2Q chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 
0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; + +VLD2/VLD2Q chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 
0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, 0xc1200000, 0xc1100000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500, 0xc400, 0xc200, 0xc000, 0xbc00 }; + +VLD3/VLD3Q chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; 
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; + +VLD3/VLD3Q chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, 0xc1200000, 0xc1100000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500, 0xc400, 0xc200, 0xc000, 0xbc00 }; + +VLD3/VLD3Q chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000, 0xc0c00000, 0xc0a00000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x3c00, 0x4000, 0x4200, 0x4400, 
0x4500, 0x4600, 0x4700 }; + +VLD4/VLD4Q chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 
0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; + +VLD4/VLD4Q chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 
0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, 0xc1200000, 0xc1100000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500, 0xc400, 0xc200, 0xc000, 0xbc00 }; + +VLD4/VLD4Q chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000, 0xc0c00000, 0xc0a00000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700 }; + +VLD4/VLD4Q chunk 3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 
0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0800000, 0xc0400000, 0xc0000000, 0xbf800000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x4800, 0x4880, 0x4900, 0x4980, 0x4a00, 0x4a80, 0x4b00, 0x4b80 }; + +VDUP_LANE/VDUP_LANEQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff1, 
0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +VQDMULL_LANE output: +VECT_VAR_DECL(expected,int,32,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000, 0x4000 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +VQDMULL_N output: +VECT_VAR_DECL(expected,int,32,4) [] = { 0x44000, 0x44000, 0x44000, 0x44000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xaa000, 0xaa000 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 
0x7fffffffffffffff, 0x7fffffffffffffff }; + +VST1_LANE/VST1_LANEQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb00, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff5, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 
0xfff4, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc980, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSUB/VSUBQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffed, 0xffffffee }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff8c }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffd2, 0xffd3, 0xffd4, 0xffd5 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffc8, 0xffffffc9 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffee }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xe, 0xf, 0x10, 0x11 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffd8, 0xffffffffffffffd9 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffed, 0xffee, 0xffef, 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffb9, 0xffffffba, 0xffffffbb, 0xffffffbc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffed, 0xffffffffffffffee }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc00ccccd, 0xc00ccccd }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc00ccccc, 0xc00ccccc, 0xc00ccccc, 0xc00ccccc }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQADD/VQADDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x12, 0x13, 0x14, 0x15 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x23, 0x24 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x34 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x23, 0x24, 0x25, 0x26 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x34, 0x35 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; 
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x34 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x34, 0x35 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +VECT_VAR_DECL(expected,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int 
VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +VABS/VABSQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 
0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x40133333, 0x40133333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x4059999a, 0x4059999a, 0x4059999a, 0x4059999a }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQABS/VQABSQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] 
= { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQABS/VQABSQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0 }; + +VCOMBINE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0x44 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 
0xc1700000, 0x40533333, 0x40533333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0x4b80, 0x4b80, 0x4b80, 0x4b80 }; + +VMAX/VMAXQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VMIN/VMINQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf3, 0xf3, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf3, 0xf3, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4, 0xf4 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff3, 0xfff3, 
0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1680000, 0xc1680000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xff800000, 0xff800000, 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + +VNEG/VNEGQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc0133333, 0xc0133333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc059999a, 0xc059999a, 0xc059999a, 0xc059999a }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int 
VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQNEG/VQNEGQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQNEG/VQNEGQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLAL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xe907, 0xe908, 0xe909, 0xe90a, 0xe90b, 0xe90c, 
0xe90d, 0xe90e }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLSL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x16d9, 0x16da, 0x16db, 0x16dc, 0x16dd, 0x16de, 0x16df, 0x16e0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffc1d9, 0xffffffffffffc1da }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc, 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffc1d9, 0xffffffffffffc1da }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLAL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 
0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLSL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffc1d9, 0xffffffffffffc1da }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffc1d9, 0xffffffffffffc1da }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLAL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) 
[] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x595, 0x596, 0x597, 0x598 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xb3a, 0xb3b }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x10df, 0x10e0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLSL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffa4b, 0xfffffa4c, 0xfffffa4d, 0xfffffa4e }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff4a6, 0xfffffffffffff4a7 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffef01, 0xffffef02, 0xffffef03, 0xffffef04 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffef01, 0xffffffffffffef02 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMOVL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) 
[] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMOVN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 
0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 
0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMULL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x100, 0xe1, 0xc4, 0xa9, 0x90, 0x79, 0x64, 0x51 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x100, 0xe1, 0xc4, 0xa9 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x100, 0xe1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe100, 0xe2e1, 0xe4c4, 0xe6a9, 0xe890, 0xea79, 0xec64, 0xee51 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe00100, 0xffe200e1, 0xffe400c4, 0xffe600a9 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe000000100, 0xffffffe2000000e1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x5500, 
0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMULL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VREV16 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6, 0xf9, 0xf8, 0xfb, 0xfa, 0xfd, 0xfc, 0xff, 0xfe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6, 0xf9, 0xf8, 0xfb, 0xfa, 0xfd, 0xfc, 0xff, 0xfe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 
0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6, 0xf9, 0xf8, 0xfb, 0xfa, 0xfd, 0xfc, 0xff, 0xfe }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VREV32 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8, 0xff, 0xfe, 0xfd, 0xfc }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8, 0xff, 0xfe, 
0xfd, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8, 0xff, 0xfe, 0xfd, 0xfc }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VREV64 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff0, 0xfffffff3, 0xfffffff2 }; 
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff0, 0xfffffff3, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1800000, 0xc1500000, 0xc1600000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSRA_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffd }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff3, 0xfffffff4 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0xfffffff4, 0xfffffff5, 0xfffffff6 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTRN/VTRNQ chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0x11, 0x11, 0xf2, 0xf3, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] 
= { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11, 0xf2, 0xf3, 0x11, 0x11, 0xf4, 0xf5, 0x11, 0x11, 0xf6, 0xf7, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0x22, 0x22, 0xfff2, 0xfff3, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55, 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55, 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x42073333, 0x42073333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTRN/VTRNQ chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf4, 0xf5, 0x11, 0x11, 0xf6, 0xf7, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff3, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 
0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11, 0xfa, 0xfb, 0x11, 0x11, 0xfc, 0xfd, 0x11, 0x11, 0xfe, 0xff, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff4, 0xfff5, 0x22, 0x22, 0xfff6, 0xfff7, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, 0xfa, 0xfb, 0x55, 0x55, 0xfc, 0xfd, 0x55, 0x55, 0xfe, 0xff, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, 0xfff6, 0xfff7, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, 0xfa, 0xfb, 0x55, 0x55, 0xfc, 0xfd, 0x55, 0x55, 0xfe, 0xff, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, 0xfff6, 0xfff7, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xc1500000, 0x42073333, 0x42073333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VUZP/VUZPQ chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VUZP/VUZPQ chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x77, 0x77, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42073333, 0x42073333, 0x42073333, 0x42073333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VZIP/VZIPQ chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf4, 0x11, 0x11, 0xf1, 0xf5, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff2, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, 0xf1, 0xf5, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, 0xf1, 0xf5, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11, 0xf1, 0xf9, 0x11, 0x11, 0xf2, 0xfa, 0x11, 0x11, 0xf3, 0xfb, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff4, 0x22, 0x22, 0xfff1, 0xfff5, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff2, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, 0xf1, 0xf9, 0x55, 0x55, 0xf2, 0xfa, 0x55, 0x55, 0xf3, 0xfb, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, 0xfff1, 0xfff5, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff2, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, 0xf1, 0xf9, 0x55, 0x55, 0xf2, 0xfa, 0x55, 0x55, 0xf3, 0xfb, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, 0xfff1, 0xfff5, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, 0x42073333, 0x42073333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VZIP/VZIPQ chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf6, 0x11, 0x11, 0xf3, 0xf7, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff3, 
0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, 0xf3, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, 0xf3, 0xf7, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11, 0xf5, 0xfd, 0x11, 0x11, 0xf6, 0xfe, 0x11, 0x11, 0xf7, 0xff, 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff6, 0x22, 0x22, 0xfff3, 0xfff7, 0x22, 0x22 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff3, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, 0xf5, 0xfd, 0x55, 0x55, 0xf6, 0xfe, 0x55, 0x55, 0xf7, 0xff, 0x55, 0x55 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, 0xfff3, 0xfff7, 0x66, 0x66 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff3, 0x77, 0x77 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, 0xf5, 0xfd, 0x55, 0x55, 0xf6, 0xfe, 0x55, 0x55, 0xf7, 0xff, 0x55, 0x55 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, 0xfff3, 0xfff7, 0x66, 0x66 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, 0x42073333, 0x42073333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VREINTERPRET/VREINTERPRETQ output: 
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 
0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xffff, 
0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xf2, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xf2, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 
0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffff1fffffff0, 0xfffffff3fffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffff1fffffff0, 0xfffffff3fffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xffffffff, 
0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff1fffffff0, 0xfffffff3fffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff1fffffff0, 0xfffffff3fffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfff3fff2fff1fff0, 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xf2, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 
0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xf2, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, 0xf2, 0xff, 0xf3, 0xff, 0xf4, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xf7, 0xff }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = 
{ 0xf3f2f1f0, 0xf7f6f5f4, 0xfbfaf9f8, 0xfffefdfc }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, 0xfff5fff4, 0xfff7fff6 }; +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xc1700000c1800000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xc1700000c1800000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1, 0x0, 0x0, 0x60, 0xc1, 0x0, 0x0, 0x50, 0xc1 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, 0x0, 0xc160, 0x0, 0xc150 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xc1700000c1800000, 0xc1500000c1600000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1, 0x0, 0x0, 0x60, 0xc1, 0x0, 0x0, 0x50, 0xc1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, 0x0, 0xc160, 0x0, 0xc150 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xc1700000c1800000, 0xc1500000c1600000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, 0x0, 0x0, 0x70, 0xc1, 0x0, 0x0, 0x60, 0xc1, 0x0, 0x0, 0x50, 0xc1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, 0x0, 0xc160, 0x0, 0xc150 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; 
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff, 0xfff2, 0xffff, 0xfff3, 0xffff }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6, 0xf9f8, 0xfbfa, 0xfdfc, 0xfffe }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, 0x0, 0xc160, 0x0, 0xc150 }; 
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0xcc, 0x80, 0xcb, 0x0, 0xcb, 0x80, 0xca }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xcb80cc00, 0xca80cb00 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xca80cb00cb80cc00 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0xcc, 0x80, 0xcb, 0x0, 0xcb, 0x80, 0xca }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xcb80cc00, 0xca80cb00 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xca80cb00cb80cc00 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0xcc, 0x80, 0xcb, 0x0, 0xcb, 0x80, 0xca }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xcb80cc00, 0xca80cb00 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0xcc, 0x80, 0xcb, 0x0, 0xcb, 0x80, 0xca, 0x0, 0xca, 0x80, 0xc9, 0x0, 0xc9, 0x80, 0xc8 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xcb80cc00, 0xca80cb00, 0xc980ca00, 0xc880c900 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xca80cb00cb80cc00, 0xc880c900c980ca00 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0xcc, 0x80, 0xcb, 0x0, 0xcb, 0x80, 0xca, 0x0, 0xca, 0x80, 0xc9, 0x0, 0xc9, 0x80, 0xc8 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xcb80cc00, 0xca80cb00, 0xc980ca00, 0xc880c900 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xca80cb00cb80cc00, 0xc880c900c980ca00 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x0, 0xcc, 0x80, 0xcb, 0x0, 0xcb, 0x80, 0xca, 0x0, 0xca, 0x80, 0xc9, 0x0, 0xc9, 0x80, 0xc8 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xcb80cc00, 0xca80cb00, 0xc980ca00, 0xc880c900 }; 
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff5, 0xfff6, 0xfff7, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQRDMULH (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH (check rounding cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = 
{ 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQRDMULH_LANE (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH_LANE (check rounding cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 
0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH_N output: +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x6, 0x6, 0x6, 0x5, 0x5, 0x4, 0x4, 0x4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xfffffffe }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; + +VQRDMULH_N (check mul cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; + +VQRDMULH_N (check rounding cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int 
VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (with input = 0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] 
= { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] 
= { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int 
VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHL/VQRSHLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] 
= { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe }; 
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000, 0x80000, 0x80000, 0x80000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x100000000000, 0x100000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int 
VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000000000000000, 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 
}; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10000000, 0x10000000, 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x1000000000000000, 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int 
VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x7fffffffffffffff }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7fffffffffffffff, 0x7fffffffffffffff }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 
0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VABA/VABAQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f, 0xba0, 0xba1, 0xba2, 0xba3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VABAL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff6, 0xfff7, 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x16, 0x17, 0x18, 0x19 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x20, 0x21 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x907, 0x908, 0x909, 0x90a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe7, 0xffffffe8 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VABAL test intermediate overflow output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffef, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffef, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe2, 0xffe3, 0xffe4, 0xffe5 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe7, 0xffffffe8 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0 }; + +VABD/VABDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x17 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe8, 0xffffffe9 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x41c26666, 0x41ba6666 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x30, 0x2f, 0x2e, 0x2d }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffe4, 0xffe5, 0xffe6, 0xffe7, 0xffe8, 0xffe9, 0xffea, 0xffeb }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffd0, 0xffffffd1, 0xffffffd2, 0xffffffd3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 
0x42407ae1, 0x423c7ae1, 0x42387ae1, 0x42347ae1 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VABDL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x18, 0x17 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe8, 0xffffffe9 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VAND/VANDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x2, 0x2, 0x0, 0x0, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x60 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x14, 0x14, 0x14, 0x14 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10, 0x10, 0x12, 0x12 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf2, 0xf2, 0xf4, 0xf4, 0xf6, 0xf6, 0xf0, 0xf0, 0xf2, 0xf2, 0xf4, 0xf4, 0xf6, 0xf6 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe0, 0xffe0, 0xffe0, 0xffe4, 0xffe4, 0xffe4, 0xffe4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe0, 0xffffffe2, 0xffffffe2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x10, 0x10 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x4, 0x4, 0x4, 0x4, 0x8, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x0, 0x1, 0x2, 0x3 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x30, 0x31, 0x32, 0x33 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VORR/VORRQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf2, 0xf3, 0xf6, 0xf7, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff4 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf4, 0xf5, 0xf6, 0xf7, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xffff, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff8, 0xfffffff9 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf6, 0xf7, 0xf6, 0xf7, 0xf6, 0xf7, 0xf6, 0xf7, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff3, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff8, 0xfffffffffffffff9 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0xfc, 0xfd, 0xfe, 0xff, 0xfc, 0xfd, 0xfe, 0xff, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff7, 0xfff7, 0xfff7, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff7, 0xfffffff7, 0xfffffff7, 
0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VORN/VORNQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfd, 0xfd, 0xff, 0xff, 0xfd, 0xfd, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffd }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffb }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfb, 0xfb, 0xfb, 0xfb, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff7, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffffd }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xf9, 0xfb, 0xfb, 0xfd, 0xfd, 0xff, 0xff, 0xf9, 0xf9, 0xfb, 0xfb, 0xfd, 0xfd, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, 0xfff7, 0xfff7, 0xfff7, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffd, 0xfffffffd, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff7, 0xfffffffffffffff7 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7, 0xfb, 0xfb, 0xfb, 0xfb, 0xff, 0xff, 0xff, 
0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9, 0xfffffffa, 0xfffffffb }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffffc, 0xfffffffffffffffd }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VEOR/VEORQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf0, 0xf1, 0xf6, 0xf7, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xc, 0xd, 0xe, 0xf }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff94 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe4, 0xe5, 0xe6, 0xe7, 0xe0, 0xe1, 0xe2, 0xe3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffee, 0xffef, 0xffec, 0xffed }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd8, 0xffffffd9 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x6, 0x7, 0x4, 0x5, 0x2, 0x3, 0x0, 0x1, 0xe, 0xf, 0xc, 0xd, 0xa, 0xb, 0x8, 0x9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1c, 0x1d, 0x1e, 0x1f, 0x18, 0x19, 0x1a, 0x1b }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x12, 0x13, 0x10, 0x11 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe8, 0xffffffffffffffe9 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0xf8, 0xf9, 0xfa, 0xfb, 0xf4, 0xf5, 0xf6, 0xf7, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc7, 0xffffffc6, 0xffffffc5, 0xffffffc4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VBIC/VBICQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf4, 0xf5, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x1, 0x2, 0x3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff90 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe1, 0xe2, 0xe3, 0xe0, 0xe1, 0xe2, 0xe3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe0, 0xffe1, 0xffe0, 0xffe1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd0, 0xffffffd1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x8, 0x9, 0x8, 0x9, 0x8, 0x9, 0x8, 0x9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x11, 0x12, 0x13, 0x10, 0x11, 0x12, 0x13 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0x11, 0x10, 0x11 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff4, 0xfff4, 0xfff4, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc0, 0xffffffc0, 0xffffffc0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCREATE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x9abcdef0, 0x12345678 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x123456789abcdef0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x9abcdef0, 0x12345678 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD2_LANE/VLD2Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 
0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xcc00, 0xcb80, 0xaaaa, 0xaaaa }; + +VLD2_LANE/VLD2Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 
0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + +VLD3_LANE/VLD3Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xcc00, 0xcb80 }; + +VLD3_LANE/VLD3Q_LANE 
chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 
0xaaaa, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + +VLD3_LANE/VLD3Q_LANE chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 
0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1600000, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + +VLD4_LANE/VLD4Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + +VLD4_LANE/VLD4Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 
0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + +VLD4_LANE/VLD4Q_LANE chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + +VLD4_LANE/VLD4Q_LANE chunk 3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 
0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; + +VLD2_DUP/VLD2Q_DUP chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; 
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD2_DUP/VLD2Q_DUP chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; 
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD3_DUP/VLD3Q_DUP chunk 0 output: 
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xcc00 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 
}; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD3_DUP/VLD3Q_DUP chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff2, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff2, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff1, 0xfff2, 0xfff0, 0xfff1 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0xcb00, 0xcc00, 0xcb80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD3_DUP/VLD3Q_DUP chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff0, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb00, 0xcc00, 0xcb80, 0xcb00 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD4_DUP/VLD4Q_DUP chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 
0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD4_DUP/VLD4Q_DUP chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = 
{ 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD4_DUP/VLD4Q_DUP chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) 
[] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VLD4_DUP/VLD4Q_DUP chunk 3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff3 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 
0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLA output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x2bf7, 0x2bf8 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 
0x3e0a }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x43ac, 0x43ad }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x43a14e76, 0x43a1ce76 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4830, 0x4831, 0x4832, 0x4833, 0x4834, 0x4835, 0x4836, 0x4837 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x470f, 0x4710, 0x4711, 0x4712 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3620, 0x3621, 0x3622, 0x3623 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x45f0ae15, 0x45f0b615, 0x45f0be15, 0x45f0c615 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLS output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xe054, 0xe055, 0xe056, 0xe057 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffd3e9, 0xffffd3ea }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffbc34, 0xffffbc35 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b14e76, 0xc3b0ce76 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xb7b0, 0xb7b1, 0xb7b2, 0xb7b3, 0xb7b4, 0xb7b5, 0xb7b6, 0xb7b7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffb8d1, 0xffffb8d2, 0xffffb8d3, 0xffffb8d4 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc, 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc9c0, 0xffffc9c1, 0xffffc9c2, 0xffffc9c3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc5f1ae15, 0xc5f1a615, 0xc5f19e15, 0xc5f19615 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMUL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x1, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfde0, 0xfe02, 0xfe24, 0xfe46 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffcd0, 0xfffffd03 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0x4, 0x48, 0x8c, 0xd0, 0x14, 0x58, 0x9c }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff9a0, 0xfffffa06 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc0, 0x84, 0x48, 0xc, 0xd0, 0x94, 0x58, 0x1c }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc4053333, 0xc3f9c000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x90, 0x7, 0x7e, 0xf5, 0x6c, 0xe3, 0x5a, 0xd1, 0x48, 0xbf, 0x36, 0xad, 0x24, 0x9b, 0x12, 0x89 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf780, 0xf808, 0xf890, 0xf918, 0xf9a0, 0xfa28, 0xfab0, 0xfb38 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff670, 0xfffff709, 0xfffff7a2, 0xfffff83b }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0xa, 0xb4, 0x5e, 0x8, 0xb2, 0x5c, 0x6, 0xb0, 0x5a, 0x4, 0xae, 0x58, 0x2, 0xac, 0x56 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf450, 0xf50b, 0xf5c6, 0xf681, 0xf73c, 0xf7f7, 0xf8b2, 0xf96d }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff340, 0xfffff40c, 0xfffff4d8, 0xfffff5a4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0xca, 0x34, 0x9e, 0xc8, 0x62, 0x9c, 0x36, 0x30, 0x9a, 0x64, 0xce, 0x98, 0x32, 0xcc, 0x66 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4c73333, 0xc4bac000, 0xc4ae4ccd, 0xc4a1d999 }; 
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMUL_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffface0, 0xffffb212 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b66666, 0xc3ab0000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc, 0xffd0, 0xffd4, 0xffd8, 0xffdc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffde0, 0xfffffe02, 0xfffffe24, 0xfffffe46 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c, 0xccd0, 0xd114, 0xd558, 0xd99c }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffface0, 0xffffb212, 0xffffb744, 0xffffbc76 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc3b66666, 0xc3ab0000, 0xc39f9999, 0xc3943333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMUL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfef0, 0xff01, 0xff12, 0xff23 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfcd0, 0xfd03, 0xfd36, 0xfd69 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffbc0, 0xfffffc04 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b26666, 0xc3a74000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf, 0xfc04, 0xfc59, 0xfcae, 0xfd03 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff9a0, 0xfffffa06, 0xfffffa6c, 0xfffffad2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf890, 0xf907, 0xf97e, 0xf9f5, 0xfa6c, 0xfae3, 0xfb5a, 0xfbd1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff780, 0xfffff808, 0xfffff890, 0xfffff918 }; +VECT_VAR_DECL(expected,uint,64,2) 
[] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4b1cccd, 0xc4a6b000, 0xc49b9333, 0xc4907667 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMULL_N output: +VECT_VAR_DECL(expected,int,32,4) [] = { 0x11000, 0x11000, 0x11000, 0x11000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x22000, 0x22000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33000, 0x33000, 0x33000, 0x33000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x44000, 0x44000 }; + +VMLA_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4418c687, 0x44190687 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a 
}; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x441a3168, 0x441a7168, 0x441ab168, 0x441af168 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLS_LANE output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffc1d9, 0xffffc1da }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffc1d9, 0xffffc1da }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc420c687, 0xc4208687 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc, 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc, 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da, 0xffffc1db, 0xffffc1dc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4223168, 0xc421f168, 0xc421b168, 0xc4217168 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLA_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x595, 0x596, 0x597, 0x598 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xb3a, 0xb3b }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1684, 0x1685 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4497deb8, 0x4497feb8 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] 
= { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1c29, 0x1c2a, 0x1c2b, 0x1c2c, 0x1c2d, 0x1c2e, 0x1c2f, 0x1c30 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x21ce, 0x21cf, 0x21d0, 0x21d1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2773, 0x2774, 0x2775, 0x2776, 0x2777, 0x2778, 0x2779, 0x277a }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x2d18, 0x2d19, 0x2d1a, 0x2d1b }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x4568087b, 0x4568187b, 0x4568287b, 0x4568387b }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMLS_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfa4b, 0xfa4c, 0xfa4d, 0xfa4e }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff4a6, 0xfffff4a7 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xef01, 0xef02, 0xef03, 0xef04 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffe95c, 0xffffe95d }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc49bdeb8, 0xc49bbeb8 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xe3b7, 0xe3b8, 0xe3b9, 0xe3ba, 0xe3bb, 0xe3bc, 0xe3bd, 0xe3be }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffde12, 0xffffde13, 0xffffde14, 0xffffde15 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd86d, 0xd86e, 0xd86f, 0xd870, 0xd871, 0xd872, 0xd873, 0xd874 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffd2c8, 0xffffd2c9, 0xffffd2ca, 0xffffd2cb }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc56a087b, 0xc569f87b, 0xc569e87b, 0xc569d87b }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSLI_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe1, 0xffe2, 0xffe3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x6, 0x7 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x64fffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x50, 0x51, 0x52, 0x53, 0x50, 0x51, 0x52, 0x53 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3ffffff0, 
0x3ffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x10 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x50, 0x51, 0x52, 0x53, 0x50, 0x51, 0x52, 0x53 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfe2ffff0, 0xfe2ffff1, 0xfe2ffff2, 0xfe2ffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x18fff0, 0x18fff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3ff0, 0x3ff1, 0x3ff2, 0x3ff3, 0x3ff4, 0x3ff5, 0x3ff6, 0x3ff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1bfffff0, 0x1bfffff1, 0x1bfffff2, 0x1bfffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffffffffff0, 0x7ffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3ff0, 0x3ff1, 0x3ff2, 0x3ff3, 0x3ff4, 0x3ff5, 0x3ff6, 0x3ff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSLI_Nmax shift amount output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x70, 0x71, 
0x72, 0x73, 0x74, 0x75, 0x76, 0x77 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x7ffffff0, 0x7ffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x7ffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff5, 0x7ff6, 0x7ff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7ffffff0, 0x7ffffff1, 0x7ffffff2, 0x7ffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x7ffffffffffffff0, 0x7ffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSRI_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 
0xffff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000001, 0x80000001 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffff00000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffc0, 0xffc0, 0xffc0, 0xffc0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xe000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xffc0, 0xffc0, 0xffc0, 0xffc0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffff000000000000, 0xffff000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffe00, 0xfffffe00, 0xfffffe00, 0xfffffe00 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffff800, 0xfffffffffffff800 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0 }; + +VSRI_N max shift amount output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 
0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTST/VTSTQ (signed input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0xffff, 0x0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0xffffffff, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) 
[] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTST/VTSTQ (unsigned input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0xffff, 0x0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0xffffffff, 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VADDHN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x32, 0x32, 0x32, 0x32 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x18 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x37, 0x37, 0x37, 0x37 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3, 0x3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRADDHN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x19, 0x19 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x38, 0x38, 0x38, 0x38 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x4, 0x4 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 
0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VADDL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6, 
0xffe7, 0xffe8, 0xffe9, 0xffea }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3, 0xffffffe4, 0xffffffe5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e3, 0x1e4, 0x1e5, 0x1e6, 0x1e7, 0x1e8, 0x1e9, 0x1ea }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe2, 0x1ffe3, 0x1ffe4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe0, 0x1ffffffe1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VADDW output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe7, 0xffe8, 0xffe9, 0xffea }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3, 0xffffffe4, 0xffffffe5 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe1, 0xffe2, 0xffe3, 0xffe4 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe0, 0xffffffe1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VHADD/VHADDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 
0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3, 0xfff3, 0xfff4, 0xfff4, 0xfff5 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2, 0xfff3, 0xfff3, 0xfff4, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRHADD/VRHADDQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff3, 0xfff3, 0xfff4, 0xfff4, 0xfff5, 0xfff5 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3, 0xfff3, 0xfff4, 0xfff4, 0xfff5 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VHSUB/VHSUBQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfe, 0xff, 0xff, 0x0, 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfe, 0xff, 0xff, 0x0, 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 
0xffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xfe, 0xff, 0xff, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffe, 0xffff, 0xffff, 0x0, 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, 0xff, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSUBL output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3, 0x4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3, 0x4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x1, 0x2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSUBW output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 
}; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3, 0x4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfefd, 0xfefe, 0xfeff, 0xff00, 0xff01, 0xff02, 0xff03, 0xff04 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffeffff, 0xffff0000, 0xffff0001, 0xffff0002 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffff00000000, 0xffffffff00000001 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSUBHN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x31, 0x31, 0x31, 0x31 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x17, 0x17 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x36, 0x36, 0x36, 0x36 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSUBHN output: 
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x31, 0x31, 0x31, 0x31 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x17, 0x17 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x36, 0x36, 0x36, 0x36 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 
0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VMVN/VMVNQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf, 0xe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf, 0xe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf, 0xe, 0xd, 0xc }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 
}; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; + +VQMOVN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1278, 0x1278, 0x1278, 0x1278 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x12345678, 0x12345678 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8765, 0x8765, 0x8765, 0x8765 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x87654321, 0x87654321 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; + +VQMOVN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0; + +VQMOVUN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x5678, 0x5678, 0x5678, 0x5678 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x12345678, 0x12345678 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1; + +VQMOVUN (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHR_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 
0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x8000000, 0x8000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x80000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, 0xff, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffc, 0xfffffffc, 0xfffffffd, 0xfffffffd }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3c, 0x3c, 0x3d, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3f, 0x3f, 0x3f, 0x3f, 0x40, 0x40 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1ffe, 0x1ffe, 0x1ffe, 0x1ffe, 0x1fff, 0x1fff, 0x1fff, 0x1fff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x8000000, 0x8000000, 0x8000000, 0x8000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHR_N (overflow test: max shift amount, positive input) output: 
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x40, 0x40, 
0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000000000000000, 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 
0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHR_N (overflow test: shift by 3, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10000000, 0x10000000, 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x1000000000000000, 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xc000, 0xc000, 0xc000, 0xc000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xc0000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xc000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xc000000000000000, 0xc000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSHR_N (overflow test: shift by 3, with negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf000, 0xf000, 0xf000, 0xf000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf0000000, 0xf0000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf0000000, 0xf0000000, 0xf0000000, 0xf0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf000000000000000, 0xf000000000000000 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSRA_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffd, 0xfffffffe }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffd, 0xfffe, 0xffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffd, 0xfffffffe, 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0, 0x1, 0x2, 0x3, 0x4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff4, 0xfffffff5, 0xfffffff6, 0xfffffff7 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0xfffffffffffffff1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSRA_N (checking overflow: shift by 1, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000000000000000, 0x4000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x8000000000000000, 0x8000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSRA_N (checking overflow: shift by 3, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x10000000, 0x10000000, 0x10000000, 0x10000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x1000000000000000, 0x1000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20000000, 0x20000000, 0x20000000, 0x20000000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000000000000000, 0x2000000000000000 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSRA_N (checking overflow: shift by max, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; 
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSRA_N (checking overflow: shift by 1, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xc000, 0xc000, 0xc000, 0xc000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xc0000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xc000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; 
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000, 0xc000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xc000000000000000, 0xc000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSRA_N (checking overflow: shift by max, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xf000, 0xf000, 0xf000, 0xf000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xf0000000, 0xf0000000 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xf000000000000000 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000, 0xf000 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf0000000, 0xf0000000, 0xf0000000, 0xf0000000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf000000000000000, 0xf000000000000000 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VRSRA_N (checking overflow: shift by max, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 
0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VSHLL_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6, 0xffe8, 0xffea, 0xffec, 0xffee }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2, 0xffffffe4, 0xffffffe6 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffff80, 0xffffffffffffff88 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3c0, 0x3c4, 0x3c8, 0x3cc, 0x3d0, 0x3d4, 0x3d8, 0x3dc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff00, 0xfff10, 0xfff20, 0xfff30 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffff80, 0x7ffffff88 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VPADDL/VPADDLQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe1, 0xffe5, 0xffe9, 0xffed }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe1, 
0xffffffe5 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffe1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1e1, 0x1e5, 0x1e9, 0x1ed }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffe1, 0x1ffe5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffe1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe1, 0xffe5, 0xffe9, 0xffed, 0xfff1, 0xfff5, 0xfff9, 0xfffd }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe1, 0xffffffe5, 0xffffffe9, 0xffffffed }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe1, 0xffffffffffffffe5 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e1, 0x1e5, 0x1e9, 0x1ed, 0x1f1, 0x1f5, 0x1f9, 0x1fd }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe5, 0x1ffe9, 0x1ffed }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe1, 0x1ffffffe5 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VPADD output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed, 0xe1, 0xe5, 0xe9, 
0xed }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe1, 0xffffffe1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed, 0xe1, 0xe5, 0xe9, 0xed }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe1, 0xffffffe1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1f80000, 0xc1f80000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 
}; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VPADAL/VPADALQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffd1, 0xffd6, 0xffdb, 0xffe0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffd1, 0xffffffd6 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffd1 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1d1, 0x1d6, 0x1db, 0x1e0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffd1, 0x1ffd6 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffd1 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffd1, 0xffd6, 0xffdb, 0xffe0, 0xffe5, 0xffea, 0xffef, 0xfff4 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffd1, 0xffffffd6, 0xffffffdb, 0xffffffe0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffd1, 0xffffffffffffffd6 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1d1, 0x1d6, 0x1db, 0x1e0, 0x1e5, 0x1ea, 0x1ef, 0x1f4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffd1, 0x1ffd6, 0x1ffdb, 0x1ffe0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffd1, 0x1ffffffd6 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 
0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQSHLU_N/VQSHLUQ_N (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffffe }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 
0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffffe, 0xfffffffffffffffe }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 1; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 1; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 
}; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff, 0xffffffffffffffff }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,8,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,1) = 0; +int VECT_VAR(expected_cumulative_sat,int,8,16) = 0; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQSHLU_N/VQSHLUQ_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 
0x180, 0x180, 0x180, 0x180, 0x180, 0x180, 0x180 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCLZ/VCLZQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x11, 0x11 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x5, 0x5 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd, 0xd, 0xd, 0xd, 
0xd, 0xd, 0xd, 0xd }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1f, 0x1f, 0x1f, 0x1f }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCLZ/VCLZQ (input=0) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x20 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,uint,16,8) 
[] = { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x20, 0x20, 0x20, 0x20 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCLS/VCLSQ (positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x19, 0x19 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x14, 0x14, 0x14, 0x14 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCLS/VCLSQ (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x1, 0x1, 0x1, 0x1 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x1, 0x1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 
0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VCNT/VCNTQ output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHRN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1; + +VQSHRN_N (check saturation: shift by 3) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) 
[] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] 
= { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; +int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0; + +VQSHRN_N (check saturation: shift by max) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VPMAX output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, 0xf1, 0xf3, 0xf5, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, 0xf1, 0xf3, 0xf5, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VPMIN output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, 0xf0, 0xf2, 0xf4, 0xf6 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff2, 0xfff0, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, 0xf0, 0xf2, 0xf4, 0xf6 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0xfff0, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 
0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQSHRUN_N (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQSHRUN_N (check cumulative saturation) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 
0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQSHRUN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbe, 0xdeadbe }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQRSHRUN_N (negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQRSHRUN_N (check cumulative saturation: shift by 1) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; 
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 1; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 1; + +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 
0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +int VECT_VAR(expected_cumulative_sat,int,16,8) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,4) = 1; +int VECT_VAR(expected_cumulative_sat,int,64,2) = 0; + +VQRSHRUN_N output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; 
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbf, 0xdeadbf }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + 
+VST2_LANE/VST2Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VST2_LANE/VST2Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = 
{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VST3_LANE/VST3Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, 0x0, 0x0, 0x0, 0x0 }; 
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VST3_LANE/VST3Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 
}; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VST3_LANE/VST3Q_LANE chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 
0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VST4_LANE/VST4Q_LANE chunk 0 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0x0, 0x0, 0x0, 0x0 }; + +VST4_LANE/VST4Q_LANE chunk 1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VST4_LANE/VST4Q_LANE chunk 2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VST4_LANE/VST4Q_LANE chunk 3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = 
{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTBL1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0xf2, 0xf2, 0xf2, 0x0, 0x0, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, 0x0, 0x0, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, 0x0, 0x0, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTBL2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, 0x0, 0x0, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0x0, 0x0, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0x0, 0x0, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 
0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTBL3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, 0xff, 0x0, 0xf4, 0xf4 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0x0, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0x0, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 
0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTBL4 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, 0x3, 0x0, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0x0, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0x0, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTBX1 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0xf2, 0xf2, 0xf2, 0x33, 0x33, 0xf2, 0xf2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, 0xcc, 0xcc, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, 0xcc, 0xcc, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; 
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTBX2 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, 0x33, 0x33, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0xcc, 0xcc, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0xcc, 0xcc, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; 
+VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTBX3 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, 0xff, 0x33, 0xf4, 0xf4 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0xcc, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 
0xf7, 0xf7, 0xf7, 0xff, 0xcc, 0xf7, 0xf7 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +VTBX4 output: +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, 0x3, 0x33, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0xcc, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 
0x3333 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0xcc, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3f068000, 0x3f068000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 
0x3c030000, 0x3c030000, 0x3c030000, 0x3c030000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xee800000, 0xee800000, 0xee800000, 0xee800000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xbdcc8000, 0xbdcc8000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xbc030000, 0xbc030000, 0xbc030000, 0xbc030000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7f800000, 0x7f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x80000000, 0x80000000 }; + +VRSQRTE/VRSQRTEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x9c800000, 0x9c800000, 0x9c800000, 0x9c800000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3e498000, 0x3e498000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3e700000, 0x3e700000, 0x3e700000, 0x3e700000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xae800000, 0xae800000, 0xae800000, 0xae800000 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xb4800000, 0xb4800000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xed000000, 0xed000000, 0xed000000, 0xed000000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 
0x7fc00000, 0x7fc00000 }; + +VCAGE/VCAGEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x0 }; + +VCAGE/VCAGEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +VCALE/VCALEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; + +VCALE/VCALEQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VCAGT/VCAGTQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 }; + +VCAGT/VCAGTQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +VCALT/VCALTQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; + +VCALT/VCALTQ output: +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +VCVT/VCVTQ output: +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4f800000, 0x4f800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x41720000, 0xc1720000, 0x0, 0x80000000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x5 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x5 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x0, 0x8000, 0x4b8f, 0xcb8f }; +VECT_VAR_DECL(expected,int,32,4) [] = { 
0x0, 0x0, 0xf, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xf, 0x0 }; + +VCVT_N/VCVTQ_N output: +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc0800000, 0xc0700000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4c000000, 0x4c000000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xb2800000, 0xb2700000, 0xb2600000, 0xb2500000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x49800000, 0x49800000, 0x49800000, 0x49800000 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xff0b3333, 0x54cccd }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x15 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x1e3d7, 0xfffe1c29 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x1e, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xa, 0xa }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xa, 0xa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xa66666, 0xa66666 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xa66666, 0xa66666 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfbccc, 0xfbccc, 0xfbccc, 0xfbccc }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfbccc, 0xfbccc, 0xfbccc, 0xfbccc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x38640000, 0x7f800000, 0x7fc00000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0x7e00, 0x7c00, 0xfc00, 0x8000 }; + +VRECPS/VRECPSQ output: +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2e19eb7, 0xc2e19eb7 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1db851f, 0xc1db851f, 0xc1db851f, 0xc1db851f }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; 
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x40000000, 0x40000000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; + +VRSQRTS/VRSQRTSQ output: +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2796b84, 0xc2796b84 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0e4a3d8, 0xc0e4a3d8, 0xc0e4a3d8, 0xc0e4a3d8 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3fc00000, 0x3fc00000 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000 }; diff --git a/license.html b/license.html new file mode 100644 index 0000000..ad41df4 --- /dev/null +++ b/license.html @@ -0,0 +1,456 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+<title>RVDS 3.1 EULA - LEC-ELA-00069</title>
+</head>
+<body>
+<div align="center"><b>
+END USER LICENCE AGREEMENT FOR THE ARM REALVIEW DEVELOPMENT SUITE
+</b><br></div><br>
+THIS END USER LICENCE AGREEMENT ("<b>LICENCE</b>") IS A LEGAL AGREEMENT
+BETWEEN YOU (EITHER A SINGLE INDIVIDUAL, OR SINGLE LEGAL ENTITY) AND ARM
+LIMITED ("<b>ARM</b>") FOR THE USE OF THE SOFTWARE ACCOMPANYING THIS
+LICENCE. ARM IS ONLY WILLING TO LICENSE THE SOFTWARE TO YOU ON CONDITION
+THAT YOU ACCEPT ALL OF THE TERMS IN THIS LICENCE. BY CLICKING "<b>I
+AGREE</b>" OR BY INSTALLING OR OTHERWISE USING OR COPYING THE SOFTWARE
+YOU INDICATE THAT YOU AGREE TO BE BOUND BY ALL THE TERMS OF THIS
+LICENCE. IF YOU DO NOT AGREE TO THE TERMS OF THIS LICENCE, ARM IS
+UNWILLING TO LICENSE THE SOFTWARE TO YOU AND YOU MAY NOT INSTALL, USE OR
+COPY THE SOFTWARE, BUT YOU SHOULD PROMPTLY RETURN THE SOFTWARE TO YOUR
+SUPPLIER AND ASK FOR A REFUND OF ANY LICENCE FEE PAID.
+<br><br>
+"<b>Software</b>" means any software, firmware and data accompanying
+this Licence, any printed, electronic or online documentation supplied
+with it, and any updates, patches and modifications ARM may agree to
+make available to you under the terms of this Licence, in all cases
+relating to the RealView Development Suite package of software
+development tools. The package includes; (i) RealView Compilation Tools,
+including compiler, assembler, linker, libraries and example code; (ii)
+RealView Debugger; (iii) instruction set level models of ARM
+microprocessors; (iv) ARM plug-ins for the Eclipse Integrated
+Development Environment (IDE); and (v) if you have obtained the
+CodeWarrior version of the Suite, then your package also includes the
+Metrowerks CodeWarrior IDE. For convenience, your package also includes
+a version of the Eclipse IDE, and other Separate Files (defined below in
+Clause 7), but these are licensed under their own terms and not as part
+of the Software.
+<br><br>
+"<b>Seat</b>" means a written or electronic authorisation from ARM
+to run any Software component as a single task or process on one (1)
+processing unit (such as a processor core, hardware partition, blade,
+virtual machine or emulator) only, by itself or concurrently with other
+Seats, for the time period specified in that authorisation.
+<br><br>
+"<b>Licence Key</b>" means an electronic licence key issued to you by
+ARM to enable the use of the Software.<br>
+<br><br>
+<b>1. LICENCE GRANTS.</b><br>
+(i) SOFTWARE: ARM hereby grants to you, subject to the terms and
+conditions of this Licence, a non-exclusive, non-transferable licence,
+only for the Seats, to use the Software, solely for the purposes of your
+internal development, testing and debugging of software applications
+(referred to in this Licence as "Software Applications"). You shall not
+modify or redistribute any of the Software except as may be permitted by
+clauses 1(ii)-1(iv) below.
+<br><br>
+(ii) LIBRARIES: In respect of any Language Libraries and Helper
+Libraries (together "Target Libraries") identified in the Schedule, ARM
+hereby grants you, provided you comply with the terms of clause 1(iv),
+additional non-exclusive, non-transferable licences to; (a) incorporate
+and link the Language Libraries into or with Software Applications,
+provided that Software Applications contain substantial additional
+functionality; (b) incorporate and link the Helper Libraries into, or
+use them in, Software Applications; and (c) reproduce and distribute the
+Target Libraries, only in object code form, and only as part of Software
+Applications.
+<br><br>
+(iii) EXAMPLE CODE: In respect of any Example Code identified in the
+Schedule, ARM hereby grants you, provided you comply with the terms of
+clause 1(iv), additional non-exclusive, non-transferable licences to;
+(a) use, copy and modify the Example Code, only for internal testing,
+evaluation, and development and testing of Software Applications; (b)
+incorporate, compile and link the Example Code and any modifications
+into Software Applications (except for Dhrystone which may only be used
+for internal testing and evaluation); (c) reproduce and distribute the
+Example Code identified in the Schedule as "Limited Use", and any
+modifications, only in object code form (except for Dhrystone which
+shall not be reproduced except as necessary to exercise the above
+internal use licence, nor redistributed); and (d) reproduce and
+distribute the Example Code identified in the Schedule as "Extended
+Use", and any modifications, in source or object code form and only as
+part of Software Applications.
+<br><br>
+(iv) FURTHER CONDITIONS ON REDISTRIBUTION: If you are authorised and
+choose to redistribute the whole or any part of the Example Code or
+Target Libraries, you agree to; (a) ensure that they are licensed for
+use only as part of Software Applications and only on microprocessors
+manufactured or simulated under licence from ARM; (b) not use ARM's
+name, logo or trademarks to market Software Applications; (c) include
+valid copyright notices on Software Applications, and preserve any
+copyright notices which are included with, or in, the Example Code and
+Target Libraries; (d) comply with all the other terms of this Licence;
+and (e) not permit further redistribution of the Example Code or Target
+Libraries by your customers except where your distributors redistribute
+them to your end-user customers only as part of Software Applications
+and your distributors otherwise comply with the terms of this Licence.
+<br><br>
+<b>2. RESTRICTIONS ON USE OF THE SOFTWARE.</b><br>
+SEATS, INSTALLATION AND LICENCE KEYS: Your use of the Software is
+limited to the specific number of Seats issued to you by ARM. For each
+Seat, ARM will make a Licence Key available to you to enable use of the
+Software, or certain components or optional functionality in the
+Software, as applicable. You shall only install and use the Software and
+Licence Keys on computers, or virtual machines running on computers,
+which are owned by you (or which are in your exclusive possession under
+an equipment finance arrangement) and which are either; (i) located at
+your premises; or (ii) portable computers which shall remain in the
+possession and control of your employees when outside such premises.
+Each Seat and Licence Key shall be limited or locked to a single item of
+your personal computer hardware (also known as host ID) on which the
+Seat or Licence Key is initially installed, except where ARM may
+separately agree to authorise the transfer (also known as re-hosting) of
+such Seat or Licence Key to another host ID.
+<br><br>
+COPYING: You shall not use or copy the Software or Licence Keys except
+as expressly authorised in this Licence. You may make one additional
+copy of the delivered Software media or image for backup or archival
+purposes.
+<br><br>
+UPDATES AND UPGRADES: Upgrades and updates replace the earlier releases
+of Software that you are upgrading or updating, and are not additional
+copies. When you upgrade or update the Software, the Licence Key for
+such new release replaces your existing Licence Key. Therefore, if you
+are receiving the Software as an update or upgrade, you obtain no rights
+to, and shall not, install or use this update or upgrade unless you have
+first ceased all use of, and deleted, your Licence Key for the version
+of the Software that you are updating or upgrading.
+<br><br>
+PERMITTED USERS: The Software and Licence Keys shall be used only by
+your employees, or by your bona fide sub-contractors for whose acts and
+omissions you hereby agree to be responsible to ARM to the same extent
+as those of your employees, and provided always that such
+sub-contractors; (i) work only onsite at your premises; (ii) are aware
+of the terms of this Licence; (iii) are contractually obligated to use
+the same only for your benefit, and (iv) agree to assign all their work
+product and any rights they create therein in the supply of such work to
+you. Only the single individual, company or other legal entity to whom
+ARM is supplying this Licence may use the Software and Licence Keys. You
+shall not allow third parties (including but not limited to any
+subsidiary, parent or affiliated companies, or offsite contractors you
+may have) to use the Software or Licence Keys unless ARM specifically
+agrees otherwise with you on a case by case basis.
+<br><br>
+REMOTE USE: The Software and Licence Keys shall only be used onsite at
+your premises, except when used offsite by an employee (but not a
+sub-contractor) of yours, on a portable computer, in accordance with the
+"SEATS, INSTALLATION AND LICENCE KEYS" paragraph above, as part of his
+or her normal work activities, and only for your benefit.
+<br><br>
+MULTIPLE VERSIONS: The media on which the Software resides may contain
+more than one version of the Software, each of which is compatible with
+a different operating system (such as Microsoft Windows XP Professional
+and Red Hat Linux). Each version may only be installed and used by you
+to the extent that you have valid Seats and Licence Keys from ARM.
+<br><br>
+ACADEMIC OR EDUCATIONAL USE RESTRICTION: If you or your employer or
+institution paid academic or educational pricing for the Software, or
+the Software is identified as an academic or educational version
+(collectively "Academic Software"), then notwithstanding anything else
+in this Licence, YOU AGREE TO USE THE ACADEMIC SOFTWARE ONLY FOR
+ACADEMIC, NON-COMMERCIAL PURPOSES, AND ARM DOES NOT GRANT YOU ANY RIGHTS
+TO DISTRIBUTE OR SUB-LICENCE SOFTWARE APPLICATIONS UNDER THIS LICENCE.
+<br><br>
+REVERSE ENGINEERING: Except to the extent that such activity is
+permitted by applicable law you shall not reverse engineer, decompile or
+disassemble any of the Software. If the Software was provided to you in
+Europe you shall not reverse engineer, decompile or disassemble any of
+the Software for the purposes of error correction.
+<br><br>
+BENCHMARKING: This licence does not prevent you from using the Software
+for internal benchmarking purposes. However, you shall treat any and all
+benchmarking data relating to the Software, and any other results of
+your use or testing of the Software which are indicative of its
+performance, efficacy, reliability or quality, as confidential
+information and you shall not disclose such information to any third
+party without the express written permission of ARM. ARM may agree to
+waive the restrictions in this paragraph if requested in writing.
+<br><br>
+RESTRICTIONS ON TRANSFER OF LICENSED RIGHTS: The rights granted to you
+under this agreement may not be assigned, sublicensed or otherwise
+transferred by you to any third party without the prior written consent
+of ARM. You shall not rent or lease the Software, or share it with
+contractors or third parties.
+<br><br>
+COPYRIGHT AND RESERVATION OF RIGHTS: The Software is owned by ARM or its
+licensors and is protected by copyright and other intellectual property
+laws and international treaties. The Software is licensed, not sold. You
+acquire no rights to the Software other than as expressly provided by
+this Licence. You shall not remove from the Software any copyright
+notice or other notice and shall ensure that any such notice is
+reproduced in any copies of the whole or any part of the Software made
+by you.
+<br><br>
+<b>3. SUPPORT.</b><br>
+If you purchased the Software directly from ARM, and you are not
+receiving it as Academic Software (defined in clause 2), you are
+entitled to reasonable support for the Software, for the period of one
+(1) year, commencing from the day of purchase. The support will be
+provided by telephone, email or other written format designated by ARM,
+prioritised at ARM's discretion, and may not be used as a substitute for
+training or as additional resource for your programming projects. If you
+obtained the Software from an ARM authorised reseller or other third
+party, or no charge was made by ARM for the Software, update or upgrade,
+or it is Academic Software, then, except as provided as a remedy to any
+claim under the limited warranties provided in clause 5, you are not
+entitled to any support for the Software from ARM under this Licence.
+The vendor of the Software may or may not offer support to you for the
+Software. Please refer to the Technical Support area of
+http://www.arm.com for contact details for ARM's support service and (if
+applicable) other authorised support channels.
+<br><br>
+<b>4. CONFIDENTIALITY.</b><br>
+You acknowledge that the Software, Licence Keys, and any benchmarking
+data and related information mentioned in Clause 2 contain trade secrets
+and confidential material, and you agree to maintain them in confidence
+and apply security measures no less stringent than the measures which
+you apply to protect your own like information, but not less than a
+reasonable degree of care, to prevent their unauthorised disclosure and
+use. Subject to any restrictions imposed by applicable law, the period
+of confidentiality shall be indefinite. You agree that you shall not use
+any such information other than in normal use of the Software under the
+licences granted in this Licence.
+<br><br>
+<b>5. LIMITED WARRANTIES.</b><br>
+For the period of ninety (90) days from the date of receipt by you of
+the Software, ARM warrants to you that (i) the media on which the
+Software is provided shall be free from defects in materials and
+workmanship under normal use; and (ii) the Software will perform
+substantially in accordance with its accompanying documentation (if
+any). ARM's total liability and your exclusive remedy for breach of
+these limited warranties shall be limited to ARM, at ARM's option; (a)
+refunding the price paid by you for the Software; or (b) replacing the
+defective Software; or (c) using reasonable efforts to correct material,
+documented, reproducible defects in the Software and delivering such
+corrected Software to you. Any replacement Software will be warranted
+for the remainder of the original warranty period or thirty (30) days,
+whichever is the longer.
+<br><br>
+EXCEPT AS MAY BE PROVIDED ABOVE, YOU AGREE THAT THE SOFTWARE IS LICENSED
+"AS IS", AND THAT ARM EXPRESSLY DISCLAIMS ALL REPRESENTATIONS,
+WARRANTIES, CONDITIONS OR OTHER TERMS, EXPRESS OR IMPLIED, INCLUDING
+WITHOUT LIMITATION THE IMPLIED WARRANTIES OF NON-INFRINGEMENT,
+SATISFACTORY QUALITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+<br><br>
+YOU EXPRESSLY ASSUME ALL LIABILITIES AND RISKS, FOR USE OR OPERATION OF
+SOFTWARE APPLICATIONS, INCLUDING WITHOUT LIMITATION, APPLICATIONS
+DESIGNED OR INTENDED FOR MISSION CRITICAL APPLICATIONS, SUCH AS
+PACEMAKERS, WEAPONRY, AIRCRAFT NAVIGATION, FACTORY CONTROL SYSTEMS,
+ETC. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE ENTIRE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+<br><br>
+<b>6. LIMITATION OF LIABILITY.</b><br>
+TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL ARM
+BE LIABLE FOR ANY INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
+(INCLUDING LOSS OF PROFITS) ARISING OUT OF THE USE OR INABILITY TO USE
+THE SOFTWARE WHETHER BASED ON A CLAIM UNDER CONTRACT, TORT OR OTHER
+LEGAL THEORY, EVEN IF ARM WAS ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+<br><br>
+ARM does not seek to limit or exclude liability for death or personal
+injury arising from ARM's negligence and because some jurisdictions do
+not permit the exclusion or limitation of liability for consequential or
+incidental damages the above limitation relating to liability for
+consequential damages may not apply to you.
+<br><br>
+NOTWITHSTANDING ANYTHING TO THE CONTRARY CONTAINED IN THIS LICENCE, THE
+MAXIMUM LIABILITY OF ARM TO YOU IN AGGREGATE FOR ALL CLAIMS MADE AGAINST
+ARM IN CONTRACT, TORT OR OTHERWISE UNDER OR IN CONNECTION WITH THE
+SUBJECT MATTER OF THIS LICENCE SHALL NOT EXCEED THE TOTAL OF SUMS PAID
+BY YOU TO ARM (IF ANY) FOR THIS LICENCE.
+<br><br>
+<b>7. THIRD PARTY RIGHTS.</b><br>
+The Software is supplied with, and uses, third party software or
+materials which are governed by their own separate licence agreements
+(together "Separate Files"). This Licence does not apply to such
+Separate Files and they are not included in the term "Software" under
+this Licence. You agree to comply with all terms and conditions imposed
+on you in respect of such Separate Files including those identified in
+the Schedule ("Third Party Terms"). Any provisions in this Licence
+which differ from Third Party Terms are offered by ARM alone and shall
+not supersede or modify any Third Party Terms. In addition to Third
+Party Terms, the disclaimer of warranty and limitation of liability
+provisions in this Licence shall apply to all Software and Separate
+Files.
+<br><br>
+ARM HEREBY DISCLAIMS ANY AND ALL WARRANTIES EXPRESS OR IMPLIED FROM ANY
+THIRD PARTIES REGARDING ANY SEPARATE FILES, ANY THIRD PARTY MATERIALS
+INCLUDED IN THE SOFTWARE, ANY THIRD PARTY MATERIALS FROM WHICH THE
+SOFTWARE IS DERIVED (COLLECTIVELY "OTHER CODE"), AND THE USE OF ANY OR
+ALL THE OTHER CODE IN CONNECTION WITH THE SOFTWARE, INCLUDING (WITHOUT
+LIMITATION) ANY WARRANTIES OF SATISFACTORY QUALITY OR FITNESS FOR A
+PARTICULAR PURPOSE.
+<br><br>
+NO THIRD PARTY LICENSORS OF OTHER CODE SHALL HAVE ANY LIABILITY FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND
+WHETHER MADE UNDER CONTRACT, TORT OR OTHER LEGAL THEORY, ARISING IN ANY
+WAY OUT OF THE USE OR DISTRIBUTION OF THE OTHER CODE OR THE EXERCISE OF
+ANY RIGHTS GRANTED UNDER EITHER OR BOTH THIS LICENCE AND THE LEGAL TERMS
+APPLICABLE TO ANY SEPARATE FILES, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+<br><br>
+Distribution of code from the Rogue Wave C++ Language Libraries is
+permitted only if: (a) you are granted the right to redistribute those
+Libraries under Clauses 1 and 2 of this Licence; and (b) they do not
+constitute a major portion of the value of the Software Applications. No
+right is granted to use those libraries on any computers as are marketed
+or commonly considered to be mainframe computers in the computer
+industry. Notwithstanding any provisions in this Licence to the
+contrary, you shall not: (a) distribute in any manner any of the header
+files, object modules, template-based classes, or independent static
+libraries of those libraries or documentation relating thereto; (b)
+distribute any portion of the same in a software utility product or
+otherwise in competition with Rogue Wave Software Inc's distribution
+thereof; nor (c) rent or lease the Software. "Rogue Wave" is a
+registered trademark of Rogue Wave Software, Inc.
+<br><br>
+<b>8. U.S. GOVERNMENT END USERS.</b><br>
+US Government Restrictions: Use, duplication, reproduction, release,
+modification, disclosure or transfer of this commercial product and
+accompanying documentation is restricted in accordance with the terms of
+this Licence.
+<br><br>
+<b>9. TERM AND TERMINATION.</b><br>
+This Licence shall remain in force until terminated by you, by ARM or by
+expiry (in the case of a time limited licence). Without prejudice to any
+of its other rights if you are in breach of any of the terms and
+conditions of this Licence then ARM may terminate this Licence
+immediately upon giving written notice to you. You may terminate this
+Licence at any time. Upon termination of this Licence by you or by ARM
+you shall stop using the Software and destroy all copies of the Software
+in your possession together with all documentation and related
+materials. The provisions of clauses 4, 6, 7, 8, 9 and 10 shall survive
+termination of this Licence.
+<br><br>
+<b>10. GENERAL.</b><br>
+This Licence is governed by English Law. Except where ARM agrees
+otherwise in a written contract signed by you and ARM, this is the only
+agreement between you and ARM relating to the Software and it may only
+be modified by written agreement between you and ARM. Except as
+expressly agreed in writing, this Licence may not be modified by
+purchase orders, advertising or other representation by any person. If
+any clause or sentence in this Licence is held by a court of law to be
+illegal or unenforceable the remaining provisions of this Licence shall
+not be affected thereby. The failure by ARM to enforce any of the
+provisions of this Licence, unless waived in writing, shall not
+constitute a waiver of ARM's rights to enforce such provision or any
+other provision of this Licence in the future.
+<br><br>
+At ARM's request, you agree to check your computers for installations of
+the Software, copies of Licence Keys, contents of any licence server log
+files, individual or concurrent usage of Seats, and any other
+information requested by ARM relating to Software installation, usage
+and Licence Key management, and to provide this information to ARM. ARM
+will not make such a request more frequently than once per year, except
+as necessary where ARM believes, in good faith, that an additional check
+is needed (for example after re-hosting a Licence Key). You agree that
+auditors nominated by ARM may also perform such checking and reporting
+on behalf of ARM by prior appointment during your normal business hours
+on seven (7) days' notice. ARM shall bear the auditors' costs for that
+audit unless it reveals unlicensed usage in which case you shall
+promptly reimburse ARM for all reasonable costs and expenses, including
+professional fees, relating to such audit. Any information which is
+disclosed to ARM or such auditors during checking or audit shall be
+treated as your confidential information and shall only be used by ARM
+for licence management, compliance and enforcement purposes.
+<br><br>
+The Software provided under this Licence is subject to U.S. export
+control laws, including the U.S. Export Administration Act and its
+associated regulations, and may be subject to export or import
+regulations in other countries. You agree to comply fully with all laws
+and regulations of the United States and other countries ("Export Laws")
+to assure that the Software is not (1) exported, directly or
+indirectly, in violation of Export Laws, either to any countries that
+are subject to U.S.A. export restrictions or to any end user who has
+been prohibited from participating in the U.S.A. export transactions by
+any federal agency of the U.S.A. government; or (2) intended to be used
+for any purpose prohibited by Export Laws, including, without
+limitation, nuclear, chemical, or biological weapons proliferation.
+<br><br> <div align="center"><b>
+THE SCHEDULE
+</b><br></div><b>
+TARGET LIBRARIES (OBJECT CODE):
+</b><br>
+<i>Language Libraries:</i> C Libraries; C++ runtime support library;
+Rogue Wave C++ Libraries <br>
+<i>Helper Libraries:</i> ARM Compiler Helper Libraries found in your
+installation at:
+<br><tt>"&lt;install_dir&gt;/RVCT/Data/3.1/&lt;buildno&gt;/lib/armlib/h_*.*"</tt><br>
+<br><b>
+EXAMPLE CODE - LIMITED USE:
+</b><br>
+Dhrystone; MMUgen; FFT example code
+<br><br><b>
+EXAMPLE CODE - EXTENDED USE:
+</b><br>
+Example Flash Download Algorithms found in your installation at:
+<br><tt>"&lt;install_dir&gt;/RVD/Flash/3.1/&lt;buildno&gt;/&lt;OS&gt;/&lt;platform&gt;/*.*"</tt><br>
+Any and all other example code identified as such and included with the
+Software
+<br><br><b>
+SEPARATE FILES:
+</b><br>
+A. GNU standard C and C++ language libraries (only installed on Linux
+host computers):
+<br>
+This package includes GNU libgcc_s and libstdc++ libraries in binary
+form, licensed to you under the GNU General Public License, version 2.
+<br><br>
+B. zthread library (only installed on Windows host computers):
+<br>
+This package includes zthread shared library in source and binary form,
+licensed to you under the GNU Lesser General Public License, version
+2.1.
+<br><br>
+C. Portions of the ARM installer:
+<br>
+Portions of the installer use the JAXB XML binding library and Java 2
+Platform Standard Edition Runtime (JRE) binary runtime files. The
+applicable Third Party Terms are set out in the
+THIRDPARTYLICENSEREADME.txt files, which ARM received with such
+binaries, and (in the case of any portions of the MESA graphics library
+used in the JRE) in the "SGI Free Software License B" in file
+SGIFREEB.txt.
+<br><br>
+D. The Eclipse IDE:
+<br>
+The Eclipse IDE is distributed subject to the terms of the Eclipse
+Software User Agreement (SUA) version March 17, 2005, except that the
+term "Content" used in the SUA shall mean only those Eclipse IDE
+materials contained in this package, and it is ARM, not the Eclipse
+Foundation, who makes the agreement with you and supplies you with that
+Content. That Content is also subject to the legal terms and notices
+referred to in the SUA, including but not limited to the Eclipse Public
+License version 1.0, the Common Public License version 1.0, the Apache
+Software License version 1.1, the Apache Software License version 2.0,
+the IBM Public License version 1.0, the Metro Link Public License 1.00,
+and the Mozilla Public License version 1.1. For further information on
+the Eclipse Foundation's approach to licensing, please see their
+guidance notes at
+<a href="http://www.eclipse.org/legal/guidetolegaldoc.php">
+http://www.eclipse.org/legal/guidetolegaldoc.php</a>,
+or more generally the Eclipse legal resources website at
+<a href="http://www.eclipse.org/legal">http://www.eclipse.org/legal</a>.
+<br><br>
+E. Portions of the ARM plug-ins for the Eclipse IDE:
+<br>
+Certain modules used in these plug-ins are licensed to you under the
+Eclipse Public License, version 1.0.
+<br><br>
+ARM contract references: LEC-ELA-00069-V14.0 RVDS 3.1 FCS AB/AB
+<br>/end
+<br>
+</body>
+</html>
diff --git a/ref-rvct-all.txt b/ref-rvct-all.txt new file mode 100644 index 0000000..cb7ab02 --- /dev/null +++ b/ref-rvct-all.txt @@ -0,0 +1,8833 @@ + +VLD1/VLD1Q output: +VLD1/VLD1Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD1/VLD1Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD1/VLD1Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD1/VLD1Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD1/VLD1Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD1/VLD1Q:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD1/VLD1Q:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD1/VLD1Q:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD1/VLD1Q:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD1/VLD1Q:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD1/VLD1Q:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } 
+VLD1/VLD1Q:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VLD1/VLD1Q:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } + +VADD/VADDQ output: +VADD/VADDQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, } +VADD/VADDQ:1:result_int16x4 [] = { ffffffec, ffffffed, ffffffee, ffffffef, } +VADD/VADDQ:2:result_int32x2 [] = { fffffff3, fffffff4, } +VADD/VADDQ:3:result_int64x1 [] = { 54, } +VADD/VADDQ:4:result_uint8x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:5:result_uint16x4 [] = { e, f, 10, 11, } +VADD/VADDQ:6:result_uint32x2 [] = { 18, 19, } +VADD/VADDQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VADD/VADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADD/VADDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADD/VADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VADD/VADDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VADD/VADDQ:12:result_int8x16 [] = { ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VADD/VADDQ:13:result_int16x8 [] = { ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VADD/VADDQ:14:result_int32x4 [] = { ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VADD/VADDQ:15:result_int64x2 [] = { 8, 9, } +VADD/VADDQ:16:result_uint8x16 [] = { fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:17:result_uint16x8 [] = { fff3, fff4, fff5, fff6, fff7, fff8, fff9, fffa, } +VADD/VADDQ:18:result_uint32x4 [] = { 27, 28, 29, 2a, } +VADD/VADDQ:19:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff4, } +VADD/VADDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADD/VADDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADD/VADDQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VADD/VADDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +float32: +VADD/VADDQ:24:result_float32x2 [] = { 40d9999a, 40d9999a, } +VADD/VADDQ:25:result_float32x4 [] = { 41100000, 41100000, 41100000, 41100000, } + +VLD1_LANE/VLD1_LANEQ output: +VLD1_LANE/VLD1_LANEQ:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, ffffffaa, } +VLD1_LANE/VLD1_LANEQ:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:2:result_int32x2 [] = { aaaaaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, f0, } +VLD1_LANE/VLD1_LANEQ:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, fff0, } +VLD1_LANE/VLD1_LANEQ:6:result_uint32x2 [] = { aaaaaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, f0, } +VLD1_LANE/VLD1_LANEQ:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, fff0, } +VLD1_LANE/VLD1_LANEQ:10:result_float32x2 [] = { aaaaaaaa, c1800000, } +VLD1_LANE/VLD1_LANEQ:11:result_float16x4 [] = { aaaa, aaaa, cc00, aaaa, } +VLD1_LANE/VLD1_LANEQ:12:result_int8x16 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:13:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, ffffaaaa, ffffaaaa, } +VLD1_LANE/VLD1_LANEQ:14:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:15:result_int64x2 [] = { aaaaaaaaaaaaaaaa, fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:16:result_uint8x16 [] = { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, f0, aa, aa, aa, } +VLD1_LANE/VLD1_LANEQ:17:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, aaaa, } +VLD1_LANE/VLD1_LANEQ:18:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, 
} +VLD1_LANE/VLD1_LANEQ:19:result_uint64x2 [] = { fffffffffffffff0, aaaaaaaaaaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:20:result_poly8x16 [] = { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, f0, aa, aa, aa, } +VLD1_LANE/VLD1_LANEQ:21:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, aaaa, } +VLD1_LANE/VLD1_LANEQ:22:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, c1800000, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:23:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, cc00, aaaa, aaaa, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1800000, c1800000, } +VLD1_DUP/VLD1_DUPQ:11:result_float16x4 [] = { cc00, cc00, cc00, cc00, } +VLD1_DUP/VLD1_DUPQ:12:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:13:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:14:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:16:result_uint8x16 [] = { f0, f0, f0, f0, 
f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:20:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:21:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:22:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } +VLD1_DUP/VLD1_DUPQ:23:result_float16x8 [] = { cc00, cc00, cc00, cc00, cc00, cc00, cc00, cc00, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1700000, c1700000, } +VLD1_DUP/VLD1_DUPQ:11:result_float16x4 [] = { cb80, cb80, cb80, cb80, } +VLD1_DUP/VLD1_DUPQ:12:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:13:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } 
+VLD1_DUP/VLD1_DUPQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:15:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:16:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:17:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:19:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:20:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:22:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } +VLD1_DUP/VLD1_DUPQ:23:result_float16x8 [] = { cb80, cb80, cb80, cb80, cb80, cb80, cb80, cb80, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1600000, c1600000, } +VLD1_DUP/VLD1_DUPQ:11:result_float16x4 [] = { cb00, cb00, cb00, cb00, } +VLD1_DUP/VLD1_DUPQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, 
fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:13:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:14:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:15:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:16:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:17:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:18:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:19:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:20:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:21:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:22:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } +VLD1_DUP/VLD1_DUPQ:23:result_float16x8 [] = { cb00, cb00, cb00, cb00, cb00, cb00, cb00, cb00, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1800000, c1800000, } +VDUP/VDUPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } 
+VDUP/VDUPQ:12:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:13:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:14:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:16:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:20:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:21:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:22:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } +VDUP/VDUPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1700000, c1700000, } +VDUP/VDUPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } 
+VDUP/VDUPQ:12:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:13:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:15:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:16:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:17:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:19:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:20:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:22:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } +VDUP/VDUPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1600000, c1600000, } +VDUP/VDUPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } 
+VDUP/VDUPQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:13:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:14:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:15:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:16:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:17:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:18:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:19:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:20:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:21:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:22:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } +VDUP/VDUPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1800000, c1800000, } +VMOV/VMOVQ:11:result_float16x4 [] = { 0, 0, 0, 0, } 
+VMOV/VMOVQ:12:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:13:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:14:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:16:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:20:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:21:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:22:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } +VMOV/VMOVQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1700000, c1700000, } +VMOV/VMOVQ:11:result_float16x4 [] = { 0, 0, 0, 0, } 
+VMOV/VMOVQ:12:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:13:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:15:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:16:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:17:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:19:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:20:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:22:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } +VMOV/VMOVQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1600000, c1600000, } +VMOV/VMOVQ:11:result_float16x4 [] = { 0, 0, 0, 0, } 
+VMOV/VMOVQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:13:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:14:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:15:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:16:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:17:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:18:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:19:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:20:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:21:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:22:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } +VMOV/VMOVQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VGET_HIGH output: +VGET_HIGH:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VGET_HIGH:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VGET_HIGH:2:result_int32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:3:result_int64x1 [] = { fffffffffffffff1, } +VGET_HIGH:4:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VGET_HIGH:5:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VGET_HIGH:6:result_uint32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:7:result_uint64x1 [] = { fffffffffffffff1, } +VGET_HIGH:8:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VGET_HIGH:9:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VGET_HIGH:10:result_float32x2 [] = { c1600000, c1500000, } +VGET_HIGH:11:result_float16x4 [] = { ca00, c980, c900, c880, } 
+VGET_HIGH:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VGET_LOW output: +VGET_LOW:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VGET_LOW:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VGET_LOW:2:result_int32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:3:result_int64x1 [] = { fffffffffffffff0, } +VGET_LOW:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VGET_LOW:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VGET_LOW:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:7:result_uint64x1 [] = { fffffffffffffff0, } +VGET_LOW:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VGET_LOW:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VGET_LOW:10:result_float32x2 [] = { c1800000, c1700000, } +VGET_LOW:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VGET_LOW:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 
3333, 3333, 3333, } +VGET_LOW:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_LANE cumulative saturation output: +VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon cumulative saturation 0 + +VQDMLAL_LANE output: +VQDMLAL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_LANE:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQDMLAL_LANE:16:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL_LANE:17:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL_LANE:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_LANE (mul with input=0) cumulative saturation output: +VQDMLAL_LANE:26:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:27:vqdmlal_lane_s32 Neon cumulative saturation 0 + +VQDMLAL_LANE (mul with input=0) output: +VQDMLAL_LANE:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:31:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_LANE:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 
3333, 3333, 3333, 3333, } +VQDMLAL_LANE:42:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLAL_LANE:43:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLAL_LANE:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_LANE:52:vqdmlal_lane_s16 Neon cumulative saturation 1 +VQDMLAL_LANE:53:vqdmlal_lane_s32 Neon cumulative saturation 1 + +VQDMLAL_LANE (check mul cumulative saturation) output: +VQDMLAL_LANE:54:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:55:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:56:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:57:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:58:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:59:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:60:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:61:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:62:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:63:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:64:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:65:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_LANE:66:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:67:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:68:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_LANE:69:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL_LANE:70:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:71:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:72:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:73:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:74:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:75:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:76:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:77:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_LANE cumulative saturation output: +VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon cumulative saturation 0 + +VQDMLSL_LANE output: +VQDMLSL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL_LANE:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:16:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL_LANE:17:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL_LANE:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_LANE (mul with input=0) cumulative saturation output: +VQDMLSL_LANE:26:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:27:vqdmlsl_lane_s32 Neon cumulative saturation 0 + +VQDMLSL_LANE (mul with input=0) output: +VQDMLSL_LANE:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:31:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:39:result_float16x4 [] = { 0, 0, 0, 
0, } +VQDMLSL_LANE:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:42:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLSL_LANE:43:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLSL_LANE:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_LANE:52:vqdmlsl_lane_s16 Neon cumulative saturation 1 +VQDMLSL_LANE:53:vqdmlsl_lane_s32 Neon cumulative saturation 1 + +VQDMLSL_LANE (check mul cumulative saturation) output: +VQDMLSL_LANE:54:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:55:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:56:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:57:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:58:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:59:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:60:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:61:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:62:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:63:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VQDMLSL_LANE:64:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:65:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL_LANE:66:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:67:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:68:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_LANE:69:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_LANE:70:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:71:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:72:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:73:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:74:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:75:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:76:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:77:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_N cumulative saturation output: +VQDMLAL_N:0:vqdmlal_n_s16 Neon cumulative saturation 0 +VQDMLAL_N:1:vqdmlal_n_s32 Neon cumulative saturation 0 + +VQDMLAL_N output: +VQDMLAL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:12:result_float32x2 
[] = { 33333333, 33333333, } +VQDMLAL_N:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_N:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:16:result_int32x4 [] = { 1684, 1685, 1686, 1687, } +VQDMLAL_N:17:result_int64x2 [] = { 21ce, 21cf, } +VQDMLAL_N:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_N:26:vqdmlal_n_s16 Neon cumulative saturation 1 +VQDMLAL_N:27:vqdmlal_n_s32 Neon cumulative saturation 1 + +VQDMLAL_N (check mul cumulative saturation) output: +VQDMLAL_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:31:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:38:result_float32x2 [] = { 33333333, 33333333, } 
+VQDMLAL_N:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_N:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:42:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_N:43:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL_N:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_N cumulative saturation output: +VQDMLSL_N:0:vqdmlsl_n_s16 Neon cumulative saturation 0 +VQDMLSL_N:1:vqdmlsl_n_s32 Neon cumulative saturation 0 + +VQDMLSL_N output: +VQDMLSL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:13:result_float16x4 [] = { 0, 0, 0, 0, } 
+VQDMLSL_N:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:16:result_int32x4 [] = { ffffe95c, ffffe95d, ffffe95e, ffffe95f, } +VQDMLSL_N:17:result_int64x2 [] = { ffffffffffffde12, ffffffffffffde13, } +VQDMLSL_N:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_N:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_N:26:vqdmlsl_n_s16 Neon cumulative saturation 1 +VQDMLSL_N:27:vqdmlsl_n_s32 Neon cumulative saturation 1 + +VQDMLSL_N (check mul cumulative saturation) output: +VQDMLSL_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:31:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:39:result_float16x4 [] = { 0, 0, 0, 
0, } +VQDMLSL_N:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:42:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_N:43:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_N:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_N:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VEXT/VEXTQ output: +VEXT/VEXTQ:0:result_int8x8 [] = { fffffff7, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:1:result_int16x4 [] = { fffffff3, 22, 22, 22, } +VEXT/VEXTQ:2:result_int32x2 [] = { fffffff1, 33, } +VEXT/VEXTQ:3:result_int64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:4:result_uint8x8 [] = { f6, f7, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:5:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VEXT/VEXTQ:6:result_uint32x2 [] = { fffffff1, 77, } +VEXT/VEXTQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:8:result_poly8x8 [] = { f6, f7, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:9:result_poly16x4 [] = { fff2, fff3, 66, 66, } +VEXT/VEXTQ:10:result_float32x2 [] = { c1700000, 42066666, } +VEXT/VEXTQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VEXT/VEXTQ:12:result_int8x16 [] = { fffffffe, ffffffff, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:13:result_int16x8 [] = { fffffff7, 22, 22, 22, 22, 22, 22, 22, } +VEXT/VEXTQ:14:result_int32x4 [] = { 
fffffff3, 33, 33, 33, } +VEXT/VEXTQ:15:result_int64x2 [] = { fffffffffffffff1, 44, } +VEXT/VEXTQ:16:result_uint8x16 [] = { fc, fd, fe, ff, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:17:result_uint16x8 [] = { fff6, fff7, 66, 66, 66, 66, 66, 66, } +VEXT/VEXTQ:18:result_uint32x4 [] = { fffffff3, 77, 77, 77, } +VEXT/VEXTQ:19:result_uint64x2 [] = { fffffffffffffff1, 88, } +VEXT/VEXTQ:20:result_poly8x16 [] = { fc, fd, fe, ff, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:21:result_poly16x8 [] = { fff6, fff7, 66, 66, 66, 66, 66, 66, } +VEXT/VEXTQ:22:result_float32x4 [] = { c1500000, 4204cccd, 4204cccd, 4204cccd, } +VEXT/VEXTQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHR_N output: +VSHR_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHR_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHR_N:3:result_int64x1 [] = { ffffffffffffffff, } +VSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, } +VSHR_N:5:result_uint16x4 [] = { 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:6:result_uint32x2 [] = { 7ffffff, 7ffffff, } +VSHR_N:7:result_uint64x1 [] = { 7fffffff, } +VSHR_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHR_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHR_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHR_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHR_N:12:result_int8x16 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, } +VSHR_N:13:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:14:result_int32x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VSHR_N:15:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHR_N:16:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 
3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHR_N:17:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:18:result_uint32x4 [] = { 7ffffff, 7ffffff, 7ffffff, 7ffffff, } +VSHR_N:19:result_uint64x2 [] = { 7fffffff, 7fffffff, } +VSHR_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHR_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHR_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHR_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHRN_N output: +VSHRN_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHRN_N:1:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHRN_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VSHRN_N:4:result_uint8x8 [] = { fc, fc, fc, fc, fd, fd, fd, fd, } +VSHRN_N:5:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VSHRN_N:6:result_uint32x2 [] = { fffffffe, fffffffe, } +VSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHRN_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHRN_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHRN_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHRN_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSHRN_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:19:result_uint64x2 [] = { 
3333333333333333, 3333333333333333, } +VSHRN_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHRN_N (with input = 0) output: +VRSHRN_N:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:2:result_int32x2 [] = { 0, 0, } +VRSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:6:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VRSHRN_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHRN_N output: +VRSHRN_N:24:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHRN_N:25:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VRSHRN_N:26:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHRN_N:27:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:28:result_uint8x8 [] = { fc, fc, fd, fd, fd, fd, fe, fe, } +VRSHRN_N:29:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VRSHRN_N:30:result_uint32x2 [] = { fffffffe, fffffffe, } +VRSHRN_N:31:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:34:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:35:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHRN_N (with large shift amount) output: +VRSHRN_N:48:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VRSHRN_N:49:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:50:result_int32x2 [] = { 0, 0, } +VRSHRN_N:51:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:52:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:53:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:54:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:55:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:58:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:59:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:61:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:62:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:65:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:66:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRN_N cumulative saturation output: +VQRSHRN_N:0:vqrshrn_n_s16 Neon cumulative saturation 0 +VQRSHRN_N:1:vqrshrn_n_s32 Neon cumulative saturation 0 +VQRSHRN_N:2:vqrshrn_n_s64 Neon cumulative saturation 0 +VQRSHRN_N:3:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:4:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:5:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N 
output: +VQRSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VQRSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VQRSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQRSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:16:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRN_N:18:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:19:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:20:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQRSHRN_N:30:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:31:vqrshrn_n_s32 Neon cumulative 
saturation 1 +VQRSHRN_N:32:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:33:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:34:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:35:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N (check saturation: shift by 3) output: +VQRSHRN_N:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:37:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:38:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:39:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:40:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:41:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:42:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:43:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:44:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:45:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:46:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:47:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRN_N:48:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:49:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:50:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:51:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:52:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:53:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:54:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:55:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:56:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:58:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VQRSHRN_N:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRN_N (check saturation: shift by max) cumulative saturation output: +VQRSHRN_N:60:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:61:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:62:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:63:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:64:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:65:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N (check saturation: shift by max) output: +VQRSHRN_N:66:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:67:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:68:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:69:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:70:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:71:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:72:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:73:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:74:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:75:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:76:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:77:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRN_N:78:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:79:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:80:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:81:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:82:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:83:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:84:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:85:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:86:result_poly8x16 [] = { 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:87:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:88:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:89:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSET_LANE/VSET_LANEQ output: +VSET_LANE/VSET_LANEQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, 11, } +VSET_LANE/VSET_LANEQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 22, } +VSET_LANE/VSET_LANEQ:2:result_int32x2 [] = { fffffff0, 33, } +VSET_LANE/VSET_LANEQ:3:result_int64x1 [] = { 44, } +VSET_LANE/VSET_LANEQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, 55, f7, } +VSET_LANE/VSET_LANEQ:5:result_uint16x4 [] = { fff0, fff1, 66, fff3, } +VSET_LANE/VSET_LANEQ:6:result_uint32x2 [] = { fffffff0, 77, } +VSET_LANE/VSET_LANEQ:7:result_uint64x1 [] = { 88, } +VSET_LANE/VSET_LANEQ:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, 55, f7, } +VSET_LANE/VSET_LANEQ:9:result_poly16x4 [] = { fff0, fff1, 66, fff3, } +VSET_LANE/VSET_LANEQ:10:result_float32x2 [] = { c1800000, 4204cccd, } +VSET_LANE/VSET_LANEQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSET_LANE/VSET_LANEQ:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffff99, } +VSET_LANE/VSET_LANEQ:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, aa, fffffff6, fffffff7, } +VSET_LANE/VSET_LANEQ:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, bb, } +VSET_LANE/VSET_LANEQ:15:result_int64x2 [] = { fffffffffffffff0, cc, } +VSET_LANE/VSET_LANEQ:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, dd, ff, } +VSET_LANE/VSET_LANEQ:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, ee, fff7, } +VSET_LANE/VSET_LANEQ:18:result_uint32x4 [] = { fffffff0, fffffff1, ff, fffffff3, } 
+VSET_LANE/VSET_LANEQ:19:result_uint64x2 [] = { fffffffffffffff0, 11, } +VSET_LANE/VSET_LANEQ:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, dd, ff, } +VSET_LANE/VSET_LANEQ:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, ee, fff7, } +VSET_LANE/VSET_LANEQ:22:result_float32x4 [] = { c1800000, c1700000, c1600000, 41333333, } +VSET_LANE/VSET_LANEQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VGET_LANE/VGETQ_LANE output: +vget_lane_s8: fffffff7 +vget_lane_s16: fffffff3 +vget_lane_s32: fffffff1 +vget_lane_s64: fffffffffffffff0 +vget_lane_u8: f6 +vget_lane_u16: fff2 +vget_lane_u32: fffffff1 +vget_lane_u64: fffffffffffffff0 +vget_lane_p8: f6 +vget_lane_p16: fff2 +vget_lane_f32: c1700000 +vgetq_lane_s8: ffffffff +vgetq_lane_s16: fffffff5 +vgetq_lane_s32: fffffff3 +vgetq_lane_s64: fffffffffffffff1 +vgetq_lane_u8: fe +vgetq_lane_u16: fff6 +vgetq_lane_u32: fffffff2 +vgetq_lane_u64: fffffffffffffff1 +vgetq_lane_p8: fe +vgetq_lane_p16: fff6 +vgetq_lane_f32: c1500000 + + +VQSUB/VQSUBQ cumulative saturation output: +VQSUB/VQSUBQ:0:vqsub_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:1:vqsub_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:2:vqsub_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:3:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:4:vqsub_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:5:vqsub_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:6:vqsub_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:7:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:8:vqsubq_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:9:vqsubq_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:10:vqsubq_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:11:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:12:vqsubq_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:13:vqsubq_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:14:vqsubq_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:15:vqsubq_u64 Neon cumulative saturation 0 + +VQSUB/VQSUBQ output: 
+VQSUB/VQSUBQ:16:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VQSUB/VQSUBQ:17:result_int16x4 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, } +VQSUB/VQSUBQ:18:result_int32x2 [] = { ffffffbd, ffffffbe, } +VQSUB/VQSUBQ:19:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:20:result_uint8x8 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, } +VQSUB/VQSUBQ:21:result_uint16x4 [] = { ff8a, ff8b, ff8c, ff8d, } +VQSUB/VQSUBQ:22:result_uint32x2 [] = { ffffff79, ffffff7a, } +VQSUB/VQSUBQ:23:result_uint64x1 [] = { ffffffffffffff68, } +VQSUB/VQSUBQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSUB/VQSUBQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSUB/VQSUBQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQSUB/VQSUBQ:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQSUB/VQSUBQ:28:result_int8x16 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, } +VQSUB/VQSUBQ:29:result_int16x8 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VQSUB/VQSUBQ:30:result_int32x4 [] = { ffffffbd, ffffffbe, ffffffbf, ffffffc0, } +VQSUB/VQSUBQ:31:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:32:result_uint8x16 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, } +VQSUB/VQSUBQ:33:result_uint16x8 [] = { ff8a, ff8b, ff8c, ff8d, ff8e, ff8f, ff90, ff91, } +VQSUB/VQSUBQ:34:result_uint32x4 [] = { ffffff79, ffffff7a, ffffff7b, ffffff7c, } +VQSUB/VQSUBQ:35:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } +VQSUB/VQSUBQ:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSUB/VQSUBQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSUB/VQSUBQ:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSUB/VQSUBQ:39:result_float16x8 [] = { 0, 
0, 0, 0, 0, 0, 0, 0, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:40:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:41:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:42:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:43:vqsubq_u64 Neon cumulative saturation 0 + +64 bits saturation: +VQSUB/VQSUBQ:44:result_int64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:45:result_uint64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:46:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQSUB/VQSUBQ:47:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:48:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:49:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:50:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:51:vqsubq_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:52:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:53:result_uint64x1 [] = { ffffffffffffff68, } +VQSUB/VQSUBQ:54:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:55:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:56:vqsub_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:57:vqsub_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:58:vqsubq_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:59:vqsubq_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:60:result_int64x1 [] = { 8000000000000000, } +VQSUB/VQSUBQ:61:result_uint64x1 [] = { 0, } +VQSUB/VQSUBQ:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSUB/VQSUBQ:63:result_uint64x2 [] = { 0, 0, } + +less than 64 bits saturation: +VQSUB/VQSUBQ:64:vqsub_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:65:vqsub_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:66:vqsub_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:67:vqsubq_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:68:vqsubq_s16 Neon cumulative 
saturation 1 +VQSUB/VQSUBQ:69:vqsubq_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:70:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:71:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:72:result_int32x2 [] = { 80000000, 80000000, } +VQSUB/VQSUBQ:73:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:74:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:75:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQSUB/VQSUBQ less than 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:76:vqsub_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:77:vqsub_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:78:vqsub_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:79:vqsubq_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:80:vqsubq_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:81:vqsubq_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:82:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:83:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSUB/VQSUBQ:84:result_uint32x2 [] = { 0, 0, } +VQSUB/VQSUBQ:85:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:86:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:87:result_uint32x4 [] = { 0, 0, 0, 0, } + +VQDMULH cumulative saturation output: +VQDMULH:0:vqdmulh_s16 Neon cumulative saturation 0 +VQDMULH:1:vqdmulh_s32 Neon cumulative saturation 0 +VQDMULH:2:vqdmulhq_s16 Neon cumulative saturation 0 +VQDMULH:3:vqdmulhq_s32 Neon cumulative saturation 0 + +VQDMULH output: +VQDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:6:result_int32x2 [] = { ffffffff, ffffffff, } 
+VQDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:14:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH:15:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH:16:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:17:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:18:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:19:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:20:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:21:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:22:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:23:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:24:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:26:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULH cumulative saturation output: +VQDMULH:28:vqdmulh_s16 Neon cumulative saturation 1 +VQDMULH:29:vqdmulh_s32 Neon cumulative saturation 1 +VQDMULH:30:vqdmulhq_s16 Neon cumulative saturation 1 +VQDMULH:31:vqdmulhq_s32 Neon cumulative saturation 1 + +VQDMULH output: +VQDMULH:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH:34:result_int32x2 [] = { 7fffffff, 
7fffffff, } +VQDMULH:35:result_int64x1 [] = { 3333333333333333, } +VQDMULH:36:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:37:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:38:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:39:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:40:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:41:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:42:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH:43:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH:44:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:45:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH:46:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH:47:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:48:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:49:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:50:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:51:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:52:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:54:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULH_LANE cumulative saturation output: +VQDMULH_LANE:0:vqdmulh_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:1:vqdmulh_lane_s32 Neon cumulative saturation 0 +VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon cumulative saturation 0 + +VQDMULH_LANE output: +VQDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, 
ffffffff, } +VQDMULH_LANE:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:14:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:15:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH_LANE:16:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:17:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:18:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:19:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:20:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:21:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:22:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:23:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:24:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:26:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULH_LANE:28:vqdmulh_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:29:vqdmulh_lane_s32 Neon cumulative saturation 1 +VQDMULH_LANE:30:vqdmulhq_lane_s16 Neon cumulative saturation 
1 +VQDMULH_LANE:31:vqdmulhq_lane_s32 Neon cumulative saturation 1 + +VQDMULH_LANE (check mul cumulative saturation) output: +VQDMULH_LANE:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:34:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH_LANE:35:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:36:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:37:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:38:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:39:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:40:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:41:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:42:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:43:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH_LANE:44:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:45:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:46:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_LANE:47:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:48:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:49:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:50:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:51:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:52:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:54:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULH_N cumulative saturation output: 
+VQDMULH_N:0:vqdmulh_n_s16 Neon cumulative saturation 0 +VQDMULH_N:1:vqdmulh_n_s32 Neon cumulative saturation 0 +VQDMULH_N:2:vqdmulhq_n_s16 Neon cumulative saturation 0 +VQDMULH_N:3:vqdmulhq_n_s32 Neon cumulative saturation 0 + +VQDMULH_N output: +VQDMULH_N:4:result_int16x4 [] = { 19, 19, 19, 19, } +VQDMULH_N:5:result_int32x2 [] = { 4, 4, } +VQDMULH_N:6:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQDMULH_N:7:result_int32x4 [] = { a, a, a, a, } + +VQDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQDMULH_N:8:vqdmulh_n_s16 Neon cumulative saturation 1 +VQDMULH_N:9:vqdmulh_n_s32 Neon cumulative saturation 1 +VQDMULH_N:10:vqdmulhq_n_s16 Neon cumulative saturation 1 +VQDMULH_N:11:vqdmulhq_n_s32 Neon cumulative saturation 1 + +VQDMULH_N (check mul cumulative saturation) output: +VQDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_N:20:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:21:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_N:22:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_N:23:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH_N:24:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:25:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:26:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_N:27:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:28:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, } +VQDMULH_N:29:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_N:30:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_N:31:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:32:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:33:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_N:34:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_N:35:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULL cumulative saturation output: +VQDMULL:0:vqdmull_s16 Neon cumulative saturation 0 +VQDMULL:1:vqdmull_s32 Neon cumulative saturation 0 + +VQDMULL output: +VQDMULL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:5:result_int64x1 [] = { 3333333333333333, } +VQDMULL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMULL:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULL:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:16:result_int32x4 [] = { 200, 1c2, 188, 152, } +VQDMULL:17:result_int64x2 [] = { 200, 1c2, } +VQDMULL:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, 
} +VQDMULL:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULL (check mul cumulative saturation) cumulative saturation output: +VQDMULL:26:vqdmull_s16 Neon cumulative saturation 1 +VQDMULL:27:vqdmull_s32 Neon cumulative saturation 1 + +VQDMULL (check mul cumulative saturation) output: +VQDMULL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:31:result_int64x1 [] = { 3333333333333333, } +VQDMULL:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:35:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMULL:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULL:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:42:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL:43:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQDMULL:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:47:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VQDMULL:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL cumulative saturation output: +VQDMLAL:0:vqdmlal_s16 Neon cumulative saturation 0 +VQDMLAL:1:vqdmlal_s32 Neon cumulative saturation 0 + +VQDMLAL output: +VQDMLAL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:16:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL:17:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:23:result_poly16x8 [] = { 3333, 3333, 3333, 
3333, 3333, 3333, 3333, 3333, } +VQDMLAL:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL (check mul cumulative saturation) cumulative saturation output: +VQDMLAL:26:vqdmlal_s16 Neon cumulative saturation 1 +VQDMLAL:27:vqdmlal_s32 Neon cumulative saturation 1 + +VQDMLAL (check mul cumulative saturation) output: +VQDMLAL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:31:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:42:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL:43:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQDMLAL:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL cumulative saturation output: +VQDMLSL:0:vqdmlsl_s16 Neon cumulative saturation 0 +VQDMLSL:1:vqdmlsl_s32 Neon cumulative saturation 0 + +VQDMLSL output: +VQDMLSL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:16:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL:17:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 
0, } + +VQDMLSL (check mul cumulative saturation) cumulative saturation output: +VQDMLSL:26:vqdmlsl_s16 Neon cumulative saturation 1 +VQDMLSL:27:vqdmlsl_s32 Neon cumulative saturation 1 + +VQDMLSL (check mul cumulative saturation) output: +VQDMLSL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:31:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:42:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL:43:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCEQ/VCEQQ output: +VCEQ/VCEQQ:0:result_uint8x8 
[] = { 0, 0, 0, 0, 0, 0, ff, 0, } +VCEQ/VCEQQ:1:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:3:result_uint8x8 [] = { 0, 0, 0, ff, 0, 0, 0, 0, } +VCEQ/VCEQQ:4:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, 0, 0, 0, } +VCEQ/VCEQQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:8:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCEQ/VCEQQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:11:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:13:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:15:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:16:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCEQ/VCEQQ p8 output: +VCEQ/VCEQQ:0:result_uint8x8 [] = { 0, 0, 0, ff, 0, 0, 0, 0, } +VCEQ/VCEQQ:1:result_uint8x16 [] = { 0, 0, 0, 0, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } + +VCGE/VCGEQ output: +VCGE/VCGEQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, ff, } +VCGE/VCGEQ:1:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:3:result_uint8x8 [] = { 0, 0, 0, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:4:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:5:result_uint32x2 [] 
= { 0, ffffffff, } +VCGE/VCGEQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, ff, } +VCGE/VCGEQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:8:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:11:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:13:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:14:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:15:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:16:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (-inf):21:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (inf):22:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCLE/VCLEQ output: +VCLE/VCLEQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, 0, } +VCLE/VCLEQ:1:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:3:result_uint8x8 [] = { ff, ff, ff, ff, 0, 0, 0, 0, } +VCLE/VCLEQ:4:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:5:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, } +VCLE/VCLEQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 0, } +VCLE/VCLEQ:8:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:9:result_uint8x16 [] = { ff, ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } 
+VCLE/VCLEQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 0, } +VCLE/VCLEQ:11:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:13:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:15:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:16:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (inf):20:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCGT/VCGTQ output: +VCGT/VCGTQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, ff, } +VCGT/VCGTQ:1:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:2:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:3:result_uint8x8 [] = { 0, 0, 0, 0, ff, ff, ff, ff, } +VCGT/VCGTQ:4:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:5:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, } +VCGT/VCGTQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:8:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:9:result_uint8x16 [] = { 0, 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGT/VCGTQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:11:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:12:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:13:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:14:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:15:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:16:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ FP 
special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (-inf):21:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGT/VCGTQ FP special (inf):22:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGT/VCGTQ FP special (-0.0):23:result_uint32x2 [] = { 0, 0, } + +VCLT/VCLTQ output: +VCLT/VCLTQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, 0, 0, } +VCLT/VCLTQ:1:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:2:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:3:result_uint8x8 [] = { ff, ff, ff, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:4:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:5:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, 0, } +VCLT/VCLTQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } +VCLT/VCLTQ:8:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:9:result_uint8x16 [] = { ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } +VCLT/VCLTQ:11:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:12:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:13:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:14:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:15:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:16:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (inf):20:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLT/VCLTQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special 
(-0.0):23:result_uint32x2 [] = { 0, 0, } + +VBSL/VBSLQ output: +VBSL/VBSLQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:3:result_int64x1 [] = { fffffffffffffffd, } +VBSL/VBSLQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:5:result_uint16x4 [] = { fff0, fff0, fff2, fff2, } +VBSL/VBSLQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:7:result_uint64x1 [] = { fffffff1, } +VBSL/VBSLQ:8:result_poly8x8 [] = { f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:9:result_poly16x4 [] = { fff0, fff0, fff2, fff2, } +VBSL/VBSLQ:10:result_float32x2 [] = { c1800004, c1700004, } +VBSL/VBSLQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VBSL/VBSLQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:13:result_int16x8 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VBSL/VBSLQ:14:result_int32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:15:result_int64x2 [] = { fffffffffffffffd, fffffffffffffffd, } +VBSL/VBSLQ:16:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:17:result_uint16x8 [] = { fff0, fff0, fff2, fff2, fff4, fff4, fff6, fff6, } +VBSL/VBSLQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:19:result_uint64x2 [] = { fffffff1, fffffff1, } +VBSL/VBSLQ:20:result_poly8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:21:result_poly16x8 [] = { fff0, fff0, fff2, fff2, fff4, fff4, fff6, fff6, } +VBSL/VBSLQ:22:result_float32x4 [] = { c1800001, c1700001, c1600001, c1500001, } +VBSL/VBSLQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 
} + +VSHL/VSHLQ output: +VSHL/VSHLQ:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL/VSHLQ:1:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VSHL/VSHLQ:2:result_int32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:3:result_int64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:4:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VSHL/VSHLQ:5:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VSHL/VSHLQ:6:result_uint32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:7:result_uint64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:12:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL/VSHLQ:13:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:14:result_int32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:15:result_int64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:16:result_uint8x16 [] = { 0, 20, 40, 60, 80, a0, c0, e0, 0, 20, 40, 60, 80, a0, c0, e0, } +VSHL/VSHLQ:17:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:18:result_uint32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:19:result_uint64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHL/VSHLQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHL/VSHLQ (large shift amount) output: +VSHL/VSHLQ:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:25:result_int16x4 [] = { 0, 
0, 0, 0, } +VSHL/VSHLQ:26:result_int32x2 [] = { 0, 0, } +VSHL/VSHLQ:27:result_int64x1 [] = { 0, } +VSHL/VSHLQ:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:30:result_uint32x2 [] = { 0, 0, } +VSHL/VSHLQ:31:result_uint64x1 [] = { 0, } +VSHL/VSHLQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:34:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:36:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:38:result_int32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:39:result_int64x2 [] = { 0, 0, } +VSHL/VSHLQ:40:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:42:result_uint32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:43:result_uint64x2 [] = { 0, 0, } +VSHL/VSHLQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHL/VSHLQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHL/VSHLQ (negative shift amount) output: +VSHL/VSHLQ:48:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHL/VSHLQ:49:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHL/VSHLQ:50:result_int32x2 [] = { fffffffc, fffffffc, } +VSHL/VSHLQ:51:result_int64x1 [] = { ffffffffffffffff, } +VSHL/VSHLQ:52:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VSHL/VSHLQ:53:result_uint16x4 [] = { 7ff8, 7ff8, 7ff9, 7ff9, } +VSHL/VSHLQ:54:result_uint32x2 [] = { 3ffffffc, 3ffffffc, } +VSHL/VSHLQ:55:result_uint64x1 [] = { fffffffffffffff, } 
+VSHL/VSHLQ:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:58:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:59:result_float16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:60:result_int8x16 [] = { fffffffc, fffffffc, fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, fffffffe, fffffffe, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHL/VSHLQ:61:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHL/VSHLQ:62:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } +VSHL/VSHLQ:63:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHL/VSHLQ:64:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHL/VSHLQ:65:result_uint16x8 [] = { 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, } +VSHL/VSHLQ:66:result_uint32x4 [] = { 1ffffffe, 1ffffffe, 1ffffffe, 1ffffffe, } +VSHL/VSHLQ:67:result_uint64x2 [] = { 7ffffffffffffff, 7ffffffffffffff, } +VSHL/VSHLQ:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHL/VSHLQ:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHL_N output: +VSHL_N:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:1:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHL_N:2:result_int32x2 [] = { ffffff80, ffffff88, } +VSHL_N:3:result_int64x1 [] = { ffffffffffffffc0, } +VSHL_N:4:result_uint8x8 [] = { c0, c4, c8, cc, d0, d4, d8, dc, } +VSHL_N:5:result_uint16x4 [] = { ff00, ff10, ff20, ff30, } +VSHL_N:6:result_uint32x2 [] = { ffffff80, ffffff88, } +VSHL_N:7:result_uint64x1 [] = { ffffffffffffffe0, } +VSHL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 
33, 33, } +VSHL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHL_N:12:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL_N:13:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:14:result_int32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:15:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VSHL_N:16:result_uint8x16 [] = { c0, c4, c8, cc, d0, d4, d8, dc, e0, e4, e8, ec, f0, f4, f8, fc, } +VSHL_N:17:result_uint16x8 [] = { ff80, ff88, ff90, ff98, ffa0, ffa8, ffb0, ffb8, } +VSHL_N:18:result_uint32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:19:result_uint64x2 [] = { ffffffffffffffe0, ffffffffffffffe2, } +VSHL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (with input = 0) cumulative saturation output: +VQSHL/VQSHLQ:0:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:1:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:2:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:3:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:4:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:5:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:6:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:7:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:8:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:9:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:10:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:11:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:12:vqshlq_u8 Neon 
cumulative saturation 0 +VQSHL/VQSHLQ:13:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:14:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:15:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (with input = 0) output: +VQSHL/VQSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:18:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:19:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:23:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:28:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:29:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:30:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:31:result_int64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:32:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:33:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:34:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:35:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:40:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:41:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:42:vqshl_s32 Neon 
cumulative saturation 0 +VQSHL/VQSHLQ:43:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:44:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:45:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:46:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:47:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:48:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:49:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:50:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:51:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:52:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:53:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:54:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:55:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (input 0 and negative shift amount) output: +VQSHL/VQSHLQ:56:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:57:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:58:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:59:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:60:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:61:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:62:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:63:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:64:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:65:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:66:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:67:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:68:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:69:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:70:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:71:result_int64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:72:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:73:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:74:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:75:result_uint64x2 [] = { 
0, 0, } +VQSHL/VQSHLQ:76:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:78:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ cumulative saturation output: +VQSHL/VQSHLQ:80:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:81:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:82:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:83:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:84:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:85:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:86:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:87:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:88:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:89:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:90:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:91:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:92:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:93:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:94:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:95:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ output: +VQSHL/VQSHLQ:96:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL/VQSHLQ:97:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQSHL/VQSHLQ:98:result_int32x2 [] = { fffff000, fffff100, } +VQSHL/VQSHLQ:99:result_int64x1 [] = { fffffffffffffffe, } +VQSHL/VQSHLQ:100:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:101:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:102:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:103:result_uint64x1 [] = { 1ffffffffffffffe, } +VQSHL/VQSHLQ:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQSHL/VQSHLQ:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:106:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:107:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:108:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:109:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:110:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:111:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:112:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:113:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:114:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:115:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:120:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:121:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:122:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:123:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:124:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:125:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:126:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:127:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:128:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:129:vqshlq_s16 Neon 
cumulative saturation 0 +VQSHL/VQSHLQ:130:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:131:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:132:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:133:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:134:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:135:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (negative shift amount) output: +VQSHL/VQSHLQ:136:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHL/VQSHLQ:137:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VQSHL/VQSHLQ:138:result_int32x2 [] = { fffffffe, fffffffe, } +VQSHL/VQSHLQ:139:result_int64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:140:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VQSHL/VQSHLQ:141:result_uint16x4 [] = { 3ffc, 3ffc, 3ffc, 3ffc, } +VQSHL/VQSHLQ:142:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQSHL/VQSHLQ:143:result_uint64x1 [] = { fffffffffffffff, } +VQSHL/VQSHLQ:144:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:145:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:146:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:147:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:148:result_int8x16 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:149:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:150:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:151:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:152:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VQSHL/VQSHLQ:153:result_uint16x8 [] = { 1f, 1f, 1f, 1f, 1f, 1f, 1f, 1f, } +VQSHL/VQSHLQ:154:result_uint32x4 [] = { 7ffff, 7ffff, 7ffff, 7ffff, } 
+VQSHL/VQSHLQ:155:result_uint64x2 [] = { fffffffffff, fffffffffff, } +VQSHL/VQSHLQ:156:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:157:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:158:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:159:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (large shift amount, negative input) cumulative saturation output: +VQSHL/VQSHLQ:160:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:161:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:162:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:163:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:164:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:165:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:166:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:167:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:168:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:169:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:170:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:171:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:172:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:173:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:174:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:175:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ (large shift amount, negative input) output: +VQSHL/VQSHLQ:176:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:177:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:178:result_int32x2 [] = { 80000000, 80000000, } +VQSHL/VQSHLQ:179:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:180:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:181:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:182:result_uint32x2 [] = { ffffffff, 
ffffffff, } +VQSHL/VQSHLQ:183:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:184:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:185:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:186:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:187:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:188:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:189:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:190:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:191:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:192:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:193:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:194:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:195:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:196:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:197:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:198:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:199:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (check cumulative saturation) cumulative saturation output: +VQSHL/VQSHLQ:200:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:201:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:202:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:203:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:204:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:205:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:206:vqshl_u32 Neon cumulative saturation 
0 +VQSHL/VQSHLQ:207:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:208:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:209:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:210:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:211:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:212:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:213:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:214:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:215:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (check cumulative saturation) output: +VQSHL/VQSHLQ:216:result_int8x8 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:217:result_int16x4 [] = { 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:218:result_int32x2 [] = { 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:219:result_int64x1 [] = { 3fffffffffffffff, } +VQSHL/VQSHLQ:220:result_uint8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:221:result_uint16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:222:result_uint32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:223:result_uint64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:224:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:225:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:226:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:227:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:228:result_int8x16 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:229:result_int16x8 [] = { 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:230:result_int32x4 [] = { 3fffffff, 3fffffff, 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:231:result_int64x2 [] = { 3fffffffffffffff, 3fffffffffffffff, } +VQSHL/VQSHLQ:232:result_uint8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:233:result_uint16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:234:result_uint32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } 
+VQSHL/VQSHLQ:235:result_uint64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:236:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:237:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:238:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:239:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (large shift amount, positive input) cumulative saturation output: +VQSHL/VQSHLQ:240:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:241:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:242:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:243:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:244:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:245:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:246:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:247:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:248:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:249:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:250:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:251:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:252:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:253:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:254:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:255:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ (large shift amount, positive input) output: +VQSHL/VQSHLQ:256:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:257:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:258:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:259:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:260:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:261:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:262:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:263:result_uint64x1 [] = { 
ffffffffffffffff, } +VQSHL/VQSHLQ:264:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:265:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:266:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:267:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:268:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:269:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:270:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:271:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:272:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:273:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:274:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:275:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:276:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:277:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:278:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:279:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (check saturation on 64 bits) cumulative saturation output: +VQSHL/VQSHLQ:280:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:281:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:282:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:283:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:284:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:285:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:286:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:287:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:288:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:289:vqshlq_s16 Neon cumulative saturation 1 
+VQSHL/VQSHLQ:290:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:291:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:292:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:293:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:294:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:295:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ (check saturation on 64 bits) output: +VQSHL/VQSHLQ:296:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:297:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:298:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:299:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:300:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:301:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:302:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:303:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:304:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:305:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:306:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:307:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:308:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:309:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:310:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:311:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:312:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:313:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:314:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:315:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:316:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, } +VQSHL/VQSHLQ:317:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:318:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:319:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL_N/VQSHLQ_N cumulative saturation output: +VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon cumulative saturation 1 + +VQSHL_N/VQSHLQ_N output: +VQSHL_N/VQSHLQ_N:16:result_int8x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } +VQSHL_N/VQSHLQ_N:17:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:18:result_int32x2 [] = { ffffffe0, ffffffe2, } +VQSHL_N/VQSHLQ_N:19:result_int64x1 [] = { ffffffffffffffc0, } +VQSHL_N/VQSHLQ_N:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:23:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 
33, 33, 33, } +VQSHL_N/VQSHLQ_N:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:26:result_float32x2 [] = { 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL_N/VQSHLQ_N:28:result_int8x16 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, ffffffe0, ffffffe4, ffffffe8, ffffffec, fffffff0, fffffff4, fffffff8, fffffffc, } +VQSHL_N/VQSHLQ_N:29:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL_N/VQSHLQ_N:30:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:31:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VQSHL_N/VQSHLQ_N:32:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:33:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:34:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:35:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) cumulative saturation output: +VQSHL_N/VQSHLQ_N:40:vqshl_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:41:vqshl_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:42:vqshl_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:43:vqshl_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:44:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:45:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:46:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:47:vqshl_n_u64 
Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:48:vqshlq_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:49:vqshlq_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:50:vqshlq_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:51:vqshlq_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:52:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:53:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:54:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:55:vqshlq_n_u64 Neon cumulative saturation 1 + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: +VQSHL_N/VQSHLQ_N:56:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:57:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:58:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:59:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:60:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:61:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:62:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:63:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:64:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:65:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:66:result_float32x2 [] = { 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:67:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL_N/VQSHLQ_N:68:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:69:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:70:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:71:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:72:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:73:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } 
+VQSHL_N/VQSHLQ_N:74:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:75:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:76:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:78:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (with input = 0) output: +VRSHL/VRSHLQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:2:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:3:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:6:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:7:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:12:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:13:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:14:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:15:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:16:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:17:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:18:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:19:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:22:result_float32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (input 0 and negative shift amount) output: +VRSHL/VRSHLQ:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:25:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:26:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:27:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:30:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:31:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:34:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:36:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:38:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:39:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:40:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:42:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:43:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ output: +VRSHL/VRSHLQ:48:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VRSHL/VRSHLQ:49:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VRSHL/VRSHLQ:50:result_int32x2 [] = { fffff000, fffff100, } 
+VRSHL/VRSHLQ:51:result_int64x1 [] = { fffffffffffffffe, } +VRSHL/VRSHLQ:52:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VRSHL/VRSHLQ:53:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VRSHL/VRSHLQ:54:result_uint32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:55:result_uint64x1 [] = { 1ffffffffffffffe, } +VRSHL/VRSHLQ:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:58:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:59:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:60:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:61:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:62:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:63:result_int64x2 [] = { 0, 8000000000000000, } +VRSHL/VRSHLQ:64:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:65:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:66:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:67:result_uint64x2 [] = { 0, 8000000000000000, } +VRSHL/VRSHLQ:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (negative shift amount) output: +VRSHL/VRSHLQ:72:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHL/VRSHLQ:73:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHL/VRSHLQ:74:result_int32x2 [] = { fffffffe, fffffffe, } +VRSHL/VRSHLQ:75:result_int64x1 [] = { ffffffffffffffff, } +VRSHL/VRSHLQ:76:result_uint8x8 [] = { 78, 79, 79, 7a, 7a, 7b, 7b, 7c, } +VRSHL/VRSHLQ:77:result_uint16x4 [] = { 3ffc, 
3ffc, 3ffd, 3ffd, } +VRSHL/VRSHLQ:78:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VRSHL/VRSHLQ:79:result_uint64x1 [] = { fffffffffffffff, } +VRSHL/VRSHLQ:80:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:81:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:82:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:83:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:84:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:85:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:86:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:87:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:88:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VRSHL/VRSHLQ:89:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:90:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VRSHL/VRSHLQ:91:result_uint64x2 [] = { 100000000000, 100000000000, } +VRSHL/VRSHLQ:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (checking round_const overflow: shift by -1) output: +VRSHL/VRSHLQ:96:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:97:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:98:result_int32x2 [] = { 40000000, 40000000, } +VRSHL/VRSHLQ:99:result_int64x1 [] = { 4000000000000000, } +VRSHL/VRSHLQ:100:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:101:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:102:result_uint32x2 [] = { 80000000, 80000000, } +VRSHL/VRSHLQ:103:result_uint64x1 [] = { 8000000000000000, } +VRSHL/VRSHLQ:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VRSHL/VRSHLQ:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:106:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:107:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:108:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:109:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:110:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHL/VRSHLQ:111:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHL/VRSHLQ:112:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:113:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:114:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHL/VRSHLQ:115:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHL/VRSHLQ:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (checking round_const overflow: shift by -3) output: +VRSHL/VRSHLQ:120:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:121:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:122:result_int32x2 [] = { 10000000, 10000000, } +VRSHL/VRSHLQ:123:result_int64x1 [] = { 1000000000000000, } +VRSHL/VRSHLQ:124:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:125:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:126:result_uint32x2 [] = { 20000000, 20000000, } +VRSHL/VRSHLQ:127:result_uint64x1 [] = { 2000000000000000, } +VRSHL/VRSHLQ:128:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:129:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VRSHL/VRSHLQ:130:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:131:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:132:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:133:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:134:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHL/VRSHLQ:135:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHL/VRSHLQ:136:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:137:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:138:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHL/VRSHLQ:139:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHL/VRSHLQ:140:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:141:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:142:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:143:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (checking negative shift amount as large as input vector width) output: +VRSHL/VRSHLQ:144:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:145:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:146:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:147:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:148:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:149:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSHL/VRSHLQ:150:result_uint32x2 [] = { 1, 1, } +VRSHL/VRSHLQ:151:result_uint64x1 [] = { 1, } +VRSHL/VRSHLQ:152:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:153:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:154:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:155:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:156:result_int8x16 [] 
= { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:157:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:158:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:159:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:160:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:161:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:162:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHL/VRSHLQ:163:result_uint64x2 [] = { 1, 1, } +VRSHL/VRSHLQ:164:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:165:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:166:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:167:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (large shift amount) output: +VRSHL/VRSHLQ:168:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:169:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:170:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:171:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:172:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:173:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:174:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:175:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:176:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:177:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:178:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:179:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:180:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:181:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:182:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:183:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:184:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:185:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VRSHL/VRSHLQ:186:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:187:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:188:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:189:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:190:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:191:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (large negative shift amount) output: +VRSHL/VRSHLQ:192:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:193:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:194:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:195:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:196:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:197:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:198:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:199:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:200:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:201:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:202:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:203:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:204:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:205:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:206:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:207:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:208:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:209:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:210:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHL/VRSHLQ:211:result_uint64x2 [] = { 1, 1, } +VRSHL/VRSHLQ:212:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:213:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:214:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VRSHL/VRSHLQ:215:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD2/VLD2Q chunk 0 output: +VLD2/VLD2Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD2/VLD2Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD2/VLD2Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD2/VLD2Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD2/VLD2Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD2/VLD2Q:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD2/VLD2Q:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:22:result_float32x4 [] = { c1800000, c1700000, c1600000, 
c1500000, } +VLD2/VLD2Q:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } + +VLD2/VLD2Q chunk 1 output: +VLD2/VLD2Q:24:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:25:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:27:result_int64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:28:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:29:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:32:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:33:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:34:result_float32x2 [] = { c1600000, c1500000, } +VLD2/VLD2Q:35:result_float16x4 [] = { ca00, c980, c900, c880, } +VLD2/VLD2Q:36:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:37:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:38:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:40:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:41:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD2/VLD2Q:42:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:44:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:45:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD2/VLD2Q:46:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } +VLD2/VLD2Q:47:result_float16x8 [] = { c800, c700, c600, c500, c400, c200, c000, bc00, } + 
+VLD3/VLD3Q chunk 0 output: +VLD3/VLD3Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD3/VLD3Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD3/VLD3Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD3/VLD3Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3/VLD3Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD3/VLD3Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD3/VLD3Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3/VLD3Q:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD3/VLD3Q:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VLD3/VLD3Q:23:result_float16x8 [] = { cc00, cb80, cb00, 
ca80, ca00, c980, c900, c880, } + +VLD3/VLD3Q chunk 1 output: +VLD3/VLD3Q:24:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:25:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:27:result_int64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:28:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:29:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:32:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:33:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:34:result_float32x2 [] = { c1600000, c1500000, } +VLD3/VLD3Q:35:result_float16x4 [] = { ca00, c980, c900, c880, } +VLD3/VLD3Q:36:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:37:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:38:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:40:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:41:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD3/VLD3Q:42:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:44:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:45:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD3/VLD3Q:46:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } +VLD3/VLD3Q:47:result_float16x8 [] = { c800, c700, c600, c500, c400, c200, c000, bc00, } + +VLD3/VLD3Q chunk 2 output: +VLD3/VLD3Q:48:result_int8x8 [] = { 0, 1, 
2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:49:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:50:result_int32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:51:result_int64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:52:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:53:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD3/VLD3Q:54:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:55:result_uint64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:56:result_poly8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:57:result_poly16x4 [] = { fff8, fff9, fffa, fffb, } +VLD3/VLD3Q:58:result_float32x2 [] = { c1400000, c1300000, } +VLD3/VLD3Q:59:result_float16x4 [] = { c800, c700, c600, c500, } +VLD3/VLD3Q:60:result_int8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:61:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:62:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:64:result_uint8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:65:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:66:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:68:result_poly8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:69:result_poly16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:70:result_float32x4 [] = { c1000000, c0e00000, c0c00000, c0a00000, } +VLD3/VLD3Q:71:result_float16x8 [] = { 0, 3c00, 4000, 4200, 4400, 4500, 4600, 4700, } + +VLD4/VLD4Q chunk 0 output: +VLD4/VLD4Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:2:result_int32x2 [] = { fffffff0, fffffff1, } 
+VLD4/VLD4Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD4/VLD4Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4/VLD4Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD4/VLD4Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4/VLD4Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4/VLD4Q:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4/VLD4Q:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VLD4/VLD4Q:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } + +VLD4/VLD4Q chunk 1 output: +VLD4/VLD4Q:24:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:25:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } 
+VLD4/VLD4Q:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:27:result_int64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:28:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:29:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:32:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:33:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:34:result_float32x2 [] = { c1600000, c1500000, } +VLD4/VLD4Q:35:result_float16x4 [] = { ca00, c980, c900, c880, } +VLD4/VLD4Q:36:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:37:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:38:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:40:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:41:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:42:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:44:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:45:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:46:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } +VLD4/VLD4Q:47:result_float16x8 [] = { c800, c700, c600, c500, c400, c200, c000, bc00, } + +VLD4/VLD4Q chunk 2 output: +VLD4/VLD4Q:48:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:49:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:50:result_int32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:51:result_int64x1 [] = { fffffffffffffff2, } +VLD4/VLD4Q:52:result_uint8x8 [] = { 0, 1, 
2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:53:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD4/VLD4Q:54:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:55:result_uint64x1 [] = { fffffffffffffff2, } +VLD4/VLD4Q:56:result_poly8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:57:result_poly16x4 [] = { fff8, fff9, fffa, fffb, } +VLD4/VLD4Q:58:result_float32x2 [] = { c1400000, c1300000, } +VLD4/VLD4Q:59:result_float16x4 [] = { c800, c700, c600, c500, } +VLD4/VLD4Q:60:result_int8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:61:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:62:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:64:result_uint8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:65:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:66:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:68:result_poly8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:69:result_poly16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:70:result_float32x4 [] = { c1000000, c0e00000, c0c00000, c0a00000, } +VLD4/VLD4Q:71:result_float16x8 [] = { 0, 3c00, 4000, 4200, 4400, 4500, 4600, 4700, } + +VLD4/VLD4Q chunk 3 output: +VLD4/VLD4Q:72:result_int8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:73:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:74:result_int32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:75:result_int64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:76:result_uint8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:77:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:78:result_uint32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:79:result_uint64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:80:result_poly8x8 [] 
= { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:81:result_poly16x4 [] = { fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:82:result_float32x2 [] = { c1200000, c1100000, } +VLD4/VLD4Q:83:result_float16x4 [] = { c400, c200, c000, bc00, } +VLD4/VLD4Q:84:result_int8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:85:result_int16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:86:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:88:result_uint8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:89:result_uint16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:90:result_uint32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:91:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:92:result_poly8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:93:result_poly16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:94:result_float32x4 [] = { c0800000, c0400000, c0000000, bf800000, } +VLD4/VLD4Q:95:result_float16x8 [] = { 4800, 4880, 4900, 4980, 4a00, 4a80, 4b00, 4b80, } + +VDUP_LANE/VDUP_LANEQ output: +VDUP_LANE/VDUP_LANEQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:4:result_uint8x8 [] = { f7, f7, f7, f7, f7, f7, f7, f7, } +VDUP_LANE/VDUP_LANEQ:5:result_uint16x4 [] = { fff3, fff3, fff3, fff3, } +VDUP_LANE/VDUP_LANEQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:8:result_poly8x8 [] = { f7, f7, f7, f7, f7, f7, f7, f7, } +VDUP_LANE/VDUP_LANEQ:9:result_poly16x4 [] = { 
fff3, fff3, fff3, fff3, } +VDUP_LANE/VDUP_LANEQ:10:result_float32x2 [] = { c1700000, c1700000, } +VDUP_LANE/VDUP_LANEQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VDUP_LANE/VDUP_LANEQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:13:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VDUP_LANE/VDUP_LANEQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:16:result_uint8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, } +VDUP_LANE/VDUP_LANEQ:17:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP_LANE/VDUP_LANEQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP_LANE/VDUP_LANEQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:20:result_poly8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, } +VDUP_LANE/VDUP_LANEQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP_LANE/VDUP_LANEQ:22:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } +VDUP_LANE/VDUP_LANEQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULL_LANE cumulative saturation output: +VQDMULL_LANE:0:vqdmull_lane_s16 Neon cumulative saturation 0 +VQDMULL_LANE:1:vqdmull_lane_s32 Neon cumulative saturation 0 + +VQDMULL_LANE output: +VQDMULL_LANE:2:result_int32x4 [] = { 8000, 8000, 8000, 8000, } +VQDMULL_LANE:3:result_int64x2 [] = { 4000, 4000, } + +VQDMULL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULL_LANE:4:vqdmull_lane_s16 Neon cumulative saturation 1 +VQDMULL_LANE:5:vqdmull_lane_s32 Neon cumulative saturation 1 + +VQDMULL_LANE (check mul cumulative 
saturation) output: +VQDMULL_LANE:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_LANE:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VQDMULL_N cumulative saturation output: +VQDMULL_N:0:vqdmull_n_s16 Neon cumulative saturation 0 +VQDMULL_N:1:vqdmull_n_s32 Neon cumulative saturation 0 + +VQDMULL_N output: +VQDMULL_N:2:result_int32x4 [] = { 44000, 44000, 44000, 44000, } +VQDMULL_N:3:result_int64x2 [] = { aa000, aa000, } + +VQDMULL_N (check mul cumulative saturation) cumulative saturation output: +VQDMULL_N:4:vqdmull_n_s16 Neon cumulative saturation 1 +VQDMULL_N:5:vqdmull_n_s32 Neon cumulative saturation 1 + +VQDMULL_N (check mul cumulative saturation) output: +VQDMULL_N:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_N:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VST1_LANE/VST1_LANEQ output: +VST1_LANE/VST1_LANEQ:0:result_int8x8 [] = { fffffff7, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:1:result_int16x4 [] = { fffffff3, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:2:result_int32x2 [] = { fffffff1, 33333333, } +VST1_LANE/VST1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:4:result_uint8x8 [] = { f6, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:5:result_uint16x4 [] = { fff2, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:6:result_uint32x2 [] = { fffffff0, 33333333, } +VST1_LANE/VST1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:8:result_poly8x8 [] = { f6, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:9:result_poly16x4 [] = { fff2, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:10:result_float32x2 [] = { c1700000, 33333333, } +VST1_LANE/VST1_LANEQ:11:result_float16x4 [] = { cb00, 0, 0, 0, } +VST1_LANE/VST1_LANEQ:12:result_int8x16 [] = { ffffffff, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:13:result_int16x8 [] = { fffffff5, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VST1_LANE/VST1_LANEQ:14:result_int32x4 [] = { fffffff1, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:15:result_int64x2 [] = { fffffffffffffff1, 3333333333333333, } +VST1_LANE/VST1_LANEQ:16:result_uint8x16 [] = { fa, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:17:result_uint16x8 [] = { fff4, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:18:result_uint32x4 [] = { fffffff3, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:19:result_uint64x2 [] = { fffffffffffffff0, 3333333333333333, } +VST1_LANE/VST1_LANEQ:20:result_poly8x16 [] = { fa, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:21:result_poly16x8 [] = { fff4, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:22:result_float32x4 [] = { c1700000, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:23:result_float16x8 [] = { c980, 0, 0, 0, 0, 0, 0, 0, } + +VSUB/VSUBQ output: +VSUB/VSUBQ:0:result_int8x8 [] = { ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VSUB/VSUBQ:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VSUB/VSUBQ:2:result_int32x2 [] = { ffffffed, ffffffee, } +VSUB/VSUBQ:3:result_int64x1 [] = { ffffffffffffff8c, } +VSUB/VSUBQ:4:result_uint8x8 [] = { dc, dd, de, df, e0, e1, e2, e3, } +VSUB/VSUBQ:5:result_uint16x4 [] = { ffd2, ffd3, ffd4, ffd5, } +VSUB/VSUBQ:6:result_uint32x2 [] = { ffffffc8, ffffffc9, } +VSUB/VSUBQ:7:result_uint64x1 [] = { ffffffffffffffee, } +VSUB/VSUBQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUB/VSUBQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUB/VSUBQ:10:result_float32x2 [] = { 33333333, 33333333, } +VSUB/VSUBQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSUB/VSUBQ:12:result_int8x16 [] = { fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, } +VSUB/VSUBQ:13:result_int16x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VSUB/VSUBQ:14:result_int32x4 [] = { 
e, f, 10, 11, } +VSUB/VSUBQ:15:result_int64x2 [] = { ffffffffffffffd8, ffffffffffffffd9, } +VSUB/VSUBQ:16:result_uint8x16 [] = { e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, } +VSUB/VSUBQ:17:result_uint16x8 [] = { ffed, ffee, ffef, fff0, fff1, fff2, fff3, fff4, } +VSUB/VSUBQ:18:result_uint32x4 [] = { ffffffb9, ffffffba, ffffffbb, ffffffbc, } +VSUB/VSUBQ:19:result_uint64x2 [] = { ffffffffffffffed, ffffffffffffffee, } +VSUB/VSUBQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUB/VSUBQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUB/VSUBQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUB/VSUBQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +float32: +VSUB/VSUBQ:24:result_float32x2 [] = { c00ccccd, c00ccccd, } +VSUB/VSUBQ:25:result_float32x4 [] = { c00ccccc, c00ccccc, c00ccccc, c00ccccc, } + +VQADD/VQADDQ cumulative saturation output: +VQADD/VQADDQ:0:vqadd_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:1:vqadd_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:2:vqadd_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:3:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:4:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:5:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:6:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:7:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:8:vqaddq_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:9:vqaddq_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:10:vqaddq_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:11:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:12:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:13:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:14:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:15:vqaddq_u64 Neon cumulative saturation 1 + +VQADD/VQADDQ output: +VQADD/VQADDQ:16:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VQADD/VQADDQ:17:result_int16x4 [] = { 12, 
13, 14, 15, } +VQADD/VQADDQ:18:result_int32x2 [] = { 23, 24, } +VQADD/VQADDQ:19:result_int64x1 [] = { 34, } +VQADD/VQADDQ:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:23:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQADD/VQADDQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQADD/VQADDQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQADD/VQADDQ:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQADD/VQADDQ:28:result_int8x16 [] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, } +VQADD/VQADDQ:29:result_int16x8 [] = { 12, 13, 14, 15, 16, 17, 18, 19, } +VQADD/VQADDQ:30:result_int32x4 [] = { 23, 24, 25, 26, } +VQADD/VQADDQ:31:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:32:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:33:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:34:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQADD/VQADDQ:35:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQADD/VQADDQ:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQADD/VQADDQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQADD/VQADDQ:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQADD/VQADDQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:40:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:41:vqadd_u64 Neon cumulative saturation 0 +VQADD/VQADDQ:42:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:43:vqaddq_u64 Neon cumulative saturation 0 + +64 bits saturation: +VQADD/VQADDQ:44:result_int64x1 [] = { fffffffffffffff0, } 
+VQADD/VQADDQ:45:result_uint64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:46:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQADD/VQADDQ:47:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:48:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:49:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:50:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:51:vqaddq_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:52:result_int64x1 [] = { 34, } +VQADD/VQADDQ:53:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:54:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:55:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:56:vqadd_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:57:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:58:vqaddq_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:59:vqaddq_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:60:result_int64x1 [] = { 8000000000000000, } +VQADD/VQADDQ:61:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQADD/VQADDQ:63:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +less than 64 bits saturation: +VQADD/VQADDQ:64:vqadd_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:65:vqadd_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:66:vqadd_s32 Neon cumulative saturation 1 +VQADD/VQADDQ:67:vqaddq_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:68:vqaddq_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:69:vqaddq_s32 Neon cumulative saturation 1 +VQADD/VQADDQ:70:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:71:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:72:result_int32x2 [] = { 80000000, 80000000, } +VQADD/VQADDQ:73:result_int8x16 [] = { ffffff80, ffffff80, 
ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:74:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:75:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQADD/VQADDQ less than 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:76:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:77:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:78:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:79:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:80:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:81:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:82:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:83:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:84:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:85:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:86:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:87:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VABS/VABSQ output: +VABS/VABSQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:1:result_int16x4 [] = { 10, f, e, d, } +VABS/VABSQ:2:result_int32x2 [] = { 10, f, } +VABS/VABSQ:3:result_int64x1 [] = { 3333333333333333, } +VABS/VABSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABS/VABSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VABS/VABSQ:7:result_uint64x1 [] = { 3333333333333333, } +VABS/VABSQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABS/VABSQ:10:result_float32x2 [] = { 33333333, 33333333, } +VABS/VABSQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABS/VABSQ:12:result_int8x16 [] 
= { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VABS/VABSQ:13:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:14:result_int32x4 [] = { 10, f, e, d, } +VABS/VABSQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABS/VABSQ:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABS/VABSQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABS/VABSQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABS/VABSQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +float32: +VABS/VABSQ:24:result_float32x2 [] = { 40133333, 40133333, } +VABS/VABSQ:25:result_float32x4 [] = { 4059999a, 4059999a, 4059999a, 4059999a, } + +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 0 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 0 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:7:result_int16x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:8:result_int32x2 [] = { 10, f, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } 
+VQABS/VQABSQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQABS/VQABSQ:18:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQABS/VQABSQ:19:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:20:result_int32x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 1 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 1 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQABS/VQABSQ:18:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:19:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:20:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQABS/VQABSQ:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCOMBINE output: +VCOMBINE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:2:result_int32x2 [] = { 33333333, 33333333, } +VCOMBINE:3:result_int64x1 [] = { 3333333333333333, } +VCOMBINE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:6:result_uint32x2 [] = { 33333333, 33333333, } +VCOMBINE:7:result_uint64x1 [] = { 3333333333333333, } 
+VCOMBINE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:10:result_float32x2 [] = { 33333333, 33333333, } +VCOMBINE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VCOMBINE:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, 11, 11, 11, 11, 11, 11, 11, } +VCOMBINE:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 22, 22, 22, 22, } +VCOMBINE:14:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VCOMBINE:15:result_int64x2 [] = { fffffffffffffff0, 44, } +VCOMBINE:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, 55, 55, 55, 55, 55, 55, 55, 55, } +VCOMBINE:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 66, 66, 66, 66, } +VCOMBINE:18:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VCOMBINE:19:result_uint64x2 [] = { fffffffffffffff0, 88, } +VCOMBINE:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, 55, 55, 55, 55, 55, 55, 55, 55, } +VCOMBINE:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, 66, 66, 66, 66, } +VCOMBINE:22:result_float32x4 [] = { c1800000, c1700000, 40533333, 40533333, } +VCOMBINE:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, 4b80, 4b80, 4b80, 4b80, } + +VMAX/VMAXQ output: +VMAX/VMAXQ:0:result_int8x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff3, } +VMAX/VMAXQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:3:result_int64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f4, f5, f6, f7, } +VMAX/VMAXQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff3, } +VMAX/VMAXQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:7:result_uint64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMAX/VMAXQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VMAX/VMAXQ:10:result_float32x2 [] = { c1780000, c1700000, } +VMAX/VMAXQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMAX/VMAXQ:12:result_int8x16 [] = { fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VMAX/VMAXQ:13:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:16:result_uint8x16 [] = { f9, f9, f9, f9, f9, f9, f9, f9, f9, f9, fa, fb, fc, fd, fe, ff, } +VMAX/VMAXQ:17:result_uint16x8 [] = { fff2, fff2, fff2, fff3, fff4, fff5, fff6, fff7, } +VMAX/VMAXQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMAX/VMAXQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMAX/VMAXQ:22:result_float32x4 [] = { c1680000, c1680000, c1600000, c1500000, } +VMAX/VMAXQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VMAX/VMAXQ FP special (NaN):24:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMAX/VMAXQ FP special (-NaN):25:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMAX/VMAXQ FP special (inf):26:result_float32x4 [] = { 7f800000, 7f800000, 7f800000, 7f800000, } +VMAX/VMAXQ FP special (-inf):27:result_float32x4 [] = { 3f800000, 3f800000, 3f800000, 3f800000, } +VMAX/VMAXQ FP special (-0.0):28:result_float32x4 [] = { 0, 0, 0, 0, } +VMAX/VMAXQ FP special (-0.0):29:result_float32x4 [] = { 0, 0, 0, 0, } + +VMIN/VMINQ output: +VMIN/VMINQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:1:result_int16x4 [] = { fffffff0, 
fffffff1, fffffff2, fffffff2, } +VMIN/VMINQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:3:result_int64x1 [] = { 3333333333333333, } +VMIN/VMINQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f3, f3, f3, f3, } +VMIN/VMINQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff1, } +VMIN/VMINQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:7:result_uint64x1 [] = { 3333333333333333, } +VMIN/VMINQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMIN/VMINQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMIN/VMINQ:10:result_float32x2 [] = { c1800000, c1780000, } +VMIN/VMINQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMIN/VMINQ:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, } +VMIN/VMINQ:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f9, f9, f9, f9, f9, f9, } +VMIN/VMINQ:17:result_uint16x8 [] = { fff0, fff1, fff2, fff2, fff2, fff2, fff2, fff2, } +VMIN/VMINQ:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMIN/VMINQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMIN/VMINQ:22:result_float32x4 [] = { c1800000, c1700000, c1680000, c1680000, } +VMIN/VMINQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VMIN/VMINQ FP special (NaN):24:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMIN/VMINQ FP special (-NaN):25:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } 
+VMIN/VMINQ FP special (inf):26:result_float32x4 [] = { 3f800000, 3f800000, 3f800000, 3f800000, } +VMIN/VMINQ FP special (-inf):27:result_float32x4 [] = { ff800000, ff800000, ff800000, ff800000, } +VMIN/VMINQ FP special (-0.0):28:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VMIN/VMINQ FP special (-0.0):29:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VNEG/VNEGQ output: +VNEG/VNEGQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:1:result_int16x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:2:result_int32x2 [] = { 10, f, } +VNEG/VNEGQ:3:result_int64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VNEG/VNEGQ:7:result_uint64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:10:result_float32x2 [] = { 33333333, 33333333, } +VNEG/VNEGQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VNEG/VNEGQ:12:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VNEG/VNEGQ:13:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:14:result_int32x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VNEG/VNEGQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:22:result_float32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VNEG/VNEGQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +float32: +VNEG/VNEGQ:24:result_float32x2 [] = { c0133333, c0133333, } +VNEG/VNEGQ:25:result_float32x4 [] = { c059999a, c059999a, c059999a, c059999a, } + +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 0 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 0 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 10, f, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQNEG/VQNEGQ:18:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQNEG/VQNEGQ:19:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:20:result_int32x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:25:result_uint64x2 [] = { 
3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 1 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 1 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQNEG/VQNEGQ:18:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:19:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:20:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, } +VQNEG/VQNEGQ:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLAL output: +VMLAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL:3:result_int64x1 [] = { 3333333333333333, } +VMLAL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLAL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:13:result_int16x8 [] = { ffffe907, ffffe908, ffffe909, ffffe90a, ffffe90b, ffffe90c, ffffe90d, ffffe90e, } +VMLAL:14:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:15:result_int64x2 [] = { 3e07, 3e08, } +VMLAL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:17:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLAL:18:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:19:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, } +VMLAL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLAL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLSL output: +VMLSL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL:3:result_int64x1 [] = { 3333333333333333, } +VMLSL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:10:result_float32x2 [] = { 33333333, 33333333, } +VMLSL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLSL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:13:result_int16x8 [] = { 16d9, 16da, 16db, 16dc, 16dd, 16de, 16df, 16e0, } +VMLSL:14:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:15:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:17:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLSL:18:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:19:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLSL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLAL_LANE output: +VMLAL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMLAL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLAL_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:14:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:15:result_int64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:18:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:19:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLAL_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLSL_LANE output: +VMLSL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:5:result_uint16x4 [] = { 
3333, 3333, 3333, 3333, } +VMLSL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLSL_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:14:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:15:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:18:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:19:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLSL_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLAL_N output: +VMLAL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMLAL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLAL_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:14:result_int32x4 [] = { 595, 596, 597, 598, } +VMLAL_N:15:result_int64x2 [] = { b3a, b3b, } +VMLAL_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:18:result_uint32x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLAL_N:19:result_uint64x2 [] = { 10df, 10e0, } +VMLAL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLAL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLSL_N output: +VMLSL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VMLSL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLSL_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VMLSL_N:14:result_int32x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLSL_N:15:result_int64x2 [] = { fffffffffffff4a6, fffffffffffff4a7, } +VMLSL_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:18:result_uint32x4 [] = { ffffef01, ffffef02, ffffef03, ffffef04, } +VMLSL_N:19:result_uint64x2 [] = { ffffffffffffef01, ffffffffffffef02, } +VMLSL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLSL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOVL output: +VMOVL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:2:result_int32x2 [] = { 33333333, 33333333, } +VMOVL:3:result_int64x1 [] = { 3333333333333333, } +VMOVL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMOVL:7:result_uint64x1 [] = { 3333333333333333, } +VMOVL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:10:result_float32x2 [] = { 33333333, 33333333, } +VMOVL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMOVL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVL:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVL:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VMOVL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:17:result_uint16x8 [] = { f0, 
f1, f2, f3, f4, f5, f6, f7, } +VMOVL:18:result_uint32x4 [] = { fff0, fff1, fff2, fff3, } +VMOVL:19:result_uint64x2 [] = { fffffff0, fffffff1, } +VMOVL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOVN output: +VMOVN:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVN:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVN:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMOVN:3:result_int64x1 [] = { 3333333333333333, } +VMOVN:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVN:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VMOVN:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMOVN:7:result_uint64x1 [] = { 3333333333333333, } +VMOVN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVN:10:result_float32x2 [] = { 33333333, 33333333, } +VMOVN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMOVN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMOVN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMULL output: +VMULL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL:3:result_int64x1 [] = { 3333333333333333, } +VMULL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL:7:result_uint64x1 [] = { 3333333333333333, } +VMULL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:10:result_float32x2 [] = { 33333333, 33333333, } +VMULL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMULL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:13:result_int16x8 [] = { 100, e1, c4, a9, 90, 79, 64, 51, } +VMULL:14:result_int32x4 [] = { 100, e1, c4, a9, } +VMULL:15:result_int64x2 [] = { 100, e1, } +VMULL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:17:result_uint16x8 [] = { e100, e2e1, e4c4, e6a9, e890, ea79, ec64, ee51, } +VMULL:18:result_uint32x4 [] = { ffe00100, ffe200e1, ffe400c4, ffe600a9, } +VMULL:19:result_uint64x2 [] = { ffffffe000000100, ffffffe2000000e1, } +VMULL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:21:result_poly16x8 [] = { 5500, 5501, 5504, 5505, 5510, 5511, 5514, 5515, } +VMULL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMULL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMULL_LANE output: +VMULL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } 
+VMULL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMULL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMULL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMULL_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMULL_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:14:result_int32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:15:result_int64x2 [] = { 2000, 2000, } +VMULL_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:18:result_uint32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:19:result_uint64x2 [] = { 2000, 2000, } +VMULL_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMULL_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VREV16 output: +VREV16:0:result_int8x8 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV16:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:2:result_int32x2 [] = { 33333333, 33333333, } +VREV16:3:result_int64x1 [] = { 3333333333333333, } +VREV16:4:result_uint8x8 [] = { f1, f0, f3, f2, f5, f4, f7, f6, } +VREV16:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } 
+VREV16:6:result_uint32x2 [] = { 33333333, 33333333, } +VREV16:7:result_uint64x1 [] = { 3333333333333333, } +VREV16:8:result_poly8x8 [] = { f1, f0, f3, f2, f5, f4, f7, f6, } +VREV16:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:10:result_float32x2 [] = { 33333333, 33333333, } +VREV16:11:result_float16x4 [] = { 0, 0, 0, 0, } +VREV16:12:result_int8x16 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, fffffff9, fffffff8, fffffffb, fffffffa, fffffffd, fffffffc, ffffffff, fffffffe, } +VREV16:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:16:result_uint8x16 [] = { f1, f0, f3, f2, f5, f4, f7, f6, f9, f8, fb, fa, fd, fc, ff, fe, } +VREV16:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:20:result_poly8x16 [] = { f1, f0, f3, f2, f5, f4, f7, f6, f9, f8, fb, fa, fd, fc, ff, fe, } +VREV16:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VREV32 output: +VREV32:24:result_int8x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV32:25:result_int16x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV32:26:result_int32x2 [] = { 33333333, 33333333, } +VREV32:27:result_int64x1 [] = { 3333333333333333, } +VREV32:28:result_uint8x8 [] = { f3, f2, f1, f0, f7, f6, f5, f4, } +VREV32:29:result_uint16x4 [] = { fff1, fff0, fff3, fff2, } +VREV32:30:result_uint32x2 [] = { 33333333, 33333333, } +VREV32:31:result_uint64x1 [] = { 3333333333333333, } +VREV32:32:result_poly8x8 [] = 
{ f3, f2, f1, f0, f7, f6, f5, f4, } +VREV32:33:result_poly16x4 [] = { fff1, fff0, fff3, fff2, } +VREV32:34:result_float32x2 [] = { 33333333, 33333333, } +VREV32:35:result_float16x4 [] = { 0, 0, 0, 0, } +VREV32:36:result_int8x16 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, fffffffb, fffffffa, fffffff9, fffffff8, ffffffff, fffffffe, fffffffd, fffffffc, } +VREV32:37:result_int16x8 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV32:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:40:result_uint8x16 [] = { f3, f2, f1, f0, f7, f6, f5, f4, fb, fa, f9, f8, ff, fe, fd, fc, } +VREV32:41:result_uint16x8 [] = { fff1, fff0, fff3, fff2, fff5, fff4, fff7, fff6, } +VREV32:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:44:result_poly8x16 [] = { f3, f2, f1, f0, f7, f6, f5, f4, fb, fa, f9, f8, ff, fe, fd, fc, } +VREV32:45:result_poly16x8 [] = { fff1, fff0, fff3, fff2, fff5, fff4, fff7, fff6, } +VREV32:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VREV64 output: +VREV64:48:result_int8x8 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:49:result_int16x4 [] = { fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:50:result_int32x2 [] = { fffffff1, fffffff0, } +VREV64:51:result_int64x1 [] = { 3333333333333333, } +VREV64:52:result_uint8x8 [] = { f7, f6, f5, f4, f3, f2, f1, f0, } +VREV64:53:result_uint16x4 [] = { fff3, fff2, fff1, fff0, } +VREV64:54:result_uint32x2 [] = { fffffff1, fffffff0, } +VREV64:55:result_uint64x1 [] = { 3333333333333333, } +VREV64:56:result_poly8x8 [] = { f7, f6, f5, f4, f3, f2, f1, f0, } +VREV64:57:result_poly16x4 [] = { fff3, fff2, fff1, fff0, } 
+VREV64:58:result_float32x2 [] = { c1700000, c1800000, } +VREV64:59:result_float16x4 [] = { 0, 0, 0, 0, } +VREV64:60:result_int8x16 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, ffffffff, fffffffe, fffffffd, fffffffc, fffffffb, fffffffa, fffffff9, fffffff8, } +VREV64:61:result_int16x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV64:62:result_int32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:64:result_uint8x16 [] = { f7, f6, f5, f4, f3, f2, f1, f0, ff, fe, fd, fc, fb, fa, f9, f8, } +VREV64:65:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VREV64:66:result_uint32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:68:result_poly8x16 [] = { f7, f6, f5, f4, f3, f2, f1, f0, ff, fe, fd, fc, fb, fa, f9, f8, } +VREV64:69:result_poly16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VREV64:70:result_float32x4 [] = { c1700000, c1800000, c1500000, c1600000, } +VREV64:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSRA_N output: +VSRA_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRA_N:2:result_int32x2 [] = { fffffffc, fffffffd, } +VSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VSRA_N:5:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VSRA_N:6:result_uint32x2 [] = { fffffff3, fffffff4, } +VSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRA_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSRA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSRA_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSRA_N:11:result_float16x4 [] = { 0, 0, 0, 0, } 
+VSRA_N:12:result_int8x16 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, } +VSRA_N:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRA_N:14:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:16:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VSRA_N:17:result_uint16x8 [] = { fffc, fffd, fffe, ffff, 0, 1, 2, 3, } +VSRA_N:18:result_uint32x4 [] = { fffffff3, fffffff4, fffffff5, fffffff6, } +VSRA_N:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSRA_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSRA_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSRA_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTRN/VTRNQ chunk 0 output: +VTRN/VTRNQ:0:result_int8x8 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, } +VTRN/VTRNQ:1:result_int16x4 [] = { fffffff0, fffffff1, 22, 22, } +VTRN/VTRNQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:3:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:4:result_uint8x8 [] = { f0, f1, 55, 55, f2, f3, 55, 55, } +VTRN/VTRNQ:5:result_uint16x4 [] = { fff0, fff1, 66, 66, } +VTRN/VTRNQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:7:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:8:result_poly8x8 [] = { f0, f1, 55, 55, f2, f3, 55, 55, } +VTRN/VTRNQ:9:result_poly16x4 [] = { fff0, fff1, 66, 66, } +VTRN/VTRNQ:10:result_float32x2 [] = { c1800000, c1700000, } +VTRN/VTRNQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTRN/VTRNQ:12:result_int8x16 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } 
+VTRN/VTRNQ:13:result_int16x8 [] = { fffffff0, fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:14:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VTRN/VTRNQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:16:result_uint8x16 [] = { f0, f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:17:result_uint16x8 [] = { fff0, fff1, 66, 66, fff2, fff3, 66, 66, } +VTRN/VTRNQ:18:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VTRN/VTRNQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:20:result_poly8x16 [] = { f0, f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:21:result_poly16x8 [] = { fff0, fff1, 66, 66, fff2, fff3, 66, 66, } +VTRN/VTRNQ:22:result_float32x4 [] = { c1800000, c1700000, 42073333, 42073333, } +VTRN/VTRNQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTRN/VTRNQ chunk 1 output: +VTRN/VTRNQ:24:result_int8x8 [] = { fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } +VTRN/VTRNQ:25:result_int16x4 [] = { fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:26:result_int32x2 [] = { 33, 33, } +VTRN/VTRNQ:27:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:28:result_uint8x8 [] = { f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:29:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VTRN/VTRNQ:30:result_uint32x2 [] = { 77, 77, } +VTRN/VTRNQ:31:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:32:result_poly8x8 [] = { f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:33:result_poly16x4 [] = { fff2, fff3, 66, 66, } +VTRN/VTRNQ:34:result_float32x2 [] = { 42066666, 42066666, } +VTRN/VTRNQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VTRN/VTRNQ:36:result_int8x16 [] = { fffffff8, fffffff9, 11, 11, fffffffa, fffffffb, 11, 11, fffffffc, fffffffd, 11, 11, fffffffe, ffffffff, 11, 11, } +VTRN/VTRNQ:37:result_int16x8 [] = { fffffff4, fffffff5, 22, 22, fffffff6, fffffff7, 22, 22, } +VTRN/VTRNQ:38:result_int32x4 [] = { fffffff2, fffffff3, 33, 33, } 
+VTRN/VTRNQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:40:result_uint8x16 [] = { f8, f9, 55, 55, fa, fb, 55, 55, fc, fd, 55, 55, fe, ff, 55, 55, } +VTRN/VTRNQ:41:result_uint16x8 [] = { fff4, fff5, 66, 66, fff6, fff7, 66, 66, } +VTRN/VTRNQ:42:result_uint32x4 [] = { fffffff2, fffffff3, 77, 77, } +VTRN/VTRNQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:44:result_poly8x16 [] = { f8, f9, 55, 55, fa, fb, 55, 55, fc, fd, 55, 55, fe, ff, 55, 55, } +VTRN/VTRNQ:45:result_poly16x8 [] = { fff4, fff5, 66, 66, fff6, fff7, 66, 66, } +VTRN/VTRNQ:46:result_float32x4 [] = { c1600000, c1500000, 42073333, 42073333, } +VTRN/VTRNQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VUZP/VUZPQ chunk 0 output: +VUZP/VUZPQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:3:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VUZP/VUZPQ:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VUZP/VUZPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:7:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VUZP/VUZPQ:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VUZP/VUZPQ:10:result_float32x2 [] = { c1800000, c1700000, } +VUZP/VUZPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VUZP/VUZPQ:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VUZP/VUZPQ:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:15:result_int64x2 [] = 
{ 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VUZP/VUZPQ:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VUZP/VUZPQ:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VUZP/VUZPQ:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VUZP/VUZPQ:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VUZP/VUZPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VUZP/VUZPQ chunk 1 output: +VUZP/VUZPQ:24:result_int8x8 [] = { 11, 11, 11, 11, 11, 11, 11, 11, } +VUZP/VUZPQ:25:result_int16x4 [] = { 22, 22, 22, 22, } +VUZP/VUZPQ:26:result_int32x2 [] = { 33, 33, } +VUZP/VUZPQ:27:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:28:result_uint8x8 [] = { 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:29:result_uint16x4 [] = { 66, 66, 66, 66, } +VUZP/VUZPQ:30:result_uint32x2 [] = { 77, 77, } +VUZP/VUZPQ:31:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:32:result_poly8x8 [] = { 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:33:result_poly16x4 [] = { 66, 66, 66, 66, } +VUZP/VUZPQ:34:result_float32x2 [] = { 42066666, 42066666, } +VUZP/VUZPQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VUZP/VUZPQ:36:result_int8x16 [] = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VUZP/VUZPQ:37:result_int16x8 [] = { 22, 22, 22, 22, 22, 22, 22, 22, } +VUZP/VUZPQ:38:result_int32x4 [] = { 33, 33, 33, 33, } +VUZP/VUZPQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:40:result_uint8x16 [] = { 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:41:result_uint16x8 [] = { 66, 66, 66, 66, 66, 66, 66, 66, } +VUZP/VUZPQ:42:result_uint32x4 [] = { 77, 77, 77, 77, } 
+VUZP/VUZPQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:44:result_poly8x16 [] = { 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:45:result_poly16x8 [] = { 66, 66, 66, 66, 66, 66, 66, 66, } +VUZP/VUZPQ:46:result_float32x4 [] = { 42073333, 42073333, 42073333, 42073333, } +VUZP/VUZPQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VZIP/VZIPQ chunk 0 output: +VZIP/VZIPQ:0:result_int8x8 [] = { fffffff0, fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, } +VZIP/VZIPQ:1:result_int16x4 [] = { fffffff0, fffffff2, 22, 22, } +VZIP/VZIPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:3:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:4:result_uint8x8 [] = { f0, f4, 55, 55, f1, f5, 55, 55, } +VZIP/VZIPQ:5:result_uint16x4 [] = { fff0, fff2, 66, 66, } +VZIP/VZIPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:7:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:8:result_poly8x8 [] = { f0, f4, 55, 55, f1, f5, 55, 55, } +VZIP/VZIPQ:9:result_poly16x4 [] = { fff0, fff2, 66, 66, } +VZIP/VZIPQ:10:result_float32x2 [] = { c1800000, c1700000, } +VZIP/VZIPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VZIP/VZIPQ:12:result_int8x16 [] = { fffffff0, fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, } +VZIP/VZIPQ:13:result_int16x8 [] = { fffffff0, fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, } +VZIP/VZIPQ:14:result_int32x4 [] = { fffffff0, fffffff2, 33, 33, } +VZIP/VZIPQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:16:result_uint8x16 [] = { f0, f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, } +VZIP/VZIPQ:17:result_uint16x8 [] = { fff0, fff4, 66, 66, fff1, fff5, 66, 66, } +VZIP/VZIPQ:18:result_uint32x4 [] = { fffffff0, fffffff2, 77, 77, } +VZIP/VZIPQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:20:result_poly8x16 [] = { f0, f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 
55, 55, } +VZIP/VZIPQ:21:result_poly16x8 [] = { fff0, fff4, 66, 66, fff1, fff5, 66, 66, } +VZIP/VZIPQ:22:result_float32x4 [] = { c1800000, c1600000, 42073333, 42073333, } +VZIP/VZIPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VZIP/VZIPQ chunk 1 output: +VZIP/VZIPQ:24:result_int8x8 [] = { fffffff2, fffffff6, 11, 11, fffffff3, fffffff7, 11, 11, } +VZIP/VZIPQ:25:result_int16x4 [] = { fffffff1, fffffff3, 22, 22, } +VZIP/VZIPQ:26:result_int32x2 [] = { 33, 33, } +VZIP/VZIPQ:27:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:28:result_uint8x8 [] = { f2, f6, 55, 55, f3, f7, 55, 55, } +VZIP/VZIPQ:29:result_uint16x4 [] = { fff1, fff3, 66, 66, } +VZIP/VZIPQ:30:result_uint32x2 [] = { 77, 77, } +VZIP/VZIPQ:31:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:32:result_poly8x8 [] = { f2, f6, 55, 55, f3, f7, 55, 55, } +VZIP/VZIPQ:33:result_poly16x4 [] = { fff1, fff3, 66, 66, } +VZIP/VZIPQ:34:result_float32x2 [] = { 42066666, 42066666, } +VZIP/VZIPQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VZIP/VZIPQ:36:result_int8x16 [] = { fffffff4, fffffffc, 11, 11, fffffff5, fffffffd, 11, 11, fffffff6, fffffffe, 11, 11, fffffff7, ffffffff, 11, 11, } +VZIP/VZIPQ:37:result_int16x8 [] = { fffffff2, fffffff6, 22, 22, fffffff3, fffffff7, 22, 22, } +VZIP/VZIPQ:38:result_int32x4 [] = { fffffff1, fffffff3, 33, 33, } +VZIP/VZIPQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:40:result_uint8x16 [] = { f4, fc, 55, 55, f5, fd, 55, 55, f6, fe, 55, 55, f7, ff, 55, 55, } +VZIP/VZIPQ:41:result_uint16x8 [] = { fff2, fff6, 66, 66, fff3, fff7, 66, 66, } +VZIP/VZIPQ:42:result_uint32x4 [] = { fffffff1, fffffff3, 77, 77, } +VZIP/VZIPQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:44:result_poly8x16 [] = { f4, fc, 55, 55, f5, fd, 55, 55, f6, fe, 55, 55, f7, ff, 55, 55, } +VZIP/VZIPQ:45:result_poly16x8 [] = { fff2, fff6, 66, 66, fff3, fff7, 66, 66, } +VZIP/VZIPQ:46:result_float32x4 [] = { c1700000, c1500000, 42073333, 42073333, } 
+VZIP/VZIPQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VREINTERPRET/VREINTERPRETQ output: +VREINTERPRET/VREINTERPRETQ:0:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:1:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:2:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:3:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:4:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:5:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:6:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:7:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:8:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:9:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:10:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:11:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:12:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:13:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:14:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:15:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } 
+VREINTERPRET/VREINTERPRETQ:16:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:17:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:18:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:19:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:20:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:21:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:22:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:23:result_int32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:24:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:25:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:26:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:27:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:28:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:29:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:30:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:31:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:32:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:33:result_int64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:34:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:35:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:36:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:37:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:38:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:39:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:40:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } 
+VREINTERPRET/VREINTERPRETQ:41:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:42:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:43:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:44:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:45:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:46:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:47:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:48:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:49:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:50:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:51:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:52:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:53:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:54:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:55:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:56:result_uint32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:57:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:58:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:59:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:60:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:61:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:62:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:63:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:64:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:65:result_uint64x1 [] = { fffffff1fffffff0, } 
+VREINTERPRET/VREINTERPRETQ:66:result_uint64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:67:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:68:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:69:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:70:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:71:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:72:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:73:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:74:result_poly8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:75:result_poly8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:76:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:77:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:78:result_poly8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:79:result_poly8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:80:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:81:result_poly16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:82:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:83:result_poly16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:84:result_poly16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:85:result_poly16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:86:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:87:result_poly16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:88:result_poly16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:89:result_poly16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:90:result_int8x16 [] = { 
fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:91:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:92:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:93:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VREINTERPRET/VREINTERPRETQ:94:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:95:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:96:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:97:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VREINTERPRET/VREINTERPRETQ:98:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:99:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, 
fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:100:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:101:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:102:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:103:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:104:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:105:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:106:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:107:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:108:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:109:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:110:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:111:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:112:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:113:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:114:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:115:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:116:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } 
+VREINTERPRET/VREINTERPRETQ:117:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:118:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:119:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:120:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:121:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:122:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:123:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:124:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:125:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:126:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:127:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:128:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:129:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:130:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:131:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:132:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:133:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:134:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:135:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:136:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } 
+VREINTERPRET/VREINTERPRETQ:137:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:138:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:139:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:140:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:141:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:142:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:143:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:144:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:145:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:146:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:147:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:148:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:149:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:150:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:151:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:152:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:153:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VREINTERPRET/VREINTERPRETQ:154:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:155:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:156:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } 
+VREINTERPRET/VREINTERPRETQ:157:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:158:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:159:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:160:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VREINTERPRET/VREINTERPRETQ:161:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:162:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:163:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:164:result_float32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:165:result_float32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:166:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:167:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:168:result_float32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:169:result_float32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:170:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:171:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:172:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:173:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:174:result_float32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:175:result_float32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:176:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:177:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } 
+VREINTERPRET/VREINTERPRETQ:178:result_float32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:179:result_float32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:180:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:181:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:182:result_int8x8 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, } +VREINTERPRET/VREINTERPRETQ:183:result_int16x4 [] = { 0, ffffc180, 0, ffffc170, } +VREINTERPRET/VREINTERPRETQ:184:result_int32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:185:result_int64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:186:result_uint8x8 [] = { 0, 0, 80, c1, 0, 0, 70, c1, } +VREINTERPRET/VREINTERPRETQ:187:result_uint16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:188:result_uint32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:189:result_uint64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:190:result_poly8x8 [] = { 0, 0, 80, c1, 0, 0, 70, c1, } +VREINTERPRET/VREINTERPRETQ:191:result_poly16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:192:result_int8x16 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, 0, 0, 60, ffffffc1, 0, 0, 50, ffffffc1, } +VREINTERPRET/VREINTERPRETQ:193:result_int16x8 [] = { 0, ffffc180, 0, ffffc170, 0, ffffc160, 0, ffffc150, } +VREINTERPRET/VREINTERPRETQ:194:result_int32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:195:result_int64x2 [] = { c1700000c1800000, c1500000c1600000, } +VREINTERPRET/VREINTERPRETQ:196:result_uint8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } +VREINTERPRET/VREINTERPRETQ:197:result_uint16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:198:result_uint32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:199:result_uint64x2 [] = { 
c1700000c1800000, c1500000c1600000, } +VREINTERPRET/VREINTERPRETQ:200:result_poly8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } +VREINTERPRET/VREINTERPRETQ:201:result_poly16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:202:result_float16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:203:result_float16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:204:result_float16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:205:result_float16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:206:result_float16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:207:result_float16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:208:result_float16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:209:result_float16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:210:result_float16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:211:result_float16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:212:result_float16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:213:result_float16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:214:result_float16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:215:result_float16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:216:result_float16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:217:result_float16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:218:result_float16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:219:result_float16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:220:result_float16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } 
+VREINTERPRET/VREINTERPRETQ:221:result_float16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:222:result_float16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:223:result_float16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:224:result_int8x8 [] = { 0, ffffffcc, ffffff80, ffffffcb, 0, ffffffcb, ffffff80, ffffffca, } +VREINTERPRET/VREINTERPRETQ:225:result_int16x4 [] = { ffffcc00, ffffcb80, ffffcb00, ffffca80, } +VREINTERPRET/VREINTERPRETQ:226:result_int32x2 [] = { cb80cc00, ca80cb00, } +VREINTERPRET/VREINTERPRETQ:227:result_int64x1 [] = { ca80cb00cb80cc00, } +VREINTERPRET/VREINTERPRETQ:228:result_uint8x8 [] = { 0, cc, 80, cb, 0, cb, 80, ca, } +VREINTERPRET/VREINTERPRETQ:229:result_uint16x4 [] = { cc00, cb80, cb00, ca80, } +VREINTERPRET/VREINTERPRETQ:230:result_uint32x2 [] = { cb80cc00, ca80cb00, } +VREINTERPRET/VREINTERPRETQ:231:result_uint64x1 [] = { ca80cb00cb80cc00, } +VREINTERPRET/VREINTERPRETQ:232:result_poly8x8 [] = { 0, cc, 80, cb, 0, cb, 80, ca, } +VREINTERPRET/VREINTERPRETQ:233:result_poly16x4 [] = { cc00, cb80, cb00, ca80, } +VREINTERPRET/VREINTERPRETQ:234:result_float32x2 [] = { cb80cc00, ca80cb00, } +VREINTERPRET/VREINTERPRETQ:235:result_int8x16 [] = { 0, ffffffcc, ffffff80, ffffffcb, 0, ffffffcb, ffffff80, ffffffca, 0, ffffffca, ffffff80, ffffffc9, 0, ffffffc9, ffffff80, ffffffc8, } +VREINTERPRET/VREINTERPRETQ:236:result_int16x8 [] = { ffffcc00, ffffcb80, ffffcb00, ffffca80, ffffca00, ffffc980, ffffc900, ffffc880, } +VREINTERPRET/VREINTERPRETQ:237:result_int32x4 [] = { cb80cc00, ca80cb00, c980ca00, c880c900, } +VREINTERPRET/VREINTERPRETQ:238:result_int64x2 [] = { ca80cb00cb80cc00, c880c900c980ca00, } +VREINTERPRET/VREINTERPRETQ:239:result_uint8x16 [] = { 0, cc, 80, cb, 0, cb, 80, ca, 0, ca, 80, c9, 0, c9, 80, c8, } +VREINTERPRET/VREINTERPRETQ:240:result_uint16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } 
+VREINTERPRET/VREINTERPRETQ:241:result_uint32x4 [] = { cb80cc00, ca80cb00, c980ca00, c880c900, } +VREINTERPRET/VREINTERPRETQ:242:result_uint64x2 [] = { ca80cb00cb80cc00, c880c900c980ca00, } +VREINTERPRET/VREINTERPRETQ:243:result_poly8x16 [] = { 0, cc, 80, cb, 0, cb, 80, ca, 0, ca, 80, c9, 0, c9, 80, c8, } +VREINTERPRET/VREINTERPRETQ:244:result_poly16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } +VREINTERPRET/VREINTERPRETQ:245:result_float32x4 [] = { cb80cc00, ca80cb00, c980ca00, c880c900, } + +VQRDMULH cumulative saturation output: +VQRDMULH:0:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:1:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:2:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:3:vqrdmulhq_s32 Neon cumulative saturation 0 + +VQRDMULH output: +VQRDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:5:result_int16x4 [] = { fffffff5, fffffff6, fffffff7, fffffff7, } +VQRDMULH:6:result_int32x2 [] = { 0, 0, } +VQRDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:14:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:15:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH:16:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:17:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRDMULH:18:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH:19:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:20:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:21:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQRDMULH:22:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:23:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:24:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:26:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH (check mul cumulative saturation) cumulative saturation output: +VQRDMULH:28:vqrdmulh_s16 Neon cumulative saturation 1 +VQRDMULH:29:vqrdmulh_s32 Neon cumulative saturation 1 +VQRDMULH:30:vqrdmulhq_s16 Neon cumulative saturation 1 +VQRDMULH:31:vqrdmulhq_s32 Neon cumulative saturation 1 + +VQRDMULH (check mul cumulative saturation) output: +VQRDMULH:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:34:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:35:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:36:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:37:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:38:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:39:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:40:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:41:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:42:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:43:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH:44:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:45:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:46:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:47:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:48:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQRDMULH:49:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:50:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:51:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:52:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:54:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH:56:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:57:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:58:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:59:vqrdmulhq_s32 Neon cumulative saturation 0 + +VQRDMULH (check rounding cumulative saturation) output: +VQRDMULH:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:61:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:62:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:63:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:64:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:65:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:66:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:67:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:68:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:69:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:70:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:71:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH:72:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:73:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:74:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:75:result_int64x2 [] = { 3333333333333333, 3333333333333333, } 
+VQRDMULH:76:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:77:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:78:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:79:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:80:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:81:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:82:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:83:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_LANE cumulative saturation output: +VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon cumulative saturation 0 + +VQRDMULH_LANE output: +VQRDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:5:result_int16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:6:result_int32x2 [] = { 0, 0, } +VQRDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:14:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:15:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:16:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:17:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRDMULH_LANE:18:result_int32x4 [] = { 0, 0, 0, 0, } 
+VQRDMULH_LANE:19:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:20:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:21:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:22:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:23:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:24:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:26:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:28:vqrdmulh_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:29:vqrdmulh_lane_s32 Neon cumulative saturation 1 +VQRDMULH_LANE:30:vqrdmulhq_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:31:vqrdmulhq_lane_s32 Neon cumulative saturation 1 + +VQRDMULH_LANE (check mul cumulative saturation) output: +VQRDMULH_LANE:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:34:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:35:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:36:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:37:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:38:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:39:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:40:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:41:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:42:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:43:result_float16x4 [] = { 0, 0, 0, 0, } 
+VQRDMULH_LANE:44:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:45:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:46:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:47:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:48:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:49:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:50:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:51:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:52:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:54:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_LANE (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:56:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:57:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:58:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:59:vqrdmulhq_lane_s32 Neon cumulative saturation 0 + +VQRDMULH_LANE (check rounding cumulative saturation) output: +VQRDMULH_LANE:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:61:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:62:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:63:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:64:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:65:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:66:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:67:result_uint64x1 [] = { 
3333333333333333, } +VQRDMULH_LANE:68:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:69:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:70:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:71:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:72:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:73:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:74:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:75:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:76:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:77:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:78:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:79:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:80:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:81:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:82:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:83:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_N cumulative saturation output: +VQRDMULH_N:0:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:1:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:2:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:3:vqrdmulhq_n_s32 Neon cumulative saturation 0 + +VQRDMULH_N output: +VQRDMULH_N:4:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffd, } +VQRDMULH_N:5:result_int32x2 [] = { fffffffe, fffffffe, } +VQRDMULH_N:6:result_int16x8 [] = { 6, 6, 6, 5, 5, 4, 4, 4, } +VQRDMULH_N:7:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } + +VQRDMULH_N (check mul cumulative saturation) cumulative saturation 
output: +VQRDMULH_N:8:vqrdmulh_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:9:vqrdmulh_n_s32 Neon cumulative saturation 1 +VQRDMULH_N:10:vqrdmulhq_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:11:vqrdmulhq_n_s32 Neon cumulative saturation 1 + +VQRDMULH_N (check mul cumulative saturation) output: +VQRDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:20:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:21:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:22:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:23:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_N:24:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:25:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:26:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:27:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:28:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:29:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:30:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:31:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:32:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:33:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:34:result_float32x4 [] = { 33333333, 33333333, 
33333333, 33333333, } +VQRDMULH_N:35:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_N (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_N:36:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:37:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:38:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:39:vqrdmulhq_n_s32 Neon cumulative saturation 0 + +VQRDMULH_N (check rounding cumulative saturation) output: +VQRDMULH_N:40:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:41:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:42:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:43:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:44:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:45:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:46:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:47:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:48:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:49:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:50:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:51:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_N:52:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:53:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:54:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:55:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:56:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:57:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:58:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:59:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:60:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, } +VQRDMULH_N:61:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:62:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:63:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (with input = 0) cumulative saturation output: +VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (with input = 0) output: +VQRSHL/VQRSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:18:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:19:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:23:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:27:result_float16x4 [] = { 0, 0, 0, 0, } 
+VQRSHL/VQRSHLQ:28:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:29:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:30:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:31:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:32:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:33:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:34:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:35:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:40:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:41:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:42:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:43:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:44:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:45:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:46:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:47:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:48:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:49:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:50:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:51:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:52:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:53:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:54:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:55:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: 
+VQRSHL/VQRSHLQ:56:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:57:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:58:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:59:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:60:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:61:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:62:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:63:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:64:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:65:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:66:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:67:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:68:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:69:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:70:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:71:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:72:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:73:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:74:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:75:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:76:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:78:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ cumulative saturation output: +VQRSHL/VQRSHLQ:80:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:81:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:82:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:83:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:84:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:85:vqrshl_u16 Neon cumulative saturation 1 
+VQRSHL/VQRSHLQ:86:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:87:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:88:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:89:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:90:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:91:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:92:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:93:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:94:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:95:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ output: +VQRSHL/VQRSHLQ:96:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQRSHL/VQRSHLQ:97:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQRSHL/VQRSHLQ:98:result_int32x2 [] = { fffff000, fffff100, } +VQRSHL/VQRSHLQ:99:result_int64x1 [] = { ffffffffffffff80, } +VQRSHL/VQRSHLQ:100:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:101:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:102:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:103:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:106:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:107:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:108:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:109:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:110:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:111:result_int64x2 [] = { 8000000000000000, 8000000000000000, } 
+VQRSHL/VQRSHLQ:112:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:113:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:114:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:115:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:120:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:121:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:122:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:123:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:124:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:125:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:126:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:127:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:128:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:129:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:130:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:131:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:132:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:133:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:134:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:135:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (negative shift amount) output: +VQRSHL/VQRSHLQ:136:result_int8x8 [] = { fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:137:result_int16x4 [] = { fffffffc, fffffffc, 
fffffffd, fffffffd, } +VQRSHL/VQRSHLQ:138:result_int32x2 [] = { fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:139:result_int64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:140:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, } +VQRSHL/VQRSHLQ:141:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VQRSHL/VQRSHLQ:142:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQRSHL/VQRSHLQ:143:result_uint64x1 [] = { fffffffffffffff, } +VQRSHL/VQRSHLQ:144:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:145:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:146:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:147:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:148:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:149:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:150:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:151:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:152:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VQRSHL/VQRSHLQ:153:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:154:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VQRSHL/VQRSHLQ:155:result_uint64x2 [] = { 100000000000, 100000000000, } +VQRSHL/VQRSHLQ:156:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:157:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:158:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:159:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) cumulative saturation output: +VQRSHL/VQRSHLQ:160:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:161:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:162:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:163:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:164:vqrshl_u8 Neon 
cumulative saturation 0 +VQRSHL/VQRSHLQ:165:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:166:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:167:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:168:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:169:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:170:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:171:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:172:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:173:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:174:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:175:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) output: +VQRSHL/VQRSHLQ:176:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:177:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:178:result_int32x2 [] = { 40000000, 40000000, } +VQRSHL/VQRSHLQ:179:result_int64x1 [] = { 4000000000000000, } +VQRSHL/VQRSHLQ:180:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:181:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:182:result_uint32x2 [] = { 80000000, 80000000, } +VQRSHL/VQRSHLQ:183:result_uint64x1 [] = { 8000000000000000, } +VQRSHL/VQRSHLQ:184:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:185:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:186:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:187:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:188:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:189:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:190:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VQRSHL/VQRSHLQ:191:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VQRSHL/VQRSHLQ:192:result_uint8x16 [] = { 80, 80, 80, 80, 80, 
80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:193:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:194:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:195:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:196:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:197:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:198:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:199:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) cumulative saturation output: +VQRSHL/VQRSHLQ:200:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:201:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:202:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:203:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:204:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:205:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:206:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:207:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:208:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:209:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:210:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:211:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:212:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:213:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:214:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:215:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) output: +VQRSHL/VQRSHLQ:216:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:217:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:218:result_int32x2 [] = { 10000000, 10000000, } 
+VQRSHL/VQRSHLQ:219:result_int64x1 [] = { 1000000000000000, } +VQRSHL/VQRSHLQ:220:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:221:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:222:result_uint32x2 [] = { 20000000, 20000000, } +VQRSHL/VQRSHLQ:223:result_uint64x1 [] = { 2000000000000000, } +VQRSHL/VQRSHLQ:224:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:225:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:226:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:227:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:228:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:229:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:230:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VQRSHL/VQRSHLQ:231:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VQRSHL/VQRSHLQ:232:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:233:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:234:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VQRSHL/VQRSHLQ:235:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VQRSHL/VQRSHLQ:236:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:237:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:238:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:239:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:240:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:241:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:242:vqrshl_s32 Neon cumulative saturation 1 
+VQRSHL/VQRSHLQ:243:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:244:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:245:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:246:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:247:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:248:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:249:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:250:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:251:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:252:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:253:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:254:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:255:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) output: +VQRSHL/VQRSHLQ:256:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHL/VQRSHLQ:257:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHL/VQRSHLQ:258:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHL/VQRSHLQ:259:result_int64x1 [] = { 7fffffffffffffff, } +VQRSHL/VQRSHLQ:260:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:261:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:262:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:263:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:264:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:265:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:266:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:267:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:268:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHL/VQRSHLQ:269:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRSHL/VQRSHLQ:270:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRSHL/VQRSHLQ:271:result_int64x2 [] = 
{ 7fffffffffffffff, 7fffffffffffffff, } +VQRSHL/VQRSHLQ:272:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:273:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:274:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:275:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:276:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:277:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:278:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:279:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) cumulative saturation output: +VQRSHL/VQRSHLQ:280:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:281:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:282:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:283:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:284:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:285:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:286:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:287:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:288:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:289:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:290:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:291:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:292:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:293:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:294:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:295:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) output: +VQRSHL/VQRSHLQ:296:result_int8x8 [] = { 
ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:297:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:298:result_int32x2 [] = { 80000000, 80000000, } +VQRSHL/VQRSHLQ:299:result_int64x1 [] = { 8000000000000000, } +VQRSHL/VQRSHLQ:300:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:301:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:302:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:303:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:304:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:305:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:306:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:307:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:308:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:309:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:310:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:311:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:312:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:313:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:314:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:315:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:316:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:317:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:318:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VQRSHL/VQRSHLQ:319:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:320:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:321:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:322:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:323:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:324:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:325:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:326:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:327:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:328:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:329:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:330:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:331:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:332:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:333:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:334:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:335:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) output: +VQRSHL/VQRSHLQ:336:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:337:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:338:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:339:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:340:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:341:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:342:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:343:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:344:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:345:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:346:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:347:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:348:result_int8x16 [] = { 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:349:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:350:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:351:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:352:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:353:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:354:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:355:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:356:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:357:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:358:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:359:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) cumulative saturation output: +VQRSHL/VQRSHLQ:360:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:361:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:362:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:363:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:364:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:365:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:366:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:367:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:368:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:369:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:370:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:371:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:372:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:373:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:374:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:375:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 
input) output: +VQRSHL/VQRSHLQ:376:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:377:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:378:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:379:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:380:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:381:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:382:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:383:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:384:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:385:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:386:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:387:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:388:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:389:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:390:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:391:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:392:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:393:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:394:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:395:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:396:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:397:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:398:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:399:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VABA/VABAQ output: +VABA/VABAQ:0:result_int8x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABA/VABAQ:1:result_int16x4 [] = { 16, 17, 18, 19, } +VABA/VABAQ:2:result_int32x2 [] = { 20, 21, } +VABA/VABAQ:3:result_int64x1 [] = { 3333333333333333, } +VABA/VABAQ:4:result_uint8x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } 
+VABA/VABAQ:5:result_uint16x4 [] = { 907, 908, 909, 90a, } +VABA/VABAQ:6:result_uint32x2 [] = { ffffffe7, ffffffe8, } +VABA/VABAQ:7:result_uint64x1 [] = { 3333333333333333, } +VABA/VABAQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABA/VABAQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABA/VABAQ:10:result_float32x2 [] = { 33333333, 33333333, } +VABA/VABAQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABA/VABAQ:12:result_int8x16 [] = { 5e, 5f, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 6a, 6b, 6c, 6d, } +VABA/VABAQ:13:result_int16x8 [] = { b9c, b9d, b9e, b9f, ba0, ba1, ba2, ba3, } +VABA/VABAQ:14:result_int32x4 [] = { 26e0, 26e1, 26e2, 26e3, } +VABA/VABAQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:16:result_uint8x16 [] = { f8, f9, fa, fb, fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, } +VABA/VABAQ:17:result_uint16x8 [] = { fff9, fffa, fffb, fffc, fffd, fffe, ffff, 0, } +VABA/VABAQ:18:result_uint32x4 [] = { c, d, e, f, } +VABA/VABAQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABA/VABAQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABA/VABAQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABA/VABAQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VABAL output: +VABAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:2:result_int32x2 [] = { 33333333, 33333333, } +VABAL:3:result_int64x1 [] = { 3333333333333333, } +VABAL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABAL:7:result_uint64x1 [] = { 3333333333333333, } +VABAL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VABAL:10:result_float32x2 [] = { 33333333, 33333333, } +VABAL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABAL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:13:result_int16x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABAL:14:result_int32x4 [] = { 16, 17, 18, 19, } +VABAL:15:result_int64x2 [] = { 20, 21, } +VABAL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:17:result_uint16x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } +VABAL:18:result_uint32x4 [] = { 907, 908, 909, 90a, } +VABAL:19:result_uint64x2 [] = { ffffffe7, ffffffe8, } +VABAL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABAL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABAL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VABAL test intermediate overflow output: +VABAL:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:26:result_int32x2 [] = { 33333333, 33333333, } +VABAL:27:result_int64x1 [] = { 3333333333333333, } +VABAL:28:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:29:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:30:result_uint32x2 [] = { 33333333, 33333333, } +VABAL:31:result_uint64x1 [] = { 3333333333333333, } +VABAL:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:34:result_float32x2 [] = { 33333333, 33333333, } +VABAL:35:result_float16x4 [] = { 0, 0, 0, 0, } +VABAL:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:37:result_int16x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABAL:38:result_int32x4 [] = { ffef, fff0, fff1, fff2, } +VABAL:39:result_int64x2 [] = { 
ffffffef, fffffff0, } +VABAL:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:41:result_uint16x8 [] = { ee, ef, f0, f1, f2, f3, f4, f5, } +VABAL:42:result_uint32x4 [] = { ffe2, ffe3, ffe4, ffe5, } +VABAL:43:result_uint64x2 [] = { ffffffe7, ffffffe8, } +VABAL:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABAL:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABAL:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VABD/VABDQ output: +VABD/VABDQ:0:result_int8x8 [] = { 11, 10, f, e, d, c, b, a, } +VABD/VABDQ:1:result_int16x4 [] = { 3, 2, 1, 0, } +VABD/VABDQ:2:result_int32x2 [] = { 18, 17, } +VABD/VABDQ:3:result_int64x1 [] = { 3333333333333333, } +VABD/VABDQ:4:result_uint8x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABD/VABDQ:5:result_uint16x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABD/VABDQ:6:result_uint32x2 [] = { ffffffe8, ffffffe9, } +VABD/VABDQ:7:result_uint64x1 [] = { 3333333333333333, } +VABD/VABDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABD/VABDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABD/VABDQ:10:result_float32x2 [] = { 41c26666, 41ba6666, } +VABD/VABDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABD/VABDQ:12:result_int8x16 [] = { 1a, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, f, e, d, c, b, } +VABD/VABDQ:13:result_int16x8 [] = { 4, 3, 2, 1, 0, 1, 2, 3, } +VABD/VABDQ:14:result_int32x4 [] = { 30, 2f, 2e, 2d, } +VABD/VABDQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABD/VABDQ:16:result_uint8x16 [] = { e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, f4, f5, } +VABD/VABDQ:17:result_uint16x8 [] = { ffe4, ffe5, ffe6, ffe7, ffe8, ffe9, ffea, ffeb, } +VABD/VABDQ:18:result_uint32x4 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, } +VABD/VABDQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } 
+VABD/VABDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABD/VABDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABD/VABDQ:22:result_float32x4 [] = { 42407ae1, 423c7ae1, 42387ae1, 42347ae1, } +VABD/VABDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VABD/VABDQ FP special (-0.0):24:result_float32x4 [] = { 0, 0, 0, 0, } +VABD/VABDQ FP special (-0.0):25:result_float32x4 [] = { 0, 0, 0, 0, } + +VABDL output: +VABDL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:2:result_int32x2 [] = { 33333333, 33333333, } +VABDL:3:result_int64x1 [] = { 3333333333333333, } +VABDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABDL:7:result_uint64x1 [] = { 3333333333333333, } +VABDL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:10:result_float32x2 [] = { 33333333, 33333333, } +VABDL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABDL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:13:result_int16x8 [] = { 11, 10, f, e, d, c, b, a, } +VABDL:14:result_int32x4 [] = { 3, 2, 1, 0, } +VABDL:15:result_int64x2 [] = { 18, 17, } +VABDL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:17:result_uint16x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABDL:18:result_uint32x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABDL:19:result_uint64x2 [] = { ffffffe8, ffffffe9, } +VABDL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABDL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABDL:23:result_float16x8 [] = { 0, 
0, 0, 0, 0, 0, 0, 0, } + +VAND/VANDQ output: +VAND/VANDQ:0:result_int8x8 [] = { 0, 0, 2, 2, 0, 0, 2, 2, } +VAND/VANDQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VAND/VANDQ:2:result_int32x2 [] = { 0, 1, } +VAND/VANDQ:3:result_int64x1 [] = { 60, } +VAND/VANDQ:4:result_uint8x8 [] = { 10, 10, 10, 10, 14, 14, 14, 14, } +VAND/VANDQ:5:result_uint16x4 [] = { 10, 10, 12, 12, } +VAND/VANDQ:6:result_uint32x2 [] = { 20, 20, } +VAND/VANDQ:7:result_uint64x1 [] = { 0, } +VAND/VANDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VAND/VANDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VAND/VANDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VAND/VANDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VAND/VANDQ:12:result_int8x16 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VAND/VANDQ:13:result_int16x8 [] = { ffffffe0, ffffffe0, ffffffe0, ffffffe0, ffffffe4, ffffffe4, ffffffe4, ffffffe4, } +VAND/VANDQ:14:result_int32x4 [] = { ffffffe0, ffffffe0, ffffffe2, ffffffe2, } +VAND/VANDQ:15:result_int64x2 [] = { 10, 10, } +VAND/VANDQ:16:result_uint8x16 [] = { 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, c, c, c, c, } +VAND/VANDQ:17:result_uint16x8 [] = { 0, 1, 2, 3, 0, 1, 2, 3, } +VAND/VANDQ:18:result_uint32x4 [] = { 30, 31, 32, 33, } +VAND/VANDQ:19:result_uint64x2 [] = { 0, 1, } +VAND/VANDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VAND/VANDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VAND/VANDQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VAND/VANDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VORR/VORRQ output: +VORR/VORRQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff2, fffffff3, fffffff6, fffffff7, fffffff6, fffffff7, } +VORR/VORRQ:1:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } 
+VORR/VORRQ:2:result_int32x2 [] = { fffffff3, fffffff3, } +VORR/VORRQ:3:result_int64x1 [] = { fffffffffffffff4, } +VORR/VORRQ:4:result_uint8x8 [] = { f4, f5, f6, f7, f4, f5, f6, f7, } +VORR/VORRQ:5:result_uint16x4 [] = { fffe, ffff, fffe, ffff, } +VORR/VORRQ:6:result_uint32x2 [] = { fffffff8, fffffff9, } +VORR/VORRQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VORR/VORRQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VORR/VORRQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VORR/VORRQ:10:result_float32x2 [] = { 33333333, 33333333, } +VORR/VORRQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VORR/VORRQ:12:result_int8x16 [] = { fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, } +VORR/VORRQ:13:result_int16x8 [] = { fffffffc, fffffffd, fffffffe, ffffffff, fffffffc, fffffffd, fffffffe, ffffffff, } +VORR/VORRQ:14:result_int32x4 [] = { fffffff2, fffffff3, fffffff2, fffffff3, } +VORR/VORRQ:15:result_int64x2 [] = { fffffffffffffff8, fffffffffffffff9, } +VORR/VORRQ:16:result_uint8x16 [] = { fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, } +VORR/VORRQ:17:result_uint16x8 [] = { fff3, fff3, fff3, fff3, fff7, fff7, fff7, fff7, } +VORR/VORRQ:18:result_uint32x4 [] = { fffffff7, fffffff7, fffffff7, fffffff7, } +VORR/VORRQ:19:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff3, } +VORR/VORRQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VORR/VORRQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VORR/VORRQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VORR/VORRQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VORN/VORNQ output: +VORN/VORNQ:0:result_int8x8 [] = { fffffffd, fffffffd, ffffffff, ffffffff, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:1:result_int16x4 [] = { fffffff3, fffffff3, fffffff3, fffffff3, } 
+VORN/VORNQ:2:result_int32x2 [] = { fffffffc, fffffffd, } +VORN/VORNQ:3:result_int64x1 [] = { fffffffffffffffb, } +VORN/VORNQ:4:result_uint8x8 [] = { fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:5:result_uint16x4 [] = { fff1, fff1, fff3, fff3, } +VORN/VORNQ:6:result_uint32x2 [] = { fffffff7, fffffff7, } +VORN/VORNQ:7:result_uint64x1 [] = { fffffffffffffffd, } +VORN/VORNQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VORN/VORNQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VORN/VORNQ:10:result_float32x2 [] = { 33333333, 33333333, } +VORN/VORNQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VORN/VORNQ:12:result_int8x16 [] = { fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:13:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff7, fffffff7, fffffff7, fffffff7, } +VORN/VORNQ:14:result_int32x4 [] = { fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:15:result_int64x2 [] = { fffffffffffffff7, fffffffffffffff7, } +VORN/VORNQ:16:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:17:result_uint16x8 [] = { fffc, fffd, fffe, ffff, fffc, fffd, fffe, ffff, } +VORN/VORNQ:18:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VORN/VORNQ:19:result_uint64x2 [] = { fffffffffffffffc, fffffffffffffffd, } +VORN/VORNQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VORN/VORNQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VORN/VORNQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VORN/VORNQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VEOR/VEORQ output: +VEOR/VEORQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff0, fffffff1, fffffff6, fffffff7, fffffff4, fffffff5, } +VEOR/VEORQ:1:result_int16x4 [] = { c, d, e, f, } +VEOR/VEORQ:2:result_int32x2 [] = { 
fffffff3, fffffff2, } +VEOR/VEORQ:3:result_int64x1 [] = { ffffffffffffff94, } +VEOR/VEORQ:4:result_uint8x8 [] = { e4, e5, e6, e7, e0, e1, e2, e3, } +VEOR/VEORQ:5:result_uint16x4 [] = { ffee, ffef, ffec, ffed, } +VEOR/VEORQ:6:result_uint32x2 [] = { ffffffd8, ffffffd9, } +VEOR/VEORQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VEOR/VEORQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VEOR/VEORQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VEOR/VEORQ:10:result_float32x2 [] = { 33333333, 33333333, } +VEOR/VEORQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VEOR/VEORQ:12:result_int8x16 [] = { 6, 7, 4, 5, 2, 3, 0, 1, e, f, c, d, a, b, 8, 9, } +VEOR/VEORQ:13:result_int16x8 [] = { 1c, 1d, 1e, 1f, 18, 19, 1a, 1b, } +VEOR/VEORQ:14:result_int32x4 [] = { 12, 13, 10, 11, } +VEOR/VEORQ:15:result_int64x2 [] = { ffffffffffffffe8, ffffffffffffffe9, } +VEOR/VEORQ:16:result_uint8x16 [] = { fc, fd, fe, ff, f8, f9, fa, fb, f4, f5, f6, f7, f0, f1, f2, f3, } +VEOR/VEORQ:17:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VEOR/VEORQ:18:result_uint32x4 [] = { ffffffc7, ffffffc6, ffffffc5, ffffffc4, } +VEOR/VEORQ:19:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff2, } +VEOR/VEORQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VEOR/VEORQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VEOR/VEORQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VEOR/VEORQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VBIC/VBICQ output: +VBIC/VBICQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff4, fffffff5, fffffff4, fffffff5, } +VBIC/VBICQ:1:result_int16x4 [] = { 0, 1, 2, 3, } +VBIC/VBICQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBIC/VBICQ:3:result_int64x1 [] = { ffffffffffffff90, } +VBIC/VBICQ:4:result_uint8x8 [] = { e0, e1, e2, e3, e0, e1, e2, e3, } +VBIC/VBICQ:5:result_uint16x4 [] = { ffe0, ffe1, ffe0, ffe1, } 
+VBIC/VBICQ:6:result_uint32x2 [] = { ffffffd0, ffffffd1, } +VBIC/VBICQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VBIC/VBICQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VBIC/VBICQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VBIC/VBICQ:10:result_float32x2 [] = { 33333333, 33333333, } +VBIC/VBICQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VBIC/VBICQ:12:result_int8x16 [] = { 0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9, } +VBIC/VBICQ:13:result_int16x8 [] = { 10, 11, 12, 13, 10, 11, 12, 13, } +VBIC/VBICQ:14:result_int32x4 [] = { 10, 11, 10, 11, } +VBIC/VBICQ:15:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VBIC/VBICQ:16:result_uint8x16 [] = { f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, } +VBIC/VBICQ:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff4, fff4, fff4, fff4, } +VBIC/VBICQ:18:result_uint32x4 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VBIC/VBICQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VBIC/VBICQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VBIC/VBICQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VBIC/VBICQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VBIC/VBICQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCREATE output: +VCREATE:0:result_int8x8 [] = { fffffff0, ffffffde, ffffffbc, ffffff9a, 78, 56, 34, 12, } +VCREATE:1:result_int16x4 [] = { ffffdef0, ffff9abc, 5678, 1234, } +VCREATE:2:result_int32x2 [] = { 9abcdef0, 12345678, } +VCREATE:3:result_int64x1 [] = { 123456789abcdef0, } +VCREATE:4:result_uint8x8 [] = { f0, de, bc, 9a, 78, 56, 34, 12, } +VCREATE:5:result_uint16x4 [] = { def0, 9abc, 5678, 1234, } +VCREATE:6:result_uint32x2 [] = { 9abcdef0, 12345678, } +VCREATE:7:result_uint64x1 [] = { 123456789abcdef0, } +VCREATE:8:result_poly8x8 [] = { f0, de, bc, 9a, 78, 56, 34, 12, } +VCREATE:9:result_poly16x4 [] = { def0, 9abc, 5678, 1234, } 
+VCREATE:10:result_float32x2 [] = { 9abcdef0, 12345678, } +VCREATE:11:result_float16x4 [] = { def0, 9abc, 5678, 1234, } +VCREATE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD2_LANE/VLD2Q_LANE chunk 0 output: +VLD2_LANE/VLD2Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD2_LANE/VLD2Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:10:result_float32x2 [] = { 
c1800000, c1700000, } +VLD2_LANE/VLD2Q_LANE:11:result_float16x4 [] = { cc00, cb80, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:13:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:14:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:17:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:21:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:22:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:23:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, cc00, cb80, aaaa, aaaa, } + +VLD2_LANE/VLD2Q_LANE chunk 1 output: +VLD2_LANE/VLD2Q_LANE:24:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:25:result_int16x4 [] = { fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:26:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:28:result_uint8x8 [] = { f0, f1, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:29:result_uint16x4 [] = { aaaa, aaaa, fff0, fff1, } +VLD2_LANE/VLD2Q_LANE:30:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } 
+VLD2_LANE/VLD2Q_LANE:32:result_poly8x8 [] = { f0, f1, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:33:result_poly16x4 [] = { aaaa, aaaa, fff0, fff1, } +VLD2_LANE/VLD2Q_LANE:34:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:35:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:37:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:38:result_int32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:41:result_uint16x8 [] = { aaaa, aaaa, fff0, fff1, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:42:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:45:result_poly16x8 [] = { aaaa, aaaa, fff0, fff1, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:46:result_float32x4 [] = { c1800000, c1700000, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:47:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD3_LANE/VLD3Q_LANE chunk 0 output: +VLD3_LANE/VLD3Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } 
+VLD3_LANE/VLD3Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3_LANE/VLD3Q_LANE:11:result_float16x4 [] = { cc00, cb80, cb00, aaaa, } +VLD3_LANE/VLD3Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:13:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:14:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:17:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:21:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:22:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:23:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, cc00, cb80, } + +VLD3_LANE/VLD3Q_LANE chunk 1 output: +VLD3_LANE/VLD3Q_LANE:24:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:25:result_int16x4 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, } 
+VLD3_LANE/VLD3Q_LANE:26:result_int32x2 [] = { fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:28:result_uint8x8 [] = { aa, aa, aa, aa, f0, f1, f2, aa, } +VLD3_LANE/VLD3Q_LANE:29:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:30:result_uint32x2 [] = { aaaaaaaa, fffffff0, } +VLD3_LANE/VLD3Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:32:result_poly8x8 [] = { aa, aa, aa, aa, f0, f1, f2, aa, } +VLD3_LANE/VLD3Q_LANE:33:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:34:result_float32x2 [] = { c1600000, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:35:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:37:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:38:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:41:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, } +VLD3_LANE/VLD3Q_LANE:42:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:45:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, } +VLD3_LANE/VLD3Q_LANE:46:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, c1800000, c1700000, } +VLD3_LANE/VLD3Q_LANE:47:result_float16x8 [] = { cb00, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD3_LANE/VLD3Q_LANE chunk 2 output: 
+VLD3_LANE/VLD3Q_LANE:48:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:49:result_int16x4 [] = { fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:50:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:51:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:52:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:53:result_uint16x4 [] = { aaaa, fff0, fff1, fff2, } +VLD3_LANE/VLD3Q_LANE:54:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:56:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:57:result_poly16x4 [] = { aaaa, fff0, fff1, fff2, } +VLD3_LANE/VLD3Q_LANE:58:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:59:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:61:result_int16x8 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:62:result_int32x4 [] = { fffffff2, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:65:result_uint16x8 [] = { fff1, fff2, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:66:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:69:result_poly16x8 [] = { fff1, fff2, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } 
+VLD3_LANE/VLD3Q_LANE:70:result_float32x4 [] = { c1600000, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:71:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 0 output: +VLD4_LANE/VLD4Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4_LANE/VLD4Q_LANE:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_LANE/VLD4Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:13:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:14:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:17:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } 
+VLD4_LANE/VLD4Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:21:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:22:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:23:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 1 output: +VLD4_LANE/VLD4Q_LANE:24:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:25:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:28:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:29:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:30:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:32:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:33:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:34:result_float32x2 [] = { c1600000, c1500000, } +VLD4_LANE/VLD4Q_LANE:35:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:37:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:38:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:41:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, 
aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:42:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:45:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:46:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:47:result_float16x8 [] = { cc00, cb80, cb00, ca80, aaaa, aaaa, aaaa, aaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 2 output: +VLD4_LANE/VLD4Q_LANE:48:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:49:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:50:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:51:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:52:result_uint8x8 [] = { f0, f1, f2, f3, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:53:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:54:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:56:result_poly8x8 [] = { f0, f1, f2, f3, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:57:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:58:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:59:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:61:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:62:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, 
} +VLD4_LANE/VLD4Q_LANE:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:65:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:66:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:69:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:70:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VLD4_LANE/VLD4Q_LANE:71:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 3 output: +VLD4_LANE/VLD4Q_LANE:72:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:73:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:74:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:75:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:76:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:77:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:78:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:79:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:80:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:81:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:82:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:83:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:84:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:85:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, ffffaaaa, ffffaaaa, 
ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:86:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:88:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:89:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:90:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:91:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:93:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:94:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:95:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD2_DUP/VLD2Q_DUP chunk 0 output: +VLD2_DUP/VLD2Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:4:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:8:result_poly8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD2_DUP/VLD2Q_DUP:11:result_float16x4 [] = { cc00, cb80, cc00, cb80, } +VLD2_DUP/VLD2Q_DUP:12:result_int8x16 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD2_DUP/VLD2Q_DUP chunk 1 output: +VLD2_DUP/VLD2Q_DUP:24:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:25:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:26:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:27:result_int64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:28:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:29:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:30:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:32:result_poly8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:33:result_poly16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:34:result_float32x2 [] = { c1800000, c1700000, } +VLD2_DUP/VLD2Q_DUP:35:result_float16x4 [] = { 
cc00, cb80, cc00, cb80, } +VLD2_DUP/VLD2Q_DUP:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD3_DUP/VLD3Q_DUP chunk 0 output: +VLD3_DUP/VLD3Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f0, f1, f2, f0, f1, } +VLD3_DUP/VLD3Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff0, } +VLD3_DUP/VLD3Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:8:result_poly8x8 [] = { f0, f1, f2, f0, f1, f2, f0, f1, } +VLD3_DUP/VLD3Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff2, fff0, } 
+VLD3_DUP/VLD3Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3_DUP/VLD3Q_DUP:11:result_float16x4 [] = { cc00, cb80, cb00, cc00, } +VLD3_DUP/VLD3Q_DUP:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD3_DUP/VLD3Q_DUP chunk 1 output: +VLD3_DUP/VLD3Q_DUP:24:result_int8x8 [] = { fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:25:result_int16x4 [] = { fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:26:result_int32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:27:result_int64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:28:result_uint8x8 [] = { f2, f0, f1, f2, f0, f1, f2, f0, } +VLD3_DUP/VLD3Q_DUP:29:result_uint16x4 [] = { fff1, fff2, fff0, fff1, } +VLD3_DUP/VLD3Q_DUP:30:result_uint32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:32:result_poly8x8 [] = { f2, f0, f1, f2, 
f0, f1, f2, f0, } +VLD3_DUP/VLD3Q_DUP:33:result_poly16x4 [] = { fff1, fff2, fff0, fff1, } +VLD3_DUP/VLD3Q_DUP:34:result_float32x2 [] = { c1600000, c1800000, } +VLD3_DUP/VLD3Q_DUP:35:result_float16x4 [] = { cb80, cb00, cc00, cb80, } +VLD3_DUP/VLD3Q_DUP:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD3_DUP/VLD3Q_DUP chunk 2 output: +VLD3_DUP/VLD3Q_DUP:48:result_int8x8 [] = { fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:49:result_int16x4 [] = { fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:50:result_int32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:51:result_int64x1 [] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:52:result_uint8x8 [] = { f1, f2, f0, f1, f2, f0, f1, f2, } +VLD3_DUP/VLD3Q_DUP:53:result_uint16x4 [] = { fff2, fff0, fff1, fff2, } +VLD3_DUP/VLD3Q_DUP:54:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:55:result_uint64x1 
[] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:56:result_poly8x8 [] = { f1, f2, f0, f1, f2, f0, f1, f2, } +VLD3_DUP/VLD3Q_DUP:57:result_poly16x4 [] = { fff2, fff0, fff1, fff2, } +VLD3_DUP/VLD3Q_DUP:58:result_float32x2 [] = { c1700000, c1600000, } +VLD3_DUP/VLD3Q_DUP:59:result_float16x4 [] = { cb00, cc00, cb80, cb00, } +VLD3_DUP/VLD3Q_DUP:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:61:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:62:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:65:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:66:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD4_DUP/VLD4Q_DUP chunk 0 output: +VLD4_DUP/VLD4Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } 
+VLD4_DUP/VLD4Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:8:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4_DUP/VLD4Q_DUP:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_DUP/VLD4Q_DUP:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD4_DUP/VLD4Q_DUP chunk 1 output: +VLD4_DUP/VLD4Q_DUP:24:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:25:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:27:result_int64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:28:result_uint8x8 [] = { f0, f1, f2, f3, f0, 
f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:29:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:32:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:33:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:34:result_float32x2 [] = { c1600000, c1500000, } +VLD4_DUP/VLD4Q_DUP:35:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_DUP/VLD4Q_DUP:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD4_DUP/VLD4Q_DUP chunk 2 output: +VLD4_DUP/VLD4Q_DUP:48:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:49:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:50:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:51:result_int64x1 [] = 
{ fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:52:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:53:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:54:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:55:result_uint64x1 [] = { fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:56:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:57:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:58:result_float32x2 [] = { c1800000, c1700000, } +VLD4_DUP/VLD4Q_DUP:59:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_DUP/VLD4Q_DUP:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:61:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:62:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:65:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:66:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD4_DUP/VLD4Q_DUP chunk 3 output: +VLD4_DUP/VLD4Q_DUP:72:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:73:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } 
+VLD4_DUP/VLD4Q_DUP:74:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:75:result_int64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:76:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:77:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:78:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:79:result_uint64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:80:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:81:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:82:result_float32x2 [] = { c1600000, c1500000, } +VLD4_DUP/VLD4Q_DUP:83:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_DUP/VLD4Q_DUP:84:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:85:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:86:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:88:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:89:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:90:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:91:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLA output: +VMLA:0:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VMLA:1:result_int16x4 [] = { 1f8c, 
1f8d, 1f8e, 1f8f, } +VMLA:2:result_int32x2 [] = { 2bf7, 2bf8, } +VMLA:3:result_int64x1 [] = { 3333333333333333, } +VMLA:4:result_uint8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VMLA:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA:6:result_uint32x2 [] = { 43ac, 43ad, } +VMLA:7:result_uint64x1 [] = { 3333333333333333, } +VMLA:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA:10:result_float32x2 [] = { 43a14e76, 43a1ce76, } +VMLA:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLA:12:result_int8x16 [] = { f, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, } +VMLA:13:result_int16x8 [] = { 4830, 4831, 4832, 4833, 4834, 4835, 4836, 4837, } +VMLA:14:result_int32x4 [] = { 470f, 4710, 4711, 4712, } +VMLA:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:16:result_uint8x16 [] = { ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, } +VMLA:17:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA:18:result_uint32x4 [] = { 3620, 3621, 3622, 3623, } +VMLA:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA:22:result_float32x4 [] = { 45f0ae15, 45f0b615, 45f0be15, 45f0c615, } +VMLA:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLS output: +VMLS:0:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VMLS:1:result_int16x4 [] = { ffffe054, ffffe055, ffffe056, ffffe057, } +VMLS:2:result_int32x2 [] = { ffffd3e9, ffffd3ea, } +VMLS:3:result_int64x1 [] = { 3333333333333333, } +VMLS:4:result_uint8x8 [] = { c0, c1, c2, c3, c4, c5, c6, c7, } +VMLS:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS:6:result_uint32x2 [] = { ffffbc34, ffffbc35, } +VMLS:7:result_uint64x1 [] = { 3333333333333333, } +VMLS:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 
33, 33, 33, } +VMLS:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS:10:result_float32x2 [] = { c3b14e76, c3b0ce76, } +VMLS:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLS:12:result_int8x16 [] = { ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, } +VMLS:13:result_int16x8 [] = { ffffb7b0, ffffb7b1, ffffb7b2, ffffb7b3, ffffb7b4, ffffb7b5, ffffb7b6, ffffb7b7, } +VMLS:14:result_int32x4 [] = { ffffb8d1, ffffb8d2, ffffb8d3, ffffb8d4, } +VMLS:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:16:result_uint8x16 [] = { 34, 35, 36, 37, 38, 39, 3a, 3b, 3c, 3d, 3e, 3f, 40, 41, 42, 43, } +VMLS:17:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS:18:result_uint32x4 [] = { ffffc9c0, ffffc9c1, ffffc9c2, ffffc9c3, } +VMLS:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS:22:result_float32x4 [] = { c5f1ae15, c5f1a615, c5f19e15, c5f19615, } +VMLS:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMUL output: +VMUL:0:result_int8x8 [] = { fffffff0, 1, 12, 23, 34, 45, 56, 67, } +VMUL:1:result_int16x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL:2:result_int32x2 [] = { fffffcd0, fffffd03, } +VMUL:3:result_int64x1 [] = { 3333333333333333, } +VMUL:4:result_uint8x8 [] = { c0, 4, 48, 8c, d0, 14, 58, 9c, } +VMUL:5:result_uint16x4 [] = { fab0, fb05, fb5a, fbaf, } +VMUL:6:result_uint32x2 [] = { fffff9a0, fffffa06, } +VMUL:7:result_uint64x1 [] = { 3333333333333333, } +VMUL:8:result_poly8x8 [] = { c0, 84, 48, c, d0, 94, 58, 1c, } +VMUL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL:10:result_float32x2 [] = { c4053333, c3f9c000, } +VMUL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMUL:12:result_int8x16 [] = { 
ffffff90, 7, 7e, fffffff5, 6c, ffffffe3, 5a, ffffffd1, 48, ffffffbf, 36, ffffffad, 24, ffffff9b, 12, ffffff89, } +VMUL:13:result_int16x8 [] = { fffff780, fffff808, fffff890, fffff918, fffff9a0, fffffa28, fffffab0, fffffb38, } +VMUL:14:result_int32x4 [] = { fffff670, fffff709, fffff7a2, fffff83b, } +VMUL:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL:16:result_uint8x16 [] = { 60, a, b4, 5e, 8, b2, 5c, 6, b0, 5a, 4, ae, 58, 2, ac, 56, } +VMUL:17:result_uint16x8 [] = { f450, f50b, f5c6, f681, f73c, f7f7, f8b2, f96d, } +VMUL:18:result_uint32x4 [] = { fffff340, fffff40c, fffff4d8, fffff5a4, } +VMUL:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL:20:result_poly8x16 [] = { 60, ca, 34, 9e, c8, 62, 9c, 36, 30, 9a, 64, ce, 98, 32, cc, 66, } +VMUL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL:22:result_float32x4 [] = { c4c73333, c4bac000, c4ae4ccd, c4a1d999, } +VMUL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMUL_LANE output: +VMUL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:1:result_int16x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VMUL_LANE:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMUL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:5:result_uint16x4 [] = { bbc0, c004, c448, c88c, } +VMUL_LANE:6:result_uint32x2 [] = { fffface0, ffffb212, } +VMUL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL_LANE:10:result_float32x2 [] = { c3b66666, c3ab0000, } +VMUL_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMUL_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:13:result_int16x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } 
+VMUL_LANE:14:result_int32x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:17:result_uint16x8 [] = { bbc0, c004, c448, c88c, ccd0, d114, d558, d99c, } +VMUL_LANE:18:result_uint32x4 [] = { fffface0, ffffb212, ffffb744, ffffbc76, } +VMUL_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL_LANE:22:result_float32x4 [] = { c3b66666, c3ab0000, c39f9999, c3943333, } +VMUL_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMUL_N output: +VMUL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:1:result_int16x4 [] = { fffffef0, ffffff01, ffffff12, ffffff23, } +VMUL_N:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_N:3:result_int64x1 [] = { 3333333333333333, } +VMUL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:5:result_uint16x4 [] = { fcd0, fd03, fd36, fd69, } +VMUL_N:6:result_uint32x2 [] = { fffffbc0, fffffc04, } +VMUL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL_N:10:result_float32x2 [] = { c3b26666, c3a74000, } +VMUL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMUL_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:13:result_int16x8 [] = { fffffab0, fffffb05, fffffb5a, fffffbaf, fffffc04, fffffc59, fffffcae, fffffd03, } +VMUL_N:14:result_int32x4 [] = { fffff9a0, fffffa06, fffffa6c, fffffad2, } +VMUL_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, } +VMUL_N:17:result_uint16x8 [] = { f890, f907, f97e, f9f5, fa6c, fae3, fb5a, fbd1, } +VMUL_N:18:result_uint32x4 [] = { fffff780, fffff808, fffff890, fffff918, } +VMUL_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL_N:22:result_float32x4 [] = { c4b1cccd, c4a6b000, c49b9333, c4907667, } +VMUL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMULL_N output: +VMULL_N:0:result_int32x4 [] = { 11000, 11000, 11000, 11000, } +VMULL_N:1:result_int64x2 [] = { 22000, 22000, } +VMULL_N:2:result_uint32x4 [] = { 33000, 33000, 33000, 33000, } +VMULL_N:3:result_uint64x2 [] = { 44000, 44000, } + +VMLA_LANE output: +VMLA_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:1:result_int16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:2:result_int32x2 [] = { 3e07, 3e08, } +VMLA_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLA_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:6:result_uint32x2 [] = { 3e07, 3e08, } +VMLA_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA_LANE:10:result_float32x2 [] = { 4418c687, 44190687, } +VMLA_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLA_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:13:result_int16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:14:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMLA_LANE:17:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:18:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA_LANE:22:result_float32x4 [] = { 441a3168, 441a7168, 441ab168, 441af168, } +VMLA_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLS_LANE output: +VMLS_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:1:result_int16x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:2:result_int32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLS_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS_LANE:6:result_uint32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS_LANE:10:result_float32x2 [] = { c420c687, c4208687, } +VMLS_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLS_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:13:result_int16x8 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, ffffc1dd, ffffc1de, ffffc1df, ffffc1e0, } +VMLS_LANE:14:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:17:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS_LANE:18:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:19:result_uint64x2 [] 
= { 3333333333333333, 3333333333333333, } +VMLS_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS_LANE:22:result_float32x4 [] = { c4223168, c421f168, c421b168, c4217168, } +VMLS_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLA_N output: +VMLA_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:1:result_int16x4 [] = { 595, 596, 597, 598, } +VMLA_N:2:result_int32x2 [] = { b3a, b3b, } +VMLA_N:3:result_int64x1 [] = { 3333333333333333, } +VMLA_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:5:result_uint16x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLA_N:6:result_uint32x2 [] = { 1684, 1685, } +VMLA_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA_N:10:result_float32x2 [] = { 4497deb8, 4497feb8, } +VMLA_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLA_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:13:result_int16x8 [] = { 1c29, 1c2a, 1c2b, 1c2c, 1c2d, 1c2e, 1c2f, 1c30, } +VMLA_N:14:result_int32x4 [] = { 21ce, 21cf, 21d0, 21d1, } +VMLA_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:17:result_uint16x8 [] = { 2773, 2774, 2775, 2776, 2777, 2778, 2779, 277a, } +VMLA_N:18:result_uint32x4 [] = { 2d18, 2d19, 2d1a, 2d1b, } +VMLA_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA_N:22:result_float32x4 [] = { 4568087b, 4568187b, 4568287b, 4568387b, } +VMLA_N:23:result_float16x8 [] = { 0, 0, 0, 
0, 0, 0, 0, 0, } + +VMLS_N output: +VMLS_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:1:result_int16x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLS_N:2:result_int32x2 [] = { fffff4a6, fffff4a7, } +VMLS_N:3:result_int64x1 [] = { 3333333333333333, } +VMLS_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:5:result_uint16x4 [] = { ef01, ef02, ef03, ef04, } +VMLS_N:6:result_uint32x2 [] = { ffffe95c, ffffe95d, } +VMLS_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS_N:10:result_float32x2 [] = { c49bdeb8, c49bbeb8, } +VMLS_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLS_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:13:result_int16x8 [] = { ffffe3b7, ffffe3b8, ffffe3b9, ffffe3ba, ffffe3bb, ffffe3bc, ffffe3bd, ffffe3be, } +VMLS_N:14:result_int32x4 [] = { ffffde12, ffffde13, ffffde14, ffffde15, } +VMLS_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:17:result_uint16x8 [] = { d86d, d86e, d86f, d870, d871, d872, d873, d874, } +VMLS_N:18:result_uint32x4 [] = { ffffd2c8, ffffd2c9, ffffd2ca, ffffd2cb, } +VMLS_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS_N:22:result_float32x4 [] = { c56a087b, c569f87b, c569e87b, c569d87b, } +VMLS_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSLI_N output: +VSLI_N:0:result_int8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VSLI_N:1:result_int16x4 [] = { ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VSLI_N:2:result_int32x2 [] = { 6, 7, } +VSLI_N:3:result_int64x1 [] = { 64fffffff0, 
} +VSLI_N:4:result_uint8x8 [] = { 50, 51, 52, 53, 50, 51, 52, 53, } +VSLI_N:5:result_uint16x4 [] = { 7bf0, 7bf1, 7bf2, 7bf3, } +VSLI_N:6:result_uint32x2 [] = { 3ffffff0, 3ffffff1, } +VSLI_N:7:result_uint64x1 [] = { 10, } +VSLI_N:8:result_poly8x8 [] = { 50, 51, 52, 53, 50, 51, 52, 53, } +VSLI_N:9:result_poly16x4 [] = { 7bf0, 7bf1, 7bf2, 7bf3, } +VSLI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSLI_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSLI_N:12:result_int8x16 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, } +VSLI_N:13:result_int16x8 [] = { ffffff60, ffffff61, ffffff62, ffffff63, ffffff64, ffffff65, ffffff66, ffffff67, } +VSLI_N:14:result_int32x4 [] = { fe2ffff0, fe2ffff1, fe2ffff2, fe2ffff3, } +VSLI_N:15:result_int64x2 [] = { 18fff0, 18fff1, } +VSLI_N:16:result_uint8x16 [] = { 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, } +VSLI_N:17:result_uint16x8 [] = { 3ff0, 3ff1, 3ff2, 3ff3, 3ff4, 3ff5, 3ff6, 3ff7, } +VSLI_N:18:result_uint32x4 [] = { 1bfffff0, 1bfffff1, 1bfffff2, 1bfffff3, } +VSLI_N:19:result_uint64x2 [] = { 7ffffffffffff0, 7ffffffffffff1, } +VSLI_N:20:result_poly8x16 [] = { 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, } +VSLI_N:21:result_poly16x8 [] = { 3ff0, 3ff1, 3ff2, 3ff3, 3ff4, 3ff5, 3ff6, 3ff7, } +VSLI_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSLI_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSLI_Nmax shift amount output: +VSLI_N:0:result_int8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:1:result_int16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:2:result_int32x2 [] = { fffffff0, fffffff1, } +VSLI_N:3:result_int64x1 [] = { 7ffffffffffffff0, } +VSLI_N:4:result_uint8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:5:result_uint16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:6:result_uint32x2 [] = { 7ffffff0, 7ffffff1, } 
+VSLI_N:7:result_uint64x1 [] = { 7ffffffffffffff0, } +VSLI_N:8:result_poly8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:9:result_poly16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSLI_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSLI_N:12:result_int8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:13:result_int16x8 [] = { 7ff0, 7ff1, 7ff2, 7ff3, 7ff4, 7ff5, 7ff6, 7ff7, } +VSLI_N:14:result_int32x4 [] = { 7ffffff0, 7ffffff1, 7ffffff2, 7ffffff3, } +VSLI_N:15:result_int64x2 [] = { 7ffffffffffffff0, 7ffffffffffffff1, } +VSLI_N:16:result_uint8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSLI_N:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSLI_N:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSLI_N:20:result_poly8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSLI_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSLI_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSRI_N output: +VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VSRI_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:2:result_int32x2 [] = { 80000001, 80000001, } +VSRI_N:3:result_int64x1 [] = { ffffffff00000000, } +VSRI_N:4:result_uint8x8 [] = { c5, c5, c5, c5, c5, c5, c5, c5, } +VSRI_N:5:result_uint16x4 [] = { ffc0, ffc0, ffc0, ffc0, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VSRI_N:7:result_uint64x1 [] = { e000000000000000, } +VSRI_N:8:result_poly8x8 [] = { c5, c5, c5, c5, c5, c5, c5, c5, } +VSRI_N:9:result_poly16x4 [] = { ffc0, ffc0, ffc0, ffc0, } +VSRI_N:10:result_float32x2 [] = { 33333333, 33333333, } 
+VSRI_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSRI_N:12:result_int8x16 [] = { fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:13:result_int16x8 [] = { fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, } +VSRI_N:14:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:15:result_int64x2 [] = { ffff000000000000, ffff000000000000, } +VSRI_N:16:result_uint8x16 [] = { e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, } +VSRI_N:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VSRI_N:18:result_uint32x4 [] = { fffffe00, fffffe00, fffffe00, fffffe00, } +VSRI_N:19:result_uint64x2 [] = { fffffffffffff800, fffffffffffff800, } +VSRI_N:20:result_poly8x16 [] = { e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, } +VSRI_N:21:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VSRI_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSRI_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSRI_N max shift amount output: +VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRI_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:2:result_int32x2 [] = { fffffff0, fffffff1, } +VSRI_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRI_N:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VSRI_N:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VSRI_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRI_N:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VSRI_N:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VSRI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSRI_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSRI_N:12:result_int8x16 [] = { fffffff0, 
fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRI_N:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRI_N:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRI_N:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VSRI_N:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSRI_N:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRI_N:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VSRI_N:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSRI_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSRI_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTST/VTSTQ (signed input) output: +VTST/VTSTQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:3:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:4:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:5:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:6:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:7:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:10:result_float32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTST/VTSTQ:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 
3333, 3333, 3333, 3333, } +VTST/VTSTQ:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:16:result_uint8x16 [] = { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:17:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:18:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTST/VTSTQ (unsigned input) output: +VTST/VTSTQ:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:26:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:27:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:28:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:29:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:30:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:31:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:34:result_float32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VTST/VTSTQ:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:40:result_uint8x16 [] 
= { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:41:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:42:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VADDHN output: +VADDHN:0:result_int8x8 [] = { 32, 32, 32, 32, 32, 32, 32, 32, } +VADDHN:1:result_int16x4 [] = { 32, 32, 32, 32, } +VADDHN:2:result_int32x2 [] = { 18, 18, } +VADDHN:3:result_int64x1 [] = { 3333333333333333, } +VADDHN:4:result_uint8x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VADDHN:5:result_uint16x4 [] = { 37, 37, 37, 37, } +VADDHN:6:result_uint32x2 [] = { 3, 3, } +VADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VADDHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDHN:10:result_float32x2 [] = { 33333333, 33333333, } +VADDHN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VADDHN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:20:result_poly8x16 [] = { 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRADDHN output: +VRADDHN:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:1:result_int16x4 [] = { 33, 33, 33, 33, } +VRADDHN:2:result_int32x2 [] = { 19, 19, } +VRADDHN:3:result_int64x1 [] = { 3333333333333333, } +VRADDHN:4:result_uint8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VRADDHN:5:result_uint16x4 [] = { 38, 38, 38, 38, } +VRADDHN:6:result_uint32x2 [] = { 4, 4, } +VRADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VRADDHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRADDHN:10:result_float32x2 [] = { 33333333, 33333333, } +VRADDHN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRADDHN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VADDL output: +VADDL:0:result_int8x8 [] = { 33, 33, 
33, 33, 33, 33, 33, 33, } +VADDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:2:result_int32x2 [] = { 33333333, 33333333, } +VADDL:3:result_int64x1 [] = { 3333333333333333, } +VADDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDL:7:result_uint64x1 [] = { 3333333333333333, } +VADDL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:10:result_float32x2 [] = { 33333333, 33333333, } +VADDL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VADDL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:13:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDL:14:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDL:15:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:17:result_uint16x8 [] = { 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1ea, } +VADDL:18:result_uint32x4 [] = { 1ffe1, 1ffe2, 1ffe3, 1ffe4, } +VADDL:19:result_uint64x2 [] = { 1ffffffe0, 1ffffffe1, } +VADDL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VADDW output: +VADDW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:2:result_int32x2 [] = { 33333333, 33333333, } +VADDW:3:result_int64x1 [] = { 3333333333333333, } +VADDW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } 
+VADDW:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDW:7:result_uint64x1 [] = { 3333333333333333, } +VADDW:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:10:result_float32x2 [] = { 33333333, 33333333, } +VADDW:11:result_float16x4 [] = { 0, 0, 0, 0, } +VADDW:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:13:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDW:14:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDW:15:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDW:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:17:result_uint16x8 [] = { e3, e4, e5, e6, e7, e8, e9, ea, } +VADDW:18:result_uint32x4 [] = { ffe1, ffe2, ffe3, ffe4, } +VADDW:19:result_uint64x2 [] = { ffffffe0, ffffffe1, } +VADDW:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDW:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDW:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VHADD/VHADDQ output: +VHADD/VHADDQ:0:result_int8x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VHADD/VHADDQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:4:result_uint8x8 [] = { f1, f2, f2, f3, f3, f4, f4, f5, } +VHADD/VHADDQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff2, } +VHADD/VHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VHADD/VHADDQ:9:result_poly16x4 [] = { 
3333, 3333, 3333, 3333, } +VHADD/VHADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VHADD/VHADDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VHADD/VHADDQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, } +VHADD/VHADDQ:13:result_int16x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:16:result_uint8x16 [] = { f4, f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, } +VHADD/VHADDQ:17:result_uint16x8 [] = { fff1, fff1, fff2, fff2, fff3, fff3, fff4, fff4, } +VHADD/VHADDQ:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VHADD/VHADDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VHADD/VHADDQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VHADD/VHADDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRHADD/VRHADDQ output: +VRHADD/VRHADDQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:1:result_int16x4 [] = { fffffff1, fffffff2, fffffff2, fffffff3, } +VRHADD/VRHADDQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:4:result_uint8x8 [] = { f2, f2, f3, f3, f4, f4, f5, f5, } +VRHADD/VRHADDQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff2, } +VRHADD/VRHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, } +VRHADD/VRHADDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRHADD/VRHADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VRHADD/VRHADDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRHADD/VRHADDQ:12:result_int8x16 [] = { fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, } +VRHADD/VRHADDQ:13:result_int16x8 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:16:result_uint8x16 [] = { f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, fc, } +VRHADD/VRHADDQ:17:result_uint16x8 [] = { fff1, fff2, fff2, fff3, fff3, fff4, fff4, fff5, } +VRHADD/VRHADDQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRHADD/VRHADDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRHADD/VRHADDQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRHADD/VRHADDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VHSUB/VHSUBQ output: +VHSUB/VHSUBQ:0:result_int8x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:1:result_int16x4 [] = { ffffffff, ffffffff, 0, 0, } +VHSUB/VHSUBQ:2:result_int32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:3:result_int64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:4:result_uint8x8 [] = { fe, ff, ff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:5:result_uint16x4 [] = { ffff, 0, 0, 1, } +VHSUB/VHSUBQ:6:result_uint32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:7:result_uint64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VHSUB/VHSUBQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VHSUB/VHSUBQ:10:result_float32x2 [] = { 33333333, 33333333, } +VHSUB/VHSUBQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VHSUB/VHSUBQ:12:result_int8x16 [] = { fffffffe, fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, } +VHSUB/VHSUBQ:13:result_int16x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:14:result_int32x4 [] = { ffffffff, 0, 0, 1, } +VHSUB/VHSUBQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:16:result_uint8x16 [] = { fb, fc, fc, fd, fd, fe, fe, ff, ff, 0, 0, 1, 1, 2, 2, 3, } +VHSUB/VHSUBQ:17:result_uint16x8 [] = { ffff, ffff, 0, 0, 1, 1, 2, 2, } +VHSUB/VHSUBQ:18:result_uint32x4 [] = { ffffffff, 0, 0, 1, } +VHSUB/VHSUBQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VHSUB/VHSUBQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VHSUB/VHSUBQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VHSUB/VHSUBQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSUBL output: +VSUBL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBL:3:result_int64x1 [] = { 3333333333333333, } +VSUBL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:6:result_uint32x2 [] = { 33333333, 33333333, } +VSUBL:7:result_uint64x1 [] = { 3333333333333333, } +VSUBL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSUBL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:13:result_int16x8 
[] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBL:14:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBL:15:result_int64x2 [] = { 0, 1, } +VSUBL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:17:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, } +VSUBL:18:result_uint32x4 [] = { ffffffff, 0, 1, 2, } +VSUBL:19:result_uint64x2 [] = { 0, 1, } +VSUBL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSUBW output: +VSUBW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBW:3:result_int64x1 [] = { 3333333333333333, } +VSUBW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:6:result_uint32x2 [] = { 33333333, 33333333, } +VSUBW:7:result_uint64x1 [] = { 3333333333333333, } +VSUBW:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBW:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSUBW:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:13:result_int16x8 [] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBW:14:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBW:15:result_int64x2 [] = { 0, 1, } +VSUBW:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:17:result_uint16x8 [] = { fefd, fefe, feff, ff00, ff01, ff02, ff03, ff04, } +VSUBW:18:result_uint32x4 [] = { fffeffff, ffff0000, ffff0001, ffff0002, } +VSUBW:19:result_uint64x2 [] = { 
ffffffff00000000, ffffffff00000001, } +VSUBW:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBW:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBW:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSUBHN output: +VSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VSUBHN:2:result_int32x2 [] = { 17, 17, } +VSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VSUBHN:6:result_uint32x2 [] = { 2, 2, } +VSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VSUBHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBHN:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBHN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSUBHN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+ +VRSUBHN output: +VRSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VRSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VRSUBHN:2:result_int32x2 [] = { 17, 17, } +VRSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VRSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VRSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VRSUBHN:6:result_uint32x2 [] = { 2, 2, } +VRSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VRSUBHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSUBHN:10:result_float32x2 [] = { 33333333, 33333333, } +VRSUBHN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSUBHN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMVN/VMVNQ output: +VMVN/VMVNQ:0:result_int8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:1:result_int16x4 [] = { f, e, d, c, } +VMVN/VMVNQ:2:result_int32x2 [] = { f, e, } +VMVN/VMVNQ:3:result_int64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:4:result_uint8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:5:result_uint16x4 [] = { 
f, e, d, c, } +VMVN/VMVNQ:6:result_uint32x2 [] = { f, e, } +VMVN/VMVNQ:7:result_uint64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:8:result_poly8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMVN/VMVNQ:10:result_float32x2 [] = { 33333333, 33333333, } +VMVN/VMVNQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMVN/VMVNQ:12:result_int8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:13:result_int16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:14:result_int32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:16:result_uint8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:17:result_uint16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:18:result_uint32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:20:result_poly8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMVN/VMVNQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMVN/VMVNQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQMOVN cumulative saturation output: +VQMOVN:0:vqmovn_s16 Neon cumulative saturation 0 +VQMOVN:1:vqmovn_s32 Neon cumulative saturation 0 +VQMOVN:2:vqmovn_s64 Neon cumulative saturation 0 +VQMOVN:3:vqmovn_u16 Neon cumulative saturation 0 +VQMOVN:4:vqmovn_u32 Neon cumulative saturation 0 +VQMOVN:5:vqmovn_u64 Neon cumulative saturation 0 + +VQMOVN output: +VQMOVN:6:result_int8x8 [] = { 12, 12, 12, 12, 12, 12, 12, 12, } +VQMOVN:7:result_int16x4 [] = { 1278, 1278, 1278, 1278, } +VQMOVN:8:result_int32x2 [] = { 12345678, 12345678, } +VQMOVN:9:result_int64x1 [] = { 3333333333333333, } +VQMOVN:10:result_uint8x8 [] = { 82, 82, 82, 82, 82, 82, 82, 82, } +VQMOVN:11:result_uint16x4 [] = { 8765, 8765, 8765, 8765, } +VQMOVN:12:result_uint32x2 [] = { 87654321, 
87654321, } +VQMOVN:13:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVN:16:result_float32x2 [] = { 33333333, 33333333, } +VQMOVN:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQMOVN:18:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:19:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:20:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQMOVN cumulative saturation output: +VQMOVN:30:vqmovn_s16 Neon cumulative saturation 1 +VQMOVN:31:vqmovn_s32 Neon cumulative saturation 1 +VQMOVN:32:vqmovn_s64 Neon cumulative saturation 1 +VQMOVN:33:vqmovn_u16 Neon cumulative saturation 1 +VQMOVN:34:vqmovn_u32 Neon cumulative saturation 1 +VQMOVN:35:vqmovn_u64 Neon cumulative saturation 1 + +VQMOVN output: +VQMOVN:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQMOVN:37:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQMOVN:38:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQMOVN:39:result_int64x1 [] = { 3333333333333333, } +VQMOVN:40:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQMOVN:41:result_uint16x4 [] = { ffff, ffff, ffff, 
ffff, } +VQMOVN:42:result_uint32x2 [] = { ffffffff, ffffffff, } +VQMOVN:43:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:44:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:45:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVN:46:result_float32x2 [] = { 33333333, 33333333, } +VQMOVN:47:result_float16x4 [] = { 0, 0, 0, 0, } +VQMOVN:48:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:49:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:50:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:51:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:52:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:53:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:54:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:55:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:56:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:58:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQMOVUN cumulative saturation output: +VQMOVUN:0:vqmovun_s16 Neon cumulative saturation 0 +VQMOVUN:1:vqmovun_s32 Neon cumulative saturation 0 +VQMOVUN:2:vqmovun_s64 Neon cumulative saturation 0 + +VQMOVUN output: +VQMOVUN:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:5:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:6:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:7:result_uint8x8 [] = { 34, 34, 34, 34, 34, 34, 34, 34, } +VQMOVUN:8:result_uint16x4 [] = { 5678, 5678, 5678, 5678, } +VQMOVUN:9:result_uint32x2 [] = { 12345678, 12345678, } +VQMOVUN:10:result_uint64x1 [] = { 
3333333333333333, } +VQMOVUN:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:13:result_float32x2 [] = { 33333333, 33333333, } +VQMOVUN:14:result_float16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQMOVUN (negative input) cumulative saturation output: +VQMOVUN:27:vqmovun_s16 Neon cumulative saturation 1 +VQMOVUN:28:vqmovun_s32 Neon cumulative saturation 1 +VQMOVUN:29:vqmovun_s64 Neon cumulative saturation 1 + +VQMOVUN (negative input) output: +VQMOVUN:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:32:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:33:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:34:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQMOVUN:35:result_uint16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:36:result_uint32x2 [] = { 0, 0, } +VQMOVUN:37:result_uint64x1 [] = { 3333333333333333, } +VQMOVUN:38:result_poly8x8 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, } +VQMOVUN:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:40:result_float32x2 [] = { 33333333, 33333333, } +VQMOVUN:41:result_float16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:42:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:43:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:44:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:45:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:46:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:47:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:48:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:49:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:50:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:51:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:52:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N output: +VRSHR_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHR_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHR_N:3:result_int64x1 [] = { 0, } +VRSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, } +VRSHR_N:5:result_uint16x4 [] = { 1ffe, 1ffe, 1ffe, 1ffe, } +VRSHR_N:6:result_uint32x2 [] = { 8000000, 8000000, } +VRSHR_N:7:result_uint64x1 [] = { 80000000, } +VRSHR_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:12:result_int8x16 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, 
fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, 0, } +VRSHR_N:13:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:14:result_int32x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHR_N:15:result_int64x2 [] = { 0, 0, } +VRSHR_N:16:result_uint8x16 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, 40, 40, } +VRSHR_N:17:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1fff, 1fff, 1fff, 1fff, } +VRSHR_N:18:result_uint32x4 [] = { 8000000, 8000000, 8000000, 8000000, } +VRSHR_N:19:result_uint64x2 [] = { 80000000, 80000000, } +VRSHR_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: max shift amount, positive input) output: +VRSHR_N:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:25:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:26:result_int32x2 [] = { 0, 0, } +VRSHR_N:27:result_int64x1 [] = { 0, } +VRSHR_N:28:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:29:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSHR_N:30:result_uint32x2 [] = { 1, 1, } +VRSHR_N:31:result_uint64x1 [] = { 1, } +VRSHR_N:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:34:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:35:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:36:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:38:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHR_N:39:result_int64x2 [] = { 0, 0, } +VRSHR_N:40:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:41:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } 
+VRSHR_N:42:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHR_N:43:result_uint64x2 [] = { 1, 1, } +VRSHR_N:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VRSHR_N:48:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:49:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHR_N:50:result_int32x2 [] = { 40000000, 40000000, } +VRSHR_N:51:result_int64x1 [] = { 4000000000000000, } +VRSHR_N:52:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:53:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:54:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:55:result_uint64x1 [] = { 8000000000000000, } +VRSHR_N:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:58:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:59:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:60:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:61:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHR_N:62:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHR_N:63:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHR_N:64:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:65:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:66:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:67:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:69:result_poly16x8 
[] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: shift by 3, positive input) output: +VRSHR_N:72:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHR_N:73:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHR_N:74:result_int32x2 [] = { 10000000, 10000000, } +VRSHR_N:75:result_int64x1 [] = { 1000000000000000, } +VRSHR_N:76:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:77:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:78:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:79:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:80:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:81:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:82:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:83:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:84:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHR_N:85:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHR_N:86:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHR_N:87:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHR_N:88:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:89:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:90:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:91:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: 
shift by 1, with negative input) output: +VRSHR_N:96:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:97:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:98:result_int32x2 [] = { c0000000, c0000000, } +VRSHR_N:99:result_int64x1 [] = { c000000000000000, } +VRSHR_N:100:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:101:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:102:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:103:result_uint64x1 [] = { 8000000000000000, } +VRSHR_N:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:106:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:107:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:108:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:109:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:110:result_int32x4 [] = { c0000000, c0000000, c0000000, c0000000, } +VRSHR_N:111:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSHR_N:112:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:113:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:114:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:115:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: shift by 
3, with negative input) output: +VRSHR_N:120:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSHR_N:121:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:122:result_int32x2 [] = { f0000000, f0000000, } +VRSHR_N:123:result_int64x1 [] = { f000000000000000, } +VRSHR_N:124:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:125:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:126:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:127:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:128:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:129:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:130:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:131:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:132:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSHR_N:133:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:134:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSHR_N:135:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSHR_N:136:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:137:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:138:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:139:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:140:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:141:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:142:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:143:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N output: 
+VRSRA_N:0:result_int8x8 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VRSRA_N:2:result_int32x2 [] = { fffffffd, fffffffe, } +VRSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VRSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VRSRA_N:5:result_uint16x4 [] = { fffd, fffe, ffff, 0, } +VRSRA_N:6:result_uint32x2 [] = { fffffff4, fffffff5, } +VRSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VRSRA_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:12:result_int8x16 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, } +VRSRA_N:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:14:result_int32x4 [] = { fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:16:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VRSRA_N:17:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, } +VRSRA_N:18:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by 1, positive input) output: +VRSRA_N:24:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:25:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSRA_N:26:result_int32x2 
[] = { 40000000, 40000000, } +VRSRA_N:27:result_int64x1 [] = { 4000000000000000, } +VRSRA_N:28:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:29:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSRA_N:30:result_uint32x2 [] = { 80000000, 80000000, } +VRSRA_N:31:result_uint64x1 [] = { 8000000000000000, } +VRSRA_N:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:34:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:35:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:36:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:37:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSRA_N:38:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSRA_N:39:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSRA_N:40:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:41:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSRA_N:42:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSRA_N:43:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSRA_N:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by 3, positive input) output: +VRSRA_N:48:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSRA_N:49:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSRA_N:50:result_int32x2 [] = { 10000000, 10000000, } +VRSRA_N:51:result_int64x1 [] = { 1000000000000000, } +VRSRA_N:52:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:53:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } 
+VRSRA_N:54:result_uint32x2 [] = { 20000000, 20000000, } +VRSRA_N:55:result_uint64x1 [] = { 2000000000000000, } +VRSRA_N:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:58:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:59:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:60:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSRA_N:61:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSRA_N:62:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSRA_N:63:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSRA_N:64:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:65:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSRA_N:66:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSRA_N:67:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSRA_N:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by max, positive input) output: +VRSRA_N:72:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:73:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:74:result_int32x2 [] = { 0, 0, } +VRSRA_N:75:result_int64x1 [] = { 0, } +VRSRA_N:76:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:77:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:78:result_uint32x2 [] = { 1, 1, } +VRSRA_N:79:result_uint64x1 [] = { 1, } +VRSRA_N:80:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:81:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:82:result_float32x2 [] = { 33333333, 33333333, } 
+VRSRA_N:83:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:84:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:85:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:86:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:87:result_int64x2 [] = { 0, 0, } +VRSRA_N:88:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:89:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:90:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:91:result_uint64x2 [] = { 1, 1, } +VRSRA_N:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by 1, negative input) output: +VRSRA_N:96:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:97:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:98:result_int32x2 [] = { c0000000, c0000000, } +VRSRA_N:99:result_int64x1 [] = { c000000000000000, } +VRSRA_N:100:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:101:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:102:result_uint32x2 [] = { 1, 1, } +VRSRA_N:103:result_uint64x1 [] = { 1, } +VRSRA_N:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:106:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:107:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:108:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:109:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:110:result_int32x4 [] = 
{ c0000000, c0000000, c0000000, c0000000, } +VRSRA_N:111:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSRA_N:112:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:113:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:114:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:115:result_uint64x2 [] = { 1, 1, } +VRSRA_N:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:120:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:121:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:122:result_int32x2 [] = { f0000000, f0000000, } +VRSRA_N:123:result_int64x1 [] = { f000000000000000, } +VRSRA_N:124:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:125:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:126:result_uint32x2 [] = { 1, 1, } +VRSRA_N:127:result_uint64x1 [] = { 1, } +VRSRA_N:128:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:129:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:130:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:131:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:132:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:133:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:134:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSRA_N:135:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSRA_N:136:result_uint8x16 [] = { 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:137:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:138:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:139:result_uint64x2 [] = { 1, 1, } +VRSRA_N:140:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:141:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:142:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:143:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:144:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:145:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:146:result_int32x2 [] = { 0, 0, } +VRSRA_N:147:result_int64x1 [] = { 0, } +VRSRA_N:148:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:149:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:150:result_uint32x2 [] = { 1, 1, } +VRSRA_N:151:result_uint64x1 [] = { 1, } +VRSRA_N:152:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:153:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:154:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:155:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:156:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:157:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:158:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:159:result_int64x2 [] = { 0, 0, } +VRSRA_N:160:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:161:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:162:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:163:result_uint64x2 [] = { 1, 1, } +VRSRA_N:164:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:165:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:166:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VRSRA_N:167:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHLL_N output: +VSHLL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VSHLL_N:3:result_int64x1 [] = { 3333333333333333, } +VSHLL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VSHLL_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHLL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHLL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHLL_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:13:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHLL_N:14:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHLL_N:15:result_int64x2 [] = { ffffffffffffff80, ffffffffffffff88, } +VSHLL_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:17:result_uint16x8 [] = { 3c0, 3c4, 3c8, 3cc, 3d0, 3d4, 3d8, 3dc, } +VSHLL_N:18:result_uint32x4 [] = { fff00, fff10, fff20, fff30, } +VSHLL_N:19:result_uint64x2 [] = { 7ffffff80, 7ffffff88, } +VSHLL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHLL_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHLL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPADDL/VPADDLQ output: +VPADDL/VPADDLQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:2:result_int32x2 [] = { 
ffffffe1, ffffffe5, } +VPADDL/VPADDLQ:3:result_int64x1 [] = { ffffffffffffffe1, } +VPADDL/VPADDLQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:5:result_uint16x4 [] = { 1e1, 1e5, 1e9, 1ed, } +VPADDL/VPADDLQ:6:result_uint32x2 [] = { 1ffe1, 1ffe5, } +VPADDL/VPADDLQ:7:result_uint64x1 [] = { 1ffffffe1, } +VPADDL/VPADDLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADDL/VPADDLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VPADDL/VPADDLQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPADDL/VPADDLQ:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:13:result_int16x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, fffffff1, fffffff5, fffffff9, fffffffd, } +VPADDL/VPADDLQ:14:result_int32x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:15:result_int64x2 [] = { ffffffffffffffe1, ffffffffffffffe5, } +VPADDL/VPADDLQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:17:result_uint16x8 [] = { 1e1, 1e5, 1e9, 1ed, 1f1, 1f5, 1f9, 1fd, } +VPADDL/VPADDLQ:18:result_uint32x4 [] = { 1ffe1, 1ffe5, 1ffe9, 1ffed, } +VPADDL/VPADDLQ:19:result_uint64x2 [] = { 1ffffffe1, 1ffffffe5, } +VPADDL/VPADDLQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADDL/VPADDLQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADDL/VPADDLQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPADD output: +VPADD:0:result_int8x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADD:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe1, ffffffe5, } +VPADD:2:result_int32x2 [] = { ffffffe1, ffffffe1, } +VPADD:3:result_int64x1 [] = { 3333333333333333, } +VPADD:4:result_uint8x8 
[] = { e1, e5, e9, ed, e1, e5, e9, ed, } +VPADD:5:result_uint16x4 [] = { ffe1, ffe5, ffe1, ffe5, } +VPADD:6:result_uint32x2 [] = { ffffffe1, ffffffe1, } +VPADD:7:result_uint64x1 [] = { 3333333333333333, } +VPADD:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADD:10:result_float32x2 [] = { c1f80000, c1f80000, } +VPADD:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPADD:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPADAL/VPADALQ output: +VPADAL/VPADALQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:1:result_int16x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:2:result_int32x2 [] = { ffffffd1, ffffffd6, } +VPADAL/VPADALQ:3:result_int64x1 [] = { ffffffffffffffd1, } +VPADAL/VPADALQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:5:result_uint16x4 [] = { 1d1, 1d6, 1db, 1e0, } +VPADAL/VPADALQ:6:result_uint32x2 [] = { 1ffd1, 1ffd6, } +VPADAL/VPADALQ:7:result_uint64x1 [] = { 1ffffffd1, } +VPADAL/VPADALQ:8:result_poly8x8 [] = { 33, 
33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADAL/VPADALQ:10:result_float32x2 [] = { 33333333, 33333333, } +VPADAL/VPADALQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPADAL/VPADALQ:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:13:result_int16x8 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, ffffffe5, ffffffea, ffffffef, fffffff4, } +VPADAL/VPADALQ:14:result_int32x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:15:result_int64x2 [] = { ffffffffffffffd1, ffffffffffffffd6, } +VPADAL/VPADALQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:17:result_uint16x8 [] = { 1d1, 1d6, 1db, 1e0, 1e5, 1ea, 1ef, 1f4, } +VPADAL/VPADALQ:18:result_uint32x4 [] = { 1ffd1, 1ffd6, 1ffdb, 1ffe0, } +VPADAL/VPADALQ:19:result_uint64x2 [] = { 1ffffffd1, 1ffffffd6, } +VPADAL/VPADALQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADAL/VPADALQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADAL/VPADALQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHLU_N/VQSHLUQ_N (negative input) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (negative input) output: +VQSHLU_N/VQSHLUQ_N:8:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 
} +VQSHLU_N/VQSHLUQ_N:9:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:10:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:11:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:12:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:13:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:14:result_uint32x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:15:result_uint64x1 [] = { 0, } +VQSHLU_N/VQSHLUQ_N:16:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:17:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:18:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:19:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:20:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:21:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:22:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:23:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:24:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:25:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:26:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:27:result_uint64x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:28:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:29:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:30:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:31:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s16 Neon cumulative saturation 0 
+VQSHLU_N/VQSHLUQ_N:34:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:35:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:38:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:39:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) output: +VQSHLU_N/VQSHLUQ_N:40:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:41:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:42:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:43:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:44:result_uint8x8 [] = { fe, fe, fe, fe, fe, fe, fe, fe, } +VQSHLU_N/VQSHLUQ_N:45:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:46:result_uint32x2 [] = { fffffffe, fffffffe, } +VQSHLU_N/VQSHLUQ_N:47:result_uint64x1 [] = { fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:48:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:49:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:50:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:51:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:52:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:53:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:54:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:55:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:56:result_uint8x16 [] = { fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, } +VQSHLU_N/VQSHLUQ_N:57:result_uint16x8 [] = { fffe, fffe, fffe, fffe, fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:58:result_uint32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } 
+VQSHLU_N/VQSHLUQ_N:59:result_uint64x2 [] = { fffffffffffffffe, fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:60:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:61:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:62:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:63:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:64:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:65:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:66:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:67:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:68:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:69:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:70:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:71:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) output: +VQSHLU_N/VQSHLUQ_N:72:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:73:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:74:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:75:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:76:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:77:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:78:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:79:result_uint64x1 [] = { ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:80:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:81:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:82:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:83:result_float16x4 [] = { 0, 0, 0, 0, } 
+VQSHLU_N/VQSHLUQ_N:84:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:85:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:86:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:88:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:89:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:90:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:91:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHLU_N/VQSHLUQ_N cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:96:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:97:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:98:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:99:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:100:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:101:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:102:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:103:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N output: +VQSHLU_N/VQSHLUQ_N:104:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:105:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:106:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:107:result_int64x1 [] = { 
3333333333333333, } +VQSHLU_N/VQSHLUQ_N:108:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VQSHLU_N/VQSHLUQ_N:109:result_uint16x4 [] = { 8, 8, 8, 8, } +VQSHLU_N/VQSHLUQ_N:110:result_uint32x2 [] = { 18, 18, } +VQSHLU_N/VQSHLUQ_N:111:result_uint64x1 [] = { 40, } +VQSHLU_N/VQSHLUQ_N:112:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:113:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:114:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:115:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:116:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:117:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:118:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:119:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:120:result_uint8x16 [] = { a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, } +VQSHLU_N/VQSHLUQ_N:121:result_uint16x8 [] = { 180, 180, 180, 180, 180, 180, 180, 180, } +VQSHLU_N/VQSHLUQ_N:122:result_uint32x4 [] = { 380, 380, 380, 380, } +VQSHLU_N/VQSHLUQ_N:123:result_uint64x2 [] = { 800, 800, } +VQSHLU_N/VQSHLUQ_N:124:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:125:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:126:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:127:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCLZ/VCLZQ output: +VCLZ/VCLZQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VCLZ/VCLZQ:1:result_int16x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:2:result_int32x2 [] = { 11, 11, } +VCLZ/VCLZQ:3:result_int64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } 
+VCLZ/VCLZQ:6:result_uint32x2 [] = { 5, 5, } +VCLZ/VCLZQ:7:result_uint64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCLZ/VCLZQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VCLZ/VCLZQ:12:result_int8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:13:result_int16x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:14:result_int32x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:16:result_uint8x16 [] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:17:result_uint16x8 [] = { d, d, d, d, d, d, d, d, } +VCLZ/VCLZQ:18:result_uint32x4 [] = { 1f, 1f, 1f, 1f, } +VCLZ/VCLZQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLZ/VCLZQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCLZ/VCLZQ (input=0) output: +VCLZ/VCLZQ:24:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:25:result_int16x4 [] = { 10, 10, 10, 10, } +VCLZ/VCLZQ:26:result_int32x2 [] = { 20, 20, } +VCLZ/VCLZQ:27:result_int64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:28:result_uint8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:29:result_uint16x4 [] = { 10, 10, 10, 10, } +VCLZ/VCLZQ:30:result_uint32x2 [] = { 20, 20, } +VCLZ/VCLZQ:31:result_uint64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:34:result_float32x2 [] = { 33333333, 33333333, } +VCLZ/VCLZQ:35:result_float16x4 [] = { 0, 0, 0, 0, } 
+VCLZ/VCLZQ:36:result_int8x16 [] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:37:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VCLZ/VCLZQ:38:result_int32x4 [] = { 20, 20, 20, 20, } +VCLZ/VCLZQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:40:result_uint8x16 [] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:41:result_uint16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VCLZ/VCLZQ:42:result_uint32x4 [] = { 20, 20, 20, 20, } +VCLZ/VCLZQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLZ/VCLZQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCLS/VCLSQ (positive input) output: +VCLS/VCLSQ:0:result_int8x8 [] = { 6, 6, 6, 6, 6, 6, 6, 6, } +VCLS/VCLSQ:1:result_int16x4 [] = { 2, 2, 2, 2, } +VCLS/VCLSQ:2:result_int32x2 [] = { 19, 19, } +VCLS/VCLSQ:3:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:7:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:12:result_int8x16 [] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:13:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:14:result_int32x4 [] = { 14, 14, 14, 14, } +VCLS/VCLSQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCLS/VCLSQ (negative input) output: +VCLS/VCLSQ:24:result_int8x8 [] = { 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:25:result_int16x4 [] = { 1, 1, 1, 1, } +VCLS/VCLSQ:26:result_int32x2 [] = { 1, 1, } +VCLS/VCLSQ:27:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:28:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:29:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:30:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:31:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:34:result_float32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:36:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLS/VCLSQ:37:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:38:result_int32x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:43:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VCLS/VCLSQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCNT/VCNTQ output: +VCNT/VCNTQ:0:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCNT/VCNTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:3:result_int64x1 [] = { 3333333333333333, } +VCNT/VCNTQ:4:result_uint8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VCNT/VCNTQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:7:result_uint64x1 [] = { 3333333333333333, } +VCNT/VCNTQ:8:result_poly8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VCNT/VCNTQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VCNT/VCNTQ:12:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCNT/VCNTQ:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:16:result_uint8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, } +VCNT/VCNTQ:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:20:result_poly8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, } +VCNT/VCNTQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:22:result_float32x4 [] = { 33333333, 33333333, 
33333333, 33333333, } +VCNT/VCNTQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRN_N cumulative saturation output: +VQSHRN_N:0:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:1:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:2:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:3:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:4:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:5:vqshrn_n_u64 Neon cumulative saturation 1 + +VQSHRN_N output: +VQSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VQSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:16:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRN_N:18:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:19:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:20:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQSHRN_N:30:vqshrn_n_s16 Neon cumulative saturation 1 +VQSHRN_N:31:vqshrn_n_s32 Neon cumulative saturation 1 +VQSHRN_N:32:vqshrn_n_s64 Neon cumulative saturation 1 +VQSHRN_N:33:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:34:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:35:vqshrn_n_u64 Neon cumulative saturation 1 + +VQSHRN_N (check saturation: shift by 3) output: +VQSHRN_N:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:37:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:38:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:39:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:40:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:41:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:42:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:43:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:44:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:45:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:46:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:47:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRN_N:48:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:49:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:50:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:51:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:52:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:53:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:54:result_uint32x4 
[] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:55:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:56:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:58:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRN_N (check saturation: shift by max) cumulative saturation output: +VQSHRN_N:60:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:61:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:62:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:63:vqshrn_n_u16 Neon cumulative saturation 0 +VQSHRN_N:64:vqshrn_n_u32 Neon cumulative saturation 0 +VQSHRN_N:65:vqshrn_n_u64 Neon cumulative saturation 0 + +VQSHRN_N (check saturation: shift by max) output: +VQSHRN_N:66:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:67:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:68:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:69:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:70:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:71:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:72:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:73:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:74:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:75:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:76:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:77:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRN_N:78:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:79:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:80:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:81:result_int64x2 [] = { 3333333333333333, 3333333333333333, } 
+VQSHRN_N:82:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:83:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:84:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:85:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:86:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:87:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:88:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:89:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPMAX output: +VPMAX:0:result_int8x8 [] = { fffffff1, fffffff3, fffffff5, fffffff7, fffffff1, fffffff3, fffffff5, fffffff7, } +VPMAX:1:result_int16x4 [] = { fffffff1, fffffff3, fffffff1, fffffff3, } +VPMAX:2:result_int32x2 [] = { fffffff1, fffffff1, } +VPMAX:3:result_int64x1 [] = { 3333333333333333, } +VPMAX:4:result_uint8x8 [] = { f1, f3, f5, f7, f1, f3, f5, f7, } +VPMAX:5:result_uint16x4 [] = { fff1, fff3, fff1, fff3, } +VPMAX:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VPMAX:7:result_uint64x1 [] = { 3333333333333333, } +VPMAX:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPMAX:10:result_float32x2 [] = { c1700000, c1700000, } +VPMAX:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPMAX:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:18:result_uint32x4 [] = { 33333333, 33333333, 
33333333, 33333333, } +VPMAX:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPMIN output: +VPMIN:0:result_int8x8 [] = { fffffff0, fffffff2, fffffff4, fffffff6, fffffff0, fffffff2, fffffff4, fffffff6, } +VPMIN:1:result_int16x4 [] = { fffffff0, fffffff2, fffffff0, fffffff2, } +VPMIN:2:result_int32x2 [] = { fffffff0, fffffff0, } +VPMIN:3:result_int64x1 [] = { 3333333333333333, } +VPMIN:4:result_uint8x8 [] = { f0, f2, f4, f6, f0, f2, f4, f6, } +VPMIN:5:result_uint16x4 [] = { fff0, fff2, fff0, fff2, } +VPMIN:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VPMIN:7:result_uint64x1 [] = { 3333333333333333, } +VPMIN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPMIN:10:result_float32x2 [] = { c1800000, c1800000, } +VPMIN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPMIN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 
3333, } +VPMIN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRUN_N (negative input) cumulative saturation output: +VQSHRUN_N:0:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:1:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:2:vqshrun_n_s64 Neon cumulative saturation 1 + +VQSHRUN_N (negative input) output: +VQSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:13:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:14:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQSHRUN_N:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRUN_N (check cumulative saturation) cumulative saturation output: +VQSHRUN_N:27:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:28:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:29:vqshrun_n_s64 Neon cumulative saturation 1 + +VQSHRUN_N (check cumulative saturation) output: +VQSHRUN_N:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:32:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:33:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRUN_N:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRUN_N:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRUN_N:37:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:38:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:40:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:41:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:42:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:43:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:44:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:45:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:46:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:47:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:48:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:49:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:50:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:51:result_poly16x8 [] = 
{ 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:52:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRUN_N cumulative saturation output: +VQSHRUN_N:54:vqshrun_n_s16 Neon cumulative saturation 0 +VQSHRUN_N:55:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:56:vqshrun_n_s64 Neon cumulative saturation 0 + +VQSHRUN_N output: +VQSHRUN_N:57:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:58:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:59:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:60:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:61:result_uint8x8 [] = { 48, 48, 48, 48, 48, 48, 48, 48, } +VQSHRUN_N:62:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:63:result_uint32x2 [] = { deadbe, deadbe, } +VQSHRUN_N:64:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:65:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:66:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:67:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:68:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:69:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:70:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:71:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:72:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:73:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:74:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:75:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:76:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:77:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:78:result_poly16x8 [] = { 3333, 3333, 3333, 
3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:79:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:80:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N (negative input) cumulative saturation output: +VQRSHRUN_N:0:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:1:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:2:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (negative input) output: +VQRSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:13:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:14:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:24:result_poly16x8 [] = { 3333, 
3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQRSHRUN_N:27:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:28:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:29:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (check cumulative saturation: shift by 1) output: +VQRSHRUN_N:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:32:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:33:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRUN_N:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRUN_N:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRUN_N:37:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:38:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:40:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:41:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:42:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:43:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:44:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:45:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:46:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:47:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:48:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:49:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:50:result_poly8x16 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:51:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:52:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) cumulative saturation output: +VQRSHRUN_N:54:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:55:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:56:vqrshrun_n_s64 Neon cumulative saturation 0 + +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) output: +VQRSHRUN_N:57:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:58:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:59:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:60:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:61:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHRUN_N:62:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHRUN_N:63:result_uint32x2 [] = { 80000000, 80000000, } +VQRSHRUN_N:64:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:65:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:66:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:67:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:68:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:69:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:70:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:71:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:72:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:73:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:74:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:75:result_uint32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VQRSHRUN_N:76:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:77:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:78:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:79:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:80:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) cumulative saturation output: +VQRSHRUN_N:81:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:82:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:83:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) output: +VQRSHRUN_N:84:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:85:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:86:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:87:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:88:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:89:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:90:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:91:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:92:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:93:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:94:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:95:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:96:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:97:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:98:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:99:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:100:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQRSHRUN_N:101:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:102:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:103:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:104:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:105:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:106:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:107:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N cumulative saturation output: +VQRSHRUN_N:108:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:109:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:110:vqrshrun_n_s64 Neon cumulative saturation 0 + +VQRSHRUN_N output: +VQRSHRUN_N:111:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:112:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:113:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:114:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:115:result_uint8x8 [] = { 49, 49, 49, 49, 49, 49, 49, 49, } +VQRSHRUN_N:116:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:117:result_uint32x2 [] = { deadbf, deadbf, } +VQRSHRUN_N:118:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:119:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:120:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:121:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:122:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:123:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:124:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:125:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:126:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:127:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:128:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:129:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:130:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:131:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:132:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:133:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:134:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST2_LANE/VST2Q_LANE chunk 0 output: +VST2_LANE/VST2Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:4:result_uint8x8 [] = { f0, f1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:5:result_uint16x4 [] = { fff0, fff1, 0, 0, } +VST2_LANE/VST2Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:8:result_poly8x8 [] = { f0, f1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:9:result_poly16x4 [] = { fff0, fff1, 0, 0, } +VST2_LANE/VST2Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST2_LANE/VST2Q_LANE:11:result_float16x4 [] = { cc00, cb80, 0, 0, } +VST2_LANE/VST2Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:13:result_int16x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:14:result_int32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VST2_LANE/VST2Q_LANE:17:result_uint16x8 [] = { fff0, fff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:21:result_poly16x8 [] = { fff0, fff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:22:result_float32x4 [] = { c1800000, c1700000, 0, 0, } +VST2_LANE/VST2Q_LANE:23:result_float16x8 [] = { cc00, cb80, 0, 0, 0, 0, 0, 0, } + +VST2_LANE/VST2Q_LANE chunk 1 output: +VST2_LANE/VST2Q_LANE:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:25:result_int16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:26:result_int32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:30:result_uint32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:32:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:33:result_poly16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:34:result_float32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:35:result_float16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:38:result_int32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:42:result_uint32x4 [] = { 0, 0, 0, 0, } 
+VST2_LANE/VST2Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:45:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:46:result_float32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST3_LANE/VST3Q_LANE chunk 0 output: +VST3_LANE/VST3Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, 0, } +VST3_LANE/VST3Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:8:result_poly8x8 [] = { f0, f1, f2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:9:result_poly16x4 [] = { fff0, fff1, fff2, 0, } +VST3_LANE/VST3Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST3_LANE/VST3Q_LANE:11:result_float16x4 [] = { cc00, cb80, cb00, 0, } +VST3_LANE/VST3Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:17:result_uint16x8 [] = { fff0, fff1, fff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } 
+VST3_LANE/VST3Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:21:result_poly16x8 [] = { fff0, fff1, fff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:22:result_float32x4 [] = { c1800000, c1700000, c1600000, 0, } +VST3_LANE/VST3Q_LANE:23:result_float16x8 [] = { cc00, cb80, cb00, 0, 0, 0, 0, 0, } + +VST3_LANE/VST3Q_LANE chunk 1 output: +VST3_LANE/VST3Q_LANE:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:25:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:26:result_int32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:30:result_uint32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:32:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:33:result_poly16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:34:result_float32x2 [] = { c1600000, 0, } +VST3_LANE/VST3Q_LANE:35:result_float16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:38:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:42:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:44:result_poly8x16 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:45:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:46:result_float32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST3_LANE/VST3Q_LANE chunk 2 output: +VST3_LANE/VST3Q_LANE:48:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:49:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:50:result_int32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:51:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:52:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:53:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:54:result_uint32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:56:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:57:result_poly16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:58:result_float32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:59:result_float16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:61:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:62:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:65:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:66:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:69:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:70:result_float32x4 [] = { 0, 0, 
0, 0, } +VST3_LANE/VST3Q_LANE:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 0 output: +VST4_LANE/VST4Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, f3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VST4_LANE/VST4Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:8:result_poly8x8 [] = { f0, f1, f2, f3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VST4_LANE/VST4Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST4_LANE/VST4Q_LANE:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VST4_LANE/VST4Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, 0, 0, 0, 0, } 
+VST4_LANE/VST4Q_LANE:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VST4_LANE/VST4Q_LANE:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 1 output: +VST4_LANE/VST4Q_LANE:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:25:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:26:result_int32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:32:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:33:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:34:result_float32x2 [] = { c1600000, c1500000, } +VST4_LANE/VST4Q_LANE:35:result_float16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:38:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:42:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:45:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:46:result_float32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:47:result_float16x8 [] = { 0, 
0, 0, 0, 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 2 output: +VST4_LANE/VST4Q_LANE:48:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:49:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:50:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:51:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:52:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:53:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:54:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:56:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:57:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:58:result_float32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:59:result_float16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:61:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:62:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:65:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:66:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:69:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:70:result_float32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 3 output: +VST4_LANE/VST4Q_LANE:72:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:73:result_int16x4 [] = { 0, 0, 0, 0, } 
+VST4_LANE/VST4Q_LANE:74:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:75:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:76:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:77:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:78:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:79:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:80:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:81:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:82:result_float32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:83:result_float16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:84:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:85:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:86:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:88:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:89:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:90:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:91:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:93:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:94:result_float32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBL1 output: +VTBL1:0:result_int8x8 [] = { 0, fffffff2, fffffff2, fffffff2, 0, 0, fffffff2, fffffff2, } +VTBL1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL1:3:result_int64x1 [] = { 3333333333333333, } +VTBL1:4:result_uint8x8 [] = { 0, f3, f3, f3, 0, 0, f3, f3, } +VTBL1:5:result_uint16x4 [] = { 3333, 
3333, 3333, 3333, } +VTBL1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL1:7:result_uint64x1 [] = { 3333333333333333, } +VTBL1:8:result_poly8x8 [] = { 0, f3, f3, f3, 0, 0, f3, f3, } +VTBL1:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL1:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBL1:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBL2 output: +VTBL2:0:result_int8x8 [] = { fffffff6, fffffff3, fffffff3, fffffff3, 0, 0, fffffff3, fffffff3, } +VTBL2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL2:3:result_int64x1 [] = { 3333333333333333, } +VTBL2:4:result_uint8x8 [] = { f6, f5, f5, f5, 0, 0, f5, f5, } +VTBL2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL2:7:result_uint64x1 [] = { 3333333333333333, } +VTBL2:8:result_poly8x8 [] = { f6, f5, f5, f5, 0, 0, f5, f5, } +VTBL2:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:10:result_float32x2 [] = { 33333333, 33333333, } 
+VTBL2:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBL2:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBL3 output: +VTBL3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 0, fffffff4, fffffff4, } +VTBL3:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL3:3:result_int64x1 [] = { 3333333333333333, } +VTBL3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, 0, f7, f7, } +VTBL3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL3:7:result_uint64x1 [] = { 3333333333333333, } +VTBL3:8:result_poly8x8 [] = { f8, f7, f7, f7, ff, 0, f7, f7, } +VTBL3:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL3:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBL3:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 
33333333, } +VTBL3:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBL4 output: +VTBL4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 0, fffffff5, fffffff5, } +VTBL4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL4:3:result_int64x1 [] = { 3333333333333333, } +VTBL4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, 0, f9, f9, } +VTBL4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL4:7:result_uint64x1 [] = { 3333333333333333, } +VTBL4:8:result_poly8x8 [] = { fa, f9, f9, f9, 3, 0, f9, f9, } +VTBL4:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL4:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBL4:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL4:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:18:result_uint32x4 [] = { 
33333333, 33333333, 33333333, 33333333, } +VTBL4:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL4:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBX1 output: +VTBX1:0:result_int8x8 [] = { 33, fffffff2, fffffff2, fffffff2, 33, 33, fffffff2, fffffff2, } +VTBX1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX1:3:result_int64x1 [] = { 3333333333333333, } +VTBX1:4:result_uint8x8 [] = { cc, f3, f3, f3, cc, cc, f3, f3, } +VTBX1:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX1:7:result_uint64x1 [] = { 3333333333333333, } +VTBX1:8:result_poly8x8 [] = { cc, f3, f3, f3, cc, cc, f3, f3, } +VTBX1:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX1:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBX1:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VTBX1:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBX2 output: +VTBX2:0:result_int8x8 [] = { fffffff6, fffffff3, fffffff3, fffffff3, 33, 33, fffffff3, fffffff3, } +VTBX2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX2:3:result_int64x1 [] = { 3333333333333333, } +VTBX2:4:result_uint8x8 [] = { f6, f5, f5, f5, cc, cc, f5, f5, } +VTBX2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX2:7:result_uint64x1 [] = { 3333333333333333, } +VTBX2:8:result_poly8x8 [] = { f6, f5, f5, f5, cc, cc, f5, f5, } +VTBX2:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX2:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBX2:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBX3 output: +VTBX3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 33, fffffff4, fffffff4, } +VTBX3:1:result_int16x4 [] = { 3333, 
3333, 3333, 3333, } +VTBX3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX3:3:result_int64x1 [] = { 3333333333333333, } +VTBX3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, cc, f7, f7, } +VTBX3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX3:7:result_uint64x1 [] = { 3333333333333333, } +VTBX3:8:result_poly8x8 [] = { f8, f7, f7, f7, ff, cc, f7, f7, } +VTBX3:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX3:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBX3:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBX4 output: +VTBX4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 33, fffffff5, fffffff5, } +VTBX4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX4:3:result_int64x1 [] = { 3333333333333333, } +VTBX4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, cc, f9, f9, } +VTBX4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:6:result_uint32x2 [] = { 33333333, 
33333333, } +VTBX4:7:result_uint64x1 [] = { 3333333333333333, } +VTBX4:8:result_poly8x8 [] = { fa, f9, f9, f9, 3, cc, f9, f9, } +VTBX4:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX4:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBX4:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRECPE/VRECPEQ (positive input) output: +VRECPE/VRECPEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRECPE/VRECPEQ:1:result_uint32x4 [] = { bf000000, bf000000, bf000000, bf000000, } +VRECPE/VRECPEQ:2:result_float32x2 [] = { 3f068000, 3f068000, } +VRECPE/VRECPEQ:3:result_float32x4 [] = { 3c030000, 3c030000, 3c030000, 3c030000, } + +VRECPE/VRECPEQ (negative input) output: +VRECPE/VRECPEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRECPE/VRECPEQ:5:result_uint32x4 [] = { ee800000, ee800000, ee800000, ee800000, } +VRECPE/VRECPEQ:6:result_float32x2 [] = { bdcc8000, bdcc8000, } +VRECPE/VRECPEQ:7:result_float32x4 [] = { bc030000, bc030000, bc030000, bc030000, } + +VRECPE/VRECPEQ FP special (NaN, infinity) output: 
+VRECPE/VRECPEQ:8:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRECPE/VRECPEQ:9:result_float32x4 [] = { 0, 0, 0, 0, } + +VRECPE/VRECPEQ FP special (zero, large value) output: +VRECPE/VRECPEQ:10:result_float32x2 [] = { 7f800000, 7f800000, } +VRECPE/VRECPEQ:11:result_float32x4 [] = { 0, 0, 0, 0, } + +VRECPE/VRECPEQ FP special (-0, -infinity) output: +VRECPE/VRECPEQ:12:result_float32x2 [] = { ff800000, ff800000, } +VRECPE/VRECPEQ:13:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VRECPE/VRECPEQ FP special (large negative value) output: +VRECPE/VRECPEQ:14:result_float32x2 [] = { 80000000, 80000000, } + +VRSQRTE/VRSQRTEQ output: +VRSQRTE/VRSQRTEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRSQRTE/VRSQRTEQ:1:result_uint32x4 [] = { 9c800000, 9c800000, 9c800000, 9c800000, } +VRSQRTE/VRSQRTEQ:2:result_float32x2 [] = { 3e498000, 3e498000, } +VRSQRTE/VRSQRTEQ:3:result_float32x4 [] = { 3e700000, 3e700000, 3e700000, 3e700000, } + +VRSQRTE/VRSQRTEQ (2) output: +VRSQRTE/VRSQRTEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRSQRTE/VRSQRTEQ:5:result_uint32x4 [] = { ae800000, ae800000, ae800000, ae800000, } + +VRSQRTE/VRSQRTEQ (3) output: +VRSQRTE/VRSQRTEQ:6:result_uint32x2 [] = { b4800000, b4800000, } +VRSQRTE/VRSQRTEQ:7:result_uint32x4 [] = { ed000000, ed000000, ed000000, ed000000, } + +VRSQRTE/VRSQRTEQ FP special (NaN, 0) output: +VRSQRTE/VRSQRTEQ:8:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTE/VRSQRTEQ:9:result_float32x4 [] = { 7f800000, 7f800000, 7f800000, 7f800000, } + +VRSQRTE/VRSQRTEQ FP special (negative, infinity) output: +VRSQRTE/VRSQRTEQ:10:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTE/VRSQRTEQ:11:result_float32x4 [] = { 0, 0, 0, 0, } + +VRSQRTE/VRSQRTEQ FP special (-0, -infinity) output: +VRSQRTE/VRSQRTEQ:12:result_float32x2 [] = { ff800000, ff800000, } +VRSQRTE/VRSQRTEQ:13:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:0:result_uint32x2 [] = { ffffffff, 0, } 
+VCAGE/VCAGEQ:1:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGE/VCAGEQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VCALE/VCALEQ:1:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:2:result_uint32x2 [] = { 0, 0, } +VCALE/VCALEQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:0:result_uint32x2 [] = { 0, 0, } +VCAGT/VCAGTQ:1:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGT/VCAGTQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:0:result_uint32x2 [] = { 0, ffffffff, } +VCALT/VCALTQ:1:result_uint32x4 [] = { 0, 0, 0, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:2:result_uint32x2 [] = { 0, 0, } +VCALT/VCALTQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCVT/VCVTQ output: +VCVT/VCVTQ:0:result_float32x2 [] = { c1800000, c1700000, } +VCVT/VCVTQ:1:result_float32x2 [] = { 4f800000, 4f800000, } +VCVT/VCVTQ:2:result_float32x4 [] = { 41720000, c1720000, 0, 80000000, } +VCVT/VCVTQ:3:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VCVT/VCVTQ:4:result_float32x4 [] = { 4f800000, 4f800000, 4f800000, 4f800000, } +VCVT/VCVTQ:5:result_int32x2 [] = { fffffff1, 5, } +VCVT/VCVTQ:6:result_uint32x2 [] = { 0, 5, } +VCVT/VCVTQ:7:result_float16x4 [] = { 0, 8000, 4b8f, cb8f, } +VCVT/VCVTQ:8:result_int32x4 [] = { 0, 0, f, fffffff1, } +VCVT/VCVTQ:9:result_uint32x4 [] = { 0, 0, f, 0, } + +VCVT_N/VCVTQ_N output: +VCVT_N/VCVTQ_N:10:result_float32x2 [] = { c0800000, c0700000, } +VCVT_N/VCVTQ_N:11:result_float32x2 [] = { 4c000000, 4c000000, } +VCVT_N/VCVTQ_N:12:result_float32x4 [] = { b2800000, b2700000, b2600000, b2500000, } +VCVT_N/VCVTQ_N:13:result_float32x4 
[] = { 49800000, 49800000, 49800000, 49800000, } +VCVT_N/VCVTQ_N:14:result_int32x2 [] = { ff0b3333, 54cccd, } +VCVT_N/VCVTQ_N:15:result_uint32x2 [] = { 0, 15, } +VCVT_N/VCVTQ_N:16:result_int32x4 [] = { 0, 0, 1e3d7, fffe1c29, } +VCVT_N/VCVTQ_N:17:result_uint32x4 [] = { 0, 0, 1e, 0, } + +VCVT/VCVTQ (check rounding) output: +VCVT/VCVTQ:18:result_int32x2 [] = { a, a, } +VCVT/VCVTQ:19:result_uint32x2 [] = { a, a, } +VCVT/VCVTQ:20:result_int32x4 [] = { 7d, 7d, 7d, 7d, } +VCVT/VCVTQ:21:result_uint32x4 [] = { 7d, 7d, 7d, 7d, } + +VCVT_N/VCVTQ_N (check rounding) output: +VCVT_N/VCVTQ_N:22:result_int32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:23:result_uint32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:24:result_int32x4 [] = { fbccc, fbccc, fbccc, fbccc, } +VCVT_N/VCVTQ_N:25:result_uint32x4 [] = { fbccc, fbccc, fbccc, fbccc, } + +VCVT_N/VCVTQ_N (check saturation) output: +VCVT_N/VCVTQ_N:26:result_int32x2 [] = { 7fffffff, 7fffffff, } +VCVT_N/VCVTQ_N:27:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } + +VCVT FP16 (check fp16-fp32 inf/nan/denormal) output: +VCVT FP16:28:result_float32x4 [] = { 38640000, 7f800000, 7fc00000, ff800000, } + +VCVT FP16 (check fp32-fp16 inf/nan) output: +VCVT FP16:29:result_float16x4 [] = { 7e00, 7c00, fc00, 8000, } + +VRECPS/VRECPSQ output: +VRECPS/VRECPSQ:0:result_float32x2 [] = { c2e19eb7, c2e19eb7, } +VRECPS/VRECPSQ:1:result_float32x4 [] = { c1db851f, c1db851f, c1db851f, c1db851f, } + +VRECPS/VRECPSQ FP special (NAN) and normal values output: +VRECPS/VRECPSQ:2:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRECPS/VRECPSQ:3:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VRECPS/VRECPSQ FP special (infinity, 0) and normal values output: +VRECPS/VRECPSQ:4:result_float32x2 [] = { ff800000, ff800000, } +VRECPS/VRECPSQ:5:result_float32x4 [] = { 40000000, 40000000, 40000000, 40000000, } + +VRECPS/VRECPSQ FP special (infinity, 0) output: +VRECPS/VRECPSQ:6:result_float32x2 [] = { 40000000, 40000000, } 
+VRECPS/VRECPSQ:7:result_float32x4 [] = { 40000000, 40000000, 40000000, 40000000, } + +VRSQRTS/VRSQRTSQ output: +VRSQRTS/VRSQRTSQ:0:result_float32x2 [] = { c2796b84, c2796b84, } +VRSQRTS/VRSQRTSQ:1:result_float32x4 [] = { c0e4a3d8, c0e4a3d8, c0e4a3d8, c0e4a3d8, } + +VRSQRTS/VRSQRTSQ FP special (NAN) and normal values output: +VRSQRTS/VRSQRTSQ:2:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTS/VRSQRTSQ:3:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VRSQRTS/VRSQRTSQ FP special (infinity, 0) and normal values output: +VRSQRTS/VRSQRTSQ:4:result_float32x2 [] = { ff800000, ff800000, } +VRSQRTS/VRSQRTSQ:5:result_float32x4 [] = { 3fc00000, 3fc00000, 3fc00000, 3fc00000, } + +VRSQRTS/VRSQRTSQ FP special (infinity, 0) output: +VRSQRTS/VRSQRTSQ:6:result_float32x2 [] = { 3fc00000, 3fc00000, } +VRSQRTS/VRSQRTSQ:7:result_float32x4 [] = { 3fc00000, 3fc00000, 3fc00000, 3fc00000, } + + +Integer (non-NEON) intrinsics +__clz(0xffffffff) = 0 +__clz(0x7fffffff) = 1 +__clz(0x3fffffff) = 2 +__clz(0x1fffffff) = 3 +__clz(0xfffffff) = 4 +__clz(0x7ffffff) = 5 +__clz(0x3ffffff) = 6 +__clz(0x1ffffff) = 7 +__clz(0xffffff) = 8 +__clz(0x7fffff) = 9 +__clz(0x3fffff) = 10 +__clz(0x1fffff) = 11 +__clz(0xfffff) = 12 +__clz(0x7ffff) = 13 +__clz(0x3ffff) = 14 +__clz(0x1ffff) = 15 +__clz(0xffff) = 16 +__clz(0x7fff) = 17 +__clz(0x3fff) = 18 +__clz(0x1fff) = 19 +__clz(0xfff) = 20 +__clz(0x7ff) = 21 +__clz(0x3ff) = 22 +__clz(0x1ff) = 23 +__clz(0xff) = 24 +__clz(0x7f) = 25 +__clz(0x3f) = 26 +__clz(0x1f) = 27 +__clz(0xf) = 28 +__clz(0x7) = 29 +__clz(0x3) = 30 +__clz(0x1) = 31 +__clz(0) = 32 +__qadd(0x1, 0x2) = 0x3 sat 0 +__qadd(0xffffffff, 0xfffffffe) = 0xfffffffd sat 0 +__qadd(0xffffffff, 0x2) = 0x1 sat 0 +__qadd(0x7000, 0x7000) = 0xe000 sat 0 +__qadd(0x8fff, 0x8fff) = 0x11ffe sat 0 +__qadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +__qadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +__qdbl(0x1) = 0x2 sat 0 +__qdbl(0x70000000) = 0x7fffffff sat 1 +__qdbl(0x8fffffff) = 0x80000000 
sat 1 +__qdbl(0xefffffff) = 0xdffffffe sat 0 +__qsub(0x1, 0x2) = 0xffffffff sat 0 +__qsub(0xffffffff, 0xfffffffe) = 0x1 sat 0 +__qsub(0xffffffff, 0x2) = 0xfffffffd sat 0 +__qsub(0x7000, 0xffff9000) = 0xe000 sat 0 +__qsub(0x8fff, 0xffff7001) = 0x11ffe sat 0 +__qsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +__qsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +__qsub(0, 0x80000000) = 0x7fffffff sat 1 +__rbit(0x12345678) = 0x1e6a2c48 +__rev(0x12345678) = 0x78563412 +__ssat(0x12345678, 30) = 0x12345678 sat 0 +__ssat(0x12345678, 19) = 0x3ffff sat 1 +__ssat(0x87654321, 29) = 0xf0000000 sat 1 +__ssat(0x87654321, 12) = 0xfffff800 sat 1 +__ssat(0x87654321, 32) = 0x87654321 sat 0 +__ssat(0x87654321, 1) = 0xffffffff sat 1 +__usat(0x12345678, 30) = 0x12345678 sat 0 +__usat(0x12345678, 19) = 0x7ffff sat 1 +__usat(0x87654321, 29) = 0 sat 1 +__usat(0x87654321, 12) = 0 sat 1 +__usat(0x87654321, 31) = 0 sat 1 +__usat(0x87654321, 0) = 0 sat 1 + + +DSP (non-NEON) intrinsics +qadd(0x1, 0x2) = 0x3 sat 0 +qadd(0xffffffff, 0xfffffffe) = 0xfffffffd sat 0 +qadd(0xffffffff, 0x2) = 0x1 sat 0 +qadd(0x7000, 0x7000) = 0xe000 sat 0 +qadd(0x8fff, 0x8fff) = 0x11ffe sat 0 +qadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +qadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +qsub(0x1, 0x2) = 0xffffffff sat 0 +qsub(0xffffffff, 0xfffffffe) = 0x1 sat 0 +qsub(0xffffffff, 0x2) = 0xfffffffd sat 0 +qsub(0x7000, 0xffff9000) = 0xe000 sat 0 +qsub(0x8fff, 0xffff7001) = 0x11ffe sat 0 +qsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +qsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +qsub(0, 0x80000000) = 0x7fffffff sat 1 +qdadd(0x1, 0x2) = 0x5 sat 0 +qdadd(0xffffffff, 0xfffffffe) = 0xfffffffb sat 0 +qdadd(0xffffffff, 0x2) = 0x3 sat 0 +qdadd(0x7000, 0x7000) = 0x15000 sat 0 +qdadd(0x8fff, 0x8fff) = 0x1affd sat 0 +qdadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +qdadd(0, 0x70000000) = 0x7fffffff sat 1 +qdadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +qdadd(0, 0x8fffffff) = 0x80000000 sat 1 +qdsub(0x1, 0x2) = 0xfffffffd 
sat 0 +qdsub(0xffffffff, 0xfffffffe) = 0x3 sat 0 +qdsub(0xffffffff, 0x2) = 0xfffffffb sat 0 +qdsub(0x7000, 0xffff9000) = 0x15000 sat 0 +qdsub(0x8fff, 0xffff7001) = 0x1affd sat 0 +qdsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +qdsub(0, 0x90000000) = 0x7fffffff sat 1 +qdsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +qdsub(0, 0x70000001) = 0x80000001 sat 1 +smulbb(0x12345678, 0x12345678) = 0x1d34d840 +smulbt(0x12345678, 0x12345678) = 0x6260060 +smultb(0x12345678, 0x12345678) = 0x6260060 +smultt(0x12345678, 0x12345678) = 0x14b5a90 +smulbb(0xf123f456, 0xf123f456) = 0x880ce4 +smulbt(0xf123f456, 0xf123f456) = 0xad5dc2 +smultb(0xf123f456, 0xf123f456) = 0xad5dc2 +smultt(0xf123f456, 0xf123f456) = 0xdceac9 +smlabb(0x12345678, 0x12345678, 0x1020304) = 0x1e36db44 +smlabt(0x12345678, 0x12345678, 0x1020304) = 0x7280364 +smlatb(0x12345678, 0x12345678, 0x1020304) = 0x7280364 +smlatt(0x12345678, 0x12345678, 0x1020304) = 0x24d5d94 +smlabb(0xf123f456, 0xf123f456, 0x1020304) = 0x18a0fe8 +smlabt(0xf123f456, 0xf123f456, 0x1020304) = 0x1af60c6 +smlatb(0xf123f456, 0xf123f456, 0x1020304) = 0x1af60c6 +smlatt(0xf123f456, 0xf123f456, 0x1020304) = 0x1deedcd +smlalbb(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xb7f1b730 +smlalbt(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xa0e2df50 +smlaltb(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xa0e2df50 +smlaltt(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780x9c083980 +smlalbb(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b44ebd4 +smlalbt(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b6a3cb2 +smlaltb(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b6a3cb2 +smlaltt(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b99c9b9 +smlalbb(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlalbt(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlaltb(&0xffffffff, 
&0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlaltt(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smulwb(0x12345678, 0x12345678) = 0x6261d94 +smulwt(0x12345678, 0x12345678) = 0x14b60b6 +smulwb(0xf123f456, 0xf123f456) = 0xad52a0 +smulwt(0xf123f456, 0xf123f456) = 0xdcdc99 +smlawb(0x12345678, 0x12345678, 0x1020304) = 0x7282098 +smlawt(0x12345678, 0x12345678, 0x1020304) = 0x24d63ba +smlawb(0xf123f456, 0xf123f456, 0x1020304) = 0x1af55a4 +smlawt(0xf123f456, 0xf123f456, 0X1020304) = 0x1dedf9d + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=0 and input Carry=0 +Checking saturate with input Overflow=0 and input Carry=0 +saturate(0x1) = 0x1 overflow 0 carry 0 +saturate(0xffffffff) = 0xffffffff overflow 0 carry 0 +saturate(0x8000) = 0x7fff overflow 1 carry 0 +saturate(0xffff8000) = 0xffff8000 overflow 0 carry 0 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 0 +add(0x1, 0x1) = 0x2 overflow 0 carry 0 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 0 carry 0 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 0 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 0 +sub(0x1, 0x1) = 0 overflow 0 carry 0 +sub(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +sub(0x4e20, 0x4e20) = 0 overflow 0 carry 0 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 0 carry 0 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 0 +abs_s(0x1) = 0x1 overflow 0 carry 0 +abs_s(0xffffffff) = 0x1 overflow 0 carry 0 +abs_s(0xffff8000) = 0x7fff overflow 0 carry 0 +shl(0x1, 1) = 0x2 overflow 0 carry 0 +shl(0xa, 1) = 0x14 overflow 0 carry 0 +shl(0xfff, 10) = 0x7fff overflow 1 carry 0 +shl(0xfff, 20) = 0x7fff overflow 1 carry 0 +shl(0x1, -1) = 0 overflow 0 carry 0 +shl(0x14, -1) = 0xa overflow 0 carry 0 +shl(0xfff, -10) = 0x3 overflow 0 carry 0 +shl(0xfff, -64) = 0 overflow 0 carry 0 +shr(0x1, -1) = 0x2 overflow 0 carry 0 +shr(0xa, -1) = 0x14 overflow 0 carry 0 +shr(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr(0x1, 
1) = 0 overflow 0 carry 0 +shr(0x14, 1) = 0xa overflow 0 carry 0 +shr(0xfff, 10) = 0x3 overflow 0 carry 0 +shr(0xfff, 64) = 0 overflow 0 carry 0 +mult(0x2, 0x2) = 0 overflow 0 carry 0 +mult(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +L_mult(0x2, 0x2) = 0x8 overflow 0 carry 0 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 0 carry 0 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +negate(0x1) = 0xffffffff overflow 0 carry 0 +negate(0xffffffff) = 0x1 overflow 0 carry 0 +negate(0xffff8000) = 0x7fff overflow 0 carry 0 +extract_h(0x1) = 0 overflow 0 carry 0 +extract_h(0xffffffff) = 0xffffffff overflow 0 carry 0 +extract_h(0xffff8000) = 0xffffffff overflow 0 carry 0 +extract_h(0x12345678) = 0x1234 overflow 0 carry 0 +extract_l(0x1) = 0x1 overflow 0 carry 0 +extract_l(0xffffffff) = 0xffffffff overflow 0 carry 0 +extract_l(0xffff8000) = 0xffff8000 overflow 0 carry 0 +extract_l(0x43218765) = 0xffff8765 overflow 0 carry 0 +round(0x1) = 0 overflow 0 carry 0 +round(0xffffffff) = 0 overflow 0 carry 0 +round(0xffff8000) = 0 overflow 0 carry 0 +round(0x43218765) = 0x4322 overflow 0 carry 0 +round(0x87654321) = 0xffff8765 overflow 0 carry 0 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 0 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 0 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 0 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 0 +L_add(0x1, 0x2) = 0x3 overflow 0 carry 0 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 0 +L_add(0xffffffff, 0x2) = 0x1 overflow 0 carry 0 +L_add(0x7000, 0x7000) = 0xe000 overflow 0 carry 0 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 0 carry 
0 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 0 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 0 +L_sub(0x1, 0x2) = 0xffffffff overflow 0 carry 0 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 0 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 0 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 0 carry 0 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 0 carry 0 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 0 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 0 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 0 +Checking L_add_c with input Overflow=0 and input Carry=0 +L_add_c(0x1, 0x2) = 0x3 overflow 0 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 1 +L_add_c(0xffffffff, 0x2) = 0x1 overflow 0 carry 1 +L_add_c(0x7000, 0x7000) = 0xe000 overflow 0 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11ffe overflow 0 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000000 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1ffffffe overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8ffffffe overflow 0 carry 1 +L_sub_c(0x1, 0x2) = 0xfffffffe overflow 0 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0 overflow 0 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffc overflow 0 carry 1 +L_sub_c(0x7000, 0x7000) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x8fff, 0x8fff) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x70000000, 0x70000000) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x8fffffff, 0x8fffffff) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x1, 0x80000000) = 0x80000000 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x7fffffff overflow 1 carry 1 +Checking L_macNs with input Overflow=0 and input Carry=0 +L_macNs(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001233 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +Checking L_msuNs with input 
Overflow=0 and input Carry=0 +L_msuNs(0x1234, 0x2, 0x2) = 0x122b overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1231 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 0 +negate(0xffffffff) = 0x1 overflow 0 carry 0 +negate(0xffff8000) = 0x7fff overflow 0 carry 0 +mult_r(0x2, 0x2) = 0 overflow 0 carry 0 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +norm_s(0x1) = 0xe overflow 0 carry 0 +norm_s(0xffffffff) = 0xf overflow 0 carry 0 +norm_s(0xffff8000) = 0 overflow 0 carry 0 +norm_s(0x2ee0) = 0x1 overflow 0 carry 0 +norm_l(0x1) = 0x1e overflow 0 carry 0 +norm_l(0xffffffff) = 0x1f overflow 0 carry 0 +norm_l(0xffff8000) = 0x10 overflow 0 carry 0 +norm_l(0x2ee0) = 0x11 overflow 0 carry 0 +norm_l(0x123456) = 0xa overflow 0 carry 0 +norm_l(0xabcdef) = 0x7 overflow 0 carry 0 +L_shl(0x1, 1) = 0x2 overflow 0 carry 0 +L_shl(0xa, 1) = 0x14 overflow 0 carry 0 +L_shl(0xfff, 10) = 0x3ffc00 overflow 0 carry 0 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 0 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 0 carry 0 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 0 +L_shl(0x1, -1) = 0 overflow 0 carry 0 +L_shl(0x14, -1) = 0xa overflow 0 carry 0 +L_shl(0xfff, -10) = 0x3 overflow 0 carry 0 +L_shl(0xfff, -64) = 0 overflow 0 carry 0 +L_shl(0x12345678, -10) = 0x48d15 overflow 0 carry 0 +L_shl(0x12345678, -64) = 0 overflow 0 carry 0 +L_shr(0x1, -1) = 0x2 overflow 0 carry 0 +L_shr(0xa, -1) = 0x14 overflow 0 carry 0 +L_shr(0xfff, -10) = 0x3ffc00 overflow 0 carry 0 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr(0x1, 1) = 0 overflow 0 carry 0 +L_shr(0x14, 1) = 0xa overflow 0 carry 0 +L_shr(0xfff, 10) = 0x3 overflow 0 carry 0 
+L_shr(0xfff, 64) = 0 overflow 0 carry 0 +L_shr(0x12345678, 10) = 0x48d15 overflow 0 carry 0 +L_shr(0x12345678, 64) = 0 overflow 0 carry 0 +shr_r(0x1, -1) = 0x2 overflow 0 carry 0 +shr_r(0xa, -1) = 0x14 overflow 0 carry 0 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr_r(0x1, 1) = 0x1 overflow 0 carry 0 +shr_r(0x14, 1) = 0xa overflow 0 carry 0 +shr_r(0xfff, 10) = 0x4 overflow 0 carry 0 +shr_r(0xfff, 64) = 0 overflow 0 carry 0 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 0 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0x123456, 0x244, 0x522) = 0x29 overflow 0 carry 0 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 0 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 0 carry 0 +L_deposit_h(0x1) = 0x10000 overflow 0 carry 0 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 0 carry 0 +L_deposit_h(0xffff8000) = 0x80000000 overflow 0 carry 0 +L_deposit_h(0x1234) = 0x12340000 overflow 0 carry 0 +L_deposit_l(0x1) = 0x1 overflow 0 carry 0 +L_deposit_l(0xffffffff) = 0xffffffff overflow 0 carry 0 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 0 carry 0 +L_deposit_l(0x1234) = 0x1234 overflow 0 carry 0 +L_shr_r(0x1, -1) = 0x2 overflow 0 carry 0 +L_shr_r(0xa, -1) = 0x14 overflow 0 carry 0 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 0 carry 0 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x1, 1) = 0x1 overflow 0 carry 0 +L_shr_r(0x14, 1) = 0xa overflow 0 carry 0 +L_shr_r(0xfff, 10) = 0x4 overflow 0 carry 0 +L_shr_r(0xfff, 64) = 0 overflow 0 carry 0 
+L_shr_r(0x12345678, 10) = 0x48d16 overflow 0 carry 0 +L_shr_r(0x12345678, 64) = 0 overflow 0 carry 0 +L_abs(0x1) = 0x1 overflow 0 carry 0 +L_abs(0xffffffff) = 0x1 overflow 0 carry 0 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 0 +L_sat(0x1) = 0x1 overflow 0 carry 0 +L_sat(0xffffffff) = 0xffffffff overflow 0 carry 0 +L_sat(0xffff8000) = 0xffff8000 overflow 0 carry 0 +L_sat(0x8000) = 0x8000 overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 0 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 0 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=0 and input Carry=1 +Checking saturate with input Overflow=0 and input Carry=1 +saturate(0x1) = 0x1 overflow 0 carry 1 +saturate(0xffffffff) = 0xffffffff overflow 0 carry 1 +saturate(0x8000) = 0x7fff overflow 1 carry 1 +saturate(0xffff8000) = 0xffff8000 overflow 0 carry 1 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 1 +add(0x1, 0x1) = 0x2 overflow 0 carry 1 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 0 carry 1 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 1 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 1 +sub(0x1, 0x1) = 0 overflow 0 carry 1 +sub(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +sub(0x4e20, 0x4e20) = 0 overflow 0 carry 1 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 0 carry 1 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 1 +abs_s(0x1) = 0x1 overflow 0 carry 1 +abs_s(0xffffffff) = 0x1 overflow 0 carry 1 +abs_s(0xffff8000) = 0x7fff overflow 0 carry 1 +shl(0x1, 1) = 0x2 overflow 0 carry 1 +shl(0xa, 1) = 0x14 overflow 0 carry 1 +shl(0xfff, 10) = 0x7fff overflow 1 carry 1 +shl(0xfff, 20) = 0x7fff overflow 1 carry 1 +shl(0x1, -1) = 0 overflow 0 carry 1 +shl(0x14, -1) = 0xa overflow 0 carry 1 +shl(0xfff, -10) = 0x3 overflow 0 carry 1 +shl(0xfff, -64) = 0 overflow 0 carry 1 +shr(0x1, -1) = 0x2 overflow 0 carry 1 +shr(0xa, -1) = 0x14 overflow 0 carry 1 +shr(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr(0xfff, -20) = 0x7fff 
overflow 1 carry 1 +shr(0x1, 1) = 0 overflow 0 carry 1 +shr(0x14, 1) = 0xa overflow 0 carry 1 +shr(0xfff, 10) = 0x3 overflow 0 carry 1 +shr(0xfff, 64) = 0 overflow 0 carry 1 +mult(0x2, 0x2) = 0 overflow 0 carry 1 +mult(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +L_mult(0x2, 0x2) = 0x8 overflow 0 carry 1 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 0 carry 1 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 1 +negate(0xffffffff) = 0x1 overflow 0 carry 1 +negate(0xffff8000) = 0x7fff overflow 0 carry 1 +extract_h(0x1) = 0 overflow 0 carry 1 +extract_h(0xffffffff) = 0xffffffff overflow 0 carry 1 +extract_h(0xffff8000) = 0xffffffff overflow 0 carry 1 +extract_h(0x12345678) = 0x1234 overflow 0 carry 1 +extract_l(0x1) = 0x1 overflow 0 carry 1 +extract_l(0xffffffff) = 0xffffffff overflow 0 carry 1 +extract_l(0xffff8000) = 0xffff8000 overflow 0 carry 1 +extract_l(0x43218765) = 0xffff8765 overflow 0 carry 1 +round(0x1) = 0 overflow 0 carry 1 +round(0xffffffff) = 0 overflow 0 carry 1 +round(0xffff8000) = 0 overflow 0 carry 1 +round(0x43218765) = 0x4322 overflow 0 carry 1 +round(0x87654321) = 0xffff8765 overflow 0 carry 1 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 1 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 1 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 1 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 1 +L_add(0x1, 0x2) = 0x3 overflow 0 carry 1 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 1 +L_add(0xffffffff, 0x2) = 0x1 overflow 0 carry 1 +L_add(0x7000, 0x7000) = 0xe000 overflow 0 carry 1 +L_add(0x8fff, 
0x8fff) = 0x11ffe overflow 0 carry 1 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 1 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 1 +L_sub(0x1, 0x2) = 0xffffffff overflow 0 carry 1 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 1 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 1 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 0 carry 1 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 0 carry 1 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 1 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 1 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 1 +Checking L_add_c with input Overflow=0 and input Carry=1 +L_add_c(0x1, 0x2) = 0x4 overflow 0 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffe overflow 0 carry 1 +L_add_c(0xffffffff, 0x2) = 0x2 overflow 0 carry 1 +L_add_c(0x7000, 0x7000) = 0xe001 overflow 0 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11fff overflow 0 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000001 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1fffffff overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8fffffff overflow 0 carry 1 +L_sub_c(0x1, 0x2) = 0xffffffff overflow 0 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 1 +L_sub_c(0x7000, 0x7000) = 0 overflow 0 carry 1 +L_sub_c(0x8fff, 0x8fff) = 0 overflow 0 carry 1 +L_sub_c(0x70000000, 0x70000000) = 0 overflow 0 carry 1 +L_sub_c(0x8fffffff, 0x8fffffff) = 0 overflow 0 carry 1 +L_sub_c(0x1, 0x80000000) = 0x80000001 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x80000000 overflow 0 carry 1 +Checking L_macNs with input Overflow=0 and input Carry=1 +L_macNs(0x1234, 0x2, 0x2) = 0x123d overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1237 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +Checking L_msuNs with input 
Overflow=0 and input Carry=1 +L_msuNs(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x80000000 overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 1 +negate(0xffffffff) = 0x1 overflow 0 carry 1 +negate(0xffff8000) = 0x7fff overflow 0 carry 1 +mult_r(0x2, 0x2) = 0 overflow 0 carry 1 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +norm_s(0x1) = 0xe overflow 0 carry 1 +norm_s(0xffffffff) = 0xf overflow 0 carry 1 +norm_s(0xffff8000) = 0 overflow 0 carry 1 +norm_s(0x2ee0) = 0x1 overflow 0 carry 1 +norm_l(0x1) = 0x1e overflow 0 carry 1 +norm_l(0xffffffff) = 0x1f overflow 0 carry 1 +norm_l(0xffff8000) = 0x10 overflow 0 carry 1 +norm_l(0x2ee0) = 0x11 overflow 0 carry 1 +norm_l(0x123456) = 0xa overflow 0 carry 1 +norm_l(0xabcdef) = 0x7 overflow 0 carry 1 +L_shl(0x1, 1) = 0x2 overflow 0 carry 1 +L_shl(0xa, 1) = 0x14 overflow 0 carry 1 +L_shl(0xfff, 10) = 0x3ffc00 overflow 0 carry 1 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 1 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 0 carry 1 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 1 +L_shl(0x1, -1) = 0 overflow 0 carry 1 +L_shl(0x14, -1) = 0xa overflow 0 carry 1 +L_shl(0xfff, -10) = 0x3 overflow 0 carry 1 +L_shl(0xfff, -64) = 0 overflow 0 carry 1 +L_shl(0x12345678, -10) = 0x48d15 overflow 0 carry 1 +L_shl(0x12345678, -64) = 0 overflow 0 carry 1 +L_shr(0x1, -1) = 0x2 overflow 0 carry 1 +L_shr(0xa, -1) = 0x14 overflow 0 carry 1 +L_shr(0xfff, -10) = 0x3ffc00 overflow 0 carry 1 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr(0x1, 1) = 0 overflow 0 carry 1 +L_shr(0x14, 1) = 0xa overflow 0 carry 1 +L_shr(0xfff, 10) = 0x3 overflow 0 carry 1 
+L_shr(0xfff, 64) = 0 overflow 0 carry 1 +L_shr(0x12345678, 10) = 0x48d15 overflow 0 carry 1 +L_shr(0x12345678, 64) = 0 overflow 0 carry 1 +shr_r(0x1, -1) = 0x2 overflow 0 carry 1 +shr_r(0xa, -1) = 0x14 overflow 0 carry 1 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr_r(0x1, 1) = 0x1 overflow 0 carry 1 +shr_r(0x14, 1) = 0xa overflow 0 carry 1 +shr_r(0xfff, 10) = 0x4 overflow 0 carry 1 +shr_r(0xfff, 64) = 0 overflow 0 carry 1 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 1 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0x123456, 0x244, 0x522) = 0x29 overflow 0 carry 1 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 1 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 0 carry 1 +L_deposit_h(0x1) = 0x10000 overflow 0 carry 1 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 0 carry 1 +L_deposit_h(0xffff8000) = 0x80000000 overflow 0 carry 1 +L_deposit_h(0x1234) = 0x12340000 overflow 0 carry 1 +L_deposit_l(0x1) = 0x1 overflow 0 carry 1 +L_deposit_l(0xffffffff) = 0xffffffff overflow 0 carry 1 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 0 carry 1 +L_deposit_l(0x1234) = 0x1234 overflow 0 carry 1 +L_shr_r(0x1, -1) = 0x2 overflow 0 carry 1 +L_shr_r(0xa, -1) = 0x14 overflow 0 carry 1 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 0 carry 1 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x1, 1) = 0x1 overflow 0 carry 1 +L_shr_r(0x14, 1) = 0xa overflow 0 carry 1 +L_shr_r(0xfff, 10) = 0x4 overflow 0 carry 1 +L_shr_r(0xfff, 64) = 0 overflow 0 carry 1 
+L_shr_r(0x12345678, 10) = 0x48d16 overflow 0 carry 1 +L_shr_r(0x12345678, 64) = 0 overflow 0 carry 1 +L_abs(0x1) = 0x1 overflow 0 carry 1 +L_abs(0xffffffff) = 0x1 overflow 0 carry 1 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 1 +L_sat(0x1) = 0x1 overflow 0 carry 1 +L_sat(0xffffffff) = 0xffffffff overflow 0 carry 1 +L_sat(0xffff8000) = 0xffff8000 overflow 0 carry 1 +L_sat(0x8000) = 0x8000 overflow 0 carry 1 +div_s(0x1, 0x1) = 0x7fff overflow 0 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 1 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=1 and input Carry=0 +Checking saturate with input Overflow=1 and input Carry=0 +saturate(0x1) = 0x1 overflow 1 carry 0 +saturate(0xffffffff) = 0xffffffff overflow 1 carry 0 +saturate(0x8000) = 0x7fff overflow 1 carry 0 +saturate(0xffff8000) = 0xffff8000 overflow 1 carry 0 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 0 +add(0x1, 0x1) = 0x2 overflow 1 carry 0 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 1 carry 0 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 0 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 0 +sub(0x1, 0x1) = 0 overflow 1 carry 0 +sub(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +sub(0x4e20, 0x4e20) = 0 overflow 1 carry 0 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 1 carry 0 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 0 +abs_s(0x1) = 0x1 overflow 1 carry 0 +abs_s(0xffffffff) = 0x1 overflow 1 carry 0 +abs_s(0xffff8000) = 0x7fff overflow 1 carry 0 +shl(0x1, 1) = 0x2 overflow 1 carry 0 +shl(0xa, 1) = 0x14 overflow 1 carry 0 +shl(0xfff, 10) = 0x7fff overflow 1 carry 0 +shl(0xfff, 20) = 0x7fff overflow 1 carry 0 +shl(0x1, -1) = 0 overflow 1 carry 0 +shl(0x14, -1) = 0xa overflow 1 carry 0 +shl(0xfff, -10) = 0x3 overflow 1 carry 0 +shl(0xfff, -64) = 0 overflow 1 carry 0 +shr(0x1, -1) = 0x2 overflow 1 carry 0 +shr(0xa, -1) = 0x14 overflow 1 carry 0 +shr(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr(0xfff, -20) = 0x7fff 
overflow 1 carry 0 +shr(0x1, 1) = 0 overflow 1 carry 0 +shr(0x14, 1) = 0xa overflow 1 carry 0 +shr(0xfff, 10) = 0x3 overflow 1 carry 0 +shr(0xfff, 64) = 0 overflow 1 carry 0 +mult(0x2, 0x2) = 0 overflow 1 carry 0 +mult(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +L_mult(0x2, 0x2) = 0x8 overflow 1 carry 0 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 1 carry 0 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +negate(0x1) = 0xffffffff overflow 1 carry 0 +negate(0xffffffff) = 0x1 overflow 1 carry 0 +negate(0xffff8000) = 0x7fff overflow 1 carry 0 +extract_h(0x1) = 0 overflow 1 carry 0 +extract_h(0xffffffff) = 0xffffffff overflow 1 carry 0 +extract_h(0xffff8000) = 0xffffffff overflow 1 carry 0 +extract_h(0x12345678) = 0x1234 overflow 1 carry 0 +extract_l(0x1) = 0x1 overflow 1 carry 0 +extract_l(0xffffffff) = 0xffffffff overflow 1 carry 0 +extract_l(0xffff8000) = 0xffff8000 overflow 1 carry 0 +extract_l(0x43218765) = 0xffff8765 overflow 1 carry 0 +round(0x1) = 0 overflow 1 carry 0 +round(0xffffffff) = 0 overflow 1 carry 0 +round(0xffff8000) = 0 overflow 1 carry 0 +round(0x43218765) = 0x4322 overflow 1 carry 0 +round(0x87654321) = 0xffff8765 overflow 1 carry 0 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 1 carry 0 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 1 carry 0 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 0 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 1 carry 0 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 1 carry 0 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 0 +L_add(0x1, 0x2) = 0x3 overflow 1 carry 0 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 0 +L_add(0xffffffff, 0x2) = 0x1 overflow 1 carry 0 +L_add(0x7000, 0x7000) = 0xe000 overflow 1 carry 0 +L_add(0x8fff, 
0x8fff) = 0x11ffe overflow 1 carry 0 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 0 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 0 +L_sub(0x1, 0x2) = 0xffffffff overflow 1 carry 0 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 0 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 0 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 1 carry 0 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 1 carry 0 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 0 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 0 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 0 +Checking L_add_c with input Overflow=1 and input Carry=0 +L_add_c(0x1, 0x2) = 0x3 overflow 1 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 1 +L_add_c(0xffffffff, 0x2) = 0x1 overflow 1 carry 1 +L_add_c(0x7000, 0x7000) = 0xe000 overflow 1 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11ffe overflow 1 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000000 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1ffffffe overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8ffffffe overflow 1 carry 1 +L_sub_c(0x1, 0x2) = 0xfffffffe overflow 1 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0 overflow 1 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffc overflow 1 carry 1 +L_sub_c(0x7000, 0x7000) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x8fff, 0x8fff) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x70000000, 0x70000000) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x8fffffff, 0x8fffffff) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x1, 0x80000000) = 0x80000000 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x7fffffff overflow 1 carry 1 +Checking L_macNs with input Overflow=1 and input Carry=0 +L_macNs(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001233 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 
+Checking L_msuNs with input Overflow=1 and input Carry=0 +L_msuNs(0x1234, 0x2, 0x2) = 0x122b overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1231 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 0 +negate(0xffffffff) = 0x1 overflow 1 carry 0 +negate(0xffff8000) = 0x7fff overflow 1 carry 0 +mult_r(0x2, 0x2) = 0 overflow 1 carry 0 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +norm_s(0x1) = 0xe overflow 1 carry 0 +norm_s(0xffffffff) = 0xf overflow 1 carry 0 +norm_s(0xffff8000) = 0 overflow 1 carry 0 +norm_s(0x2ee0) = 0x1 overflow 1 carry 0 +norm_l(0x1) = 0x1e overflow 1 carry 0 +norm_l(0xffffffff) = 0x1f overflow 1 carry 0 +norm_l(0xffff8000) = 0x10 overflow 1 carry 0 +norm_l(0x2ee0) = 0x11 overflow 1 carry 0 +norm_l(0x123456) = 0xa overflow 1 carry 0 +norm_l(0xabcdef) = 0x7 overflow 1 carry 0 +L_shl(0x1, 1) = 0x2 overflow 1 carry 0 +L_shl(0xa, 1) = 0x14 overflow 1 carry 0 +L_shl(0xfff, 10) = 0x3ffc00 overflow 1 carry 0 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 0 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 1 carry 0 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 0 +L_shl(0x1, -1) = 0 overflow 1 carry 0 +L_shl(0x14, -1) = 0xa overflow 1 carry 0 +L_shl(0xfff, -10) = 0x3 overflow 1 carry 0 +L_shl(0xfff, -64) = 0 overflow 1 carry 0 +L_shl(0x12345678, -10) = 0x48d15 overflow 1 carry 0 +L_shl(0x12345678, -64) = 0 overflow 1 carry 0 +L_shr(0x1, -1) = 0x2 overflow 1 carry 0 +L_shr(0xa, -1) = 0x14 overflow 1 carry 0 +L_shr(0xfff, -10) = 0x3ffc00 overflow 1 carry 0 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr(0x1, 1) = 0 overflow 1 carry 0 +L_shr(0x14, 1) = 0xa overflow 1 carry 0 +L_shr(0xfff, 10) 
= 0x3 overflow 1 carry 0 +L_shr(0xfff, 64) = 0 overflow 1 carry 0 +L_shr(0x12345678, 10) = 0x48d15 overflow 1 carry 0 +L_shr(0x12345678, 64) = 0 overflow 1 carry 0 +shr_r(0x1, -1) = 0x2 overflow 1 carry 0 +shr_r(0xa, -1) = 0x14 overflow 1 carry 0 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr_r(0x1, 1) = 0x1 overflow 1 carry 0 +shr_r(0x14, 1) = 0xa overflow 1 carry 0 +shr_r(0xfff, 10) = 0x4 overflow 1 carry 0 +shr_r(0xfff, 64) = 0 overflow 1 carry 0 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 0 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0x123456, 0x244, 0x522) = 0x29 overflow 1 carry 0 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 0 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 1 carry 0 +L_deposit_h(0x1) = 0x10000 overflow 1 carry 0 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 1 carry 0 +L_deposit_h(0xffff8000) = 0x80000000 overflow 1 carry 0 +L_deposit_h(0x1234) = 0x12340000 overflow 1 carry 0 +L_deposit_l(0x1) = 0x1 overflow 1 carry 0 +L_deposit_l(0xffffffff) = 0xffffffff overflow 1 carry 0 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 1 carry 0 +L_deposit_l(0x1234) = 0x1234 overflow 1 carry 0 +L_shr_r(0x1, -1) = 0x2 overflow 1 carry 0 +L_shr_r(0xa, -1) = 0x14 overflow 1 carry 0 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 1 carry 0 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x1, 1) = 0x1 overflow 1 carry 0 +L_shr_r(0x14, 1) = 0xa overflow 1 carry 0 +L_shr_r(0xfff, 10) = 0x4 overflow 1 carry 0 +L_shr_r(0xfff, 64) = 0 
overflow 1 carry 0 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 1 carry 0 +L_shr_r(0x12345678, 64) = 0 overflow 1 carry 0 +L_abs(0x1) = 0x1 overflow 1 carry 0 +L_abs(0xffffffff) = 0x1 overflow 1 carry 0 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 0 +L_sat(0x1) = 0x7fffffff overflow 0 carry 0 +L_sat(0xffffffff) = 0x7fffffff overflow 0 carry 0 +L_sat(0xffff8000) = 0x7fffffff overflow 0 carry 0 +L_sat(0x8000) = 0x7fffffff overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 1 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 0 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=1 and input Carry=1 +Checking saturate with input Overflow=1 and input Carry=1 +saturate(0x1) = 0x1 overflow 1 carry 1 +saturate(0xffffffff) = 0xffffffff overflow 1 carry 1 +saturate(0x8000) = 0x7fff overflow 1 carry 1 +saturate(0xffff8000) = 0xffff8000 overflow 1 carry 1 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 1 +add(0x1, 0x1) = 0x2 overflow 1 carry 1 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 1 carry 1 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 1 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 1 +sub(0x1, 0x1) = 0 overflow 1 carry 1 +sub(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +sub(0x4e20, 0x4e20) = 0 overflow 1 carry 1 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 1 carry 1 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 1 +abs_s(0x1) = 0x1 overflow 1 carry 1 +abs_s(0xffffffff) = 0x1 overflow 1 carry 1 +abs_s(0xffff8000) = 0x7fff overflow 1 carry 1 +shl(0x1, 1) = 0x2 overflow 1 carry 1 +shl(0xa, 1) = 0x14 overflow 1 carry 1 +shl(0xfff, 10) = 0x7fff overflow 1 carry 1 +shl(0xfff, 20) = 0x7fff overflow 1 carry 1 +shl(0x1, -1) = 0 overflow 1 carry 1 +shl(0x14, -1) = 0xa overflow 1 carry 1 +shl(0xfff, -10) = 0x3 overflow 1 carry 1 +shl(0xfff, -64) = 0 overflow 1 carry 1 +shr(0x1, -1) = 0x2 overflow 1 carry 1 +shr(0xa, -1) = 0x14 overflow 1 carry 1 +shr(0xfff, -10) = 0x7fff overflow 1 carry 1 
+shr(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr(0x1, 1) = 0 overflow 1 carry 1 +shr(0x14, 1) = 0xa overflow 1 carry 1 +shr(0xfff, 10) = 0x3 overflow 1 carry 1 +shr(0xfff, 64) = 0 overflow 1 carry 1 +mult(0x2, 0x2) = 0 overflow 1 carry 1 +mult(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +L_mult(0x2, 0x2) = 0x8 overflow 1 carry 1 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 1 carry 1 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 1 +negate(0xffffffff) = 0x1 overflow 1 carry 1 +negate(0xffff8000) = 0x7fff overflow 1 carry 1 +extract_h(0x1) = 0 overflow 1 carry 1 +extract_h(0xffffffff) = 0xffffffff overflow 1 carry 1 +extract_h(0xffff8000) = 0xffffffff overflow 1 carry 1 +extract_h(0x12345678) = 0x1234 overflow 1 carry 1 +extract_l(0x1) = 0x1 overflow 1 carry 1 +extract_l(0xffffffff) = 0xffffffff overflow 1 carry 1 +extract_l(0xffff8000) = 0xffff8000 overflow 1 carry 1 +extract_l(0x43218765) = 0xffff8765 overflow 1 carry 1 +round(0x1) = 0 overflow 1 carry 1 +round(0xffffffff) = 0 overflow 1 carry 1 +round(0xffff8000) = 0 overflow 1 carry 1 +round(0x43218765) = 0x4322 overflow 1 carry 1 +round(0x87654321) = 0xffff8765 overflow 1 carry 1 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 1 carry 1 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 1 carry 1 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 1 carry 1 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 1 carry 1 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 1 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 1 +L_add(0x1, 0x2) = 0x3 overflow 1 carry 1 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 1 +L_add(0xffffffff, 0x2) = 0x1 overflow 1 carry 1 +L_add(0x7000, 0x7000) = 0xe000 overflow 1 
carry 1 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 1 carry 1 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 1 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 1 +L_sub(0x1, 0x2) = 0xffffffff overflow 1 carry 1 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 1 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 1 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 1 carry 1 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 1 carry 1 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 1 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 1 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 1 +Checking L_add_c with input Overflow=1 and input Carry=1 +L_add_c(0x1, 0x2) = 0x4 overflow 1 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffe overflow 1 carry 1 +L_add_c(0xffffffff, 0x2) = 0x2 overflow 1 carry 1 +L_add_c(0x7000, 0x7000) = 0xe001 overflow 1 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11fff overflow 1 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000001 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1fffffff overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8fffffff overflow 1 carry 1 +L_sub_c(0x1, 0x2) = 0xffffffff overflow 1 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 1 +L_sub_c(0x7000, 0x7000) = 0 overflow 1 carry 1 +L_sub_c(0x8fff, 0x8fff) = 0 overflow 1 carry 1 +L_sub_c(0x70000000, 0x70000000) = 0 overflow 1 carry 1 +L_sub_c(0x8fffffff, 0x8fffffff) = 0 overflow 1 carry 1 +L_sub_c(0x1, 0x80000000) = 0x80000001 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x80000000 overflow 1 carry 1 +Checking L_macNs with input Overflow=1 and input Carry=1 +L_macNs(0x1234, 0x2, 0x2) = 0x123d overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1237 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +Checking 
L_msuNs with input Overflow=1 and input Carry=1 +L_msuNs(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x80000000 overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 1 +negate(0xffffffff) = 0x1 overflow 1 carry 1 +negate(0xffff8000) = 0x7fff overflow 1 carry 1 +mult_r(0x2, 0x2) = 0 overflow 1 carry 1 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +norm_s(0x1) = 0xe overflow 1 carry 1 +norm_s(0xffffffff) = 0xf overflow 1 carry 1 +norm_s(0xffff8000) = 0 overflow 1 carry 1 +norm_s(0x2ee0) = 0x1 overflow 1 carry 1 +norm_l(0x1) = 0x1e overflow 1 carry 1 +norm_l(0xffffffff) = 0x1f overflow 1 carry 1 +norm_l(0xffff8000) = 0x10 overflow 1 carry 1 +norm_l(0x2ee0) = 0x11 overflow 1 carry 1 +norm_l(0x123456) = 0xa overflow 1 carry 1 +norm_l(0xabcdef) = 0x7 overflow 1 carry 1 +L_shl(0x1, 1) = 0x2 overflow 1 carry 1 +L_shl(0xa, 1) = 0x14 overflow 1 carry 1 +L_shl(0xfff, 10) = 0x3ffc00 overflow 1 carry 1 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 1 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 1 carry 1 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 1 +L_shl(0x1, -1) = 0 overflow 1 carry 1 +L_shl(0x14, -1) = 0xa overflow 1 carry 1 +L_shl(0xfff, -10) = 0x3 overflow 1 carry 1 +L_shl(0xfff, -64) = 0 overflow 1 carry 1 +L_shl(0x12345678, -10) = 0x48d15 overflow 1 carry 1 +L_shl(0x12345678, -64) = 0 overflow 1 carry 1 +L_shr(0x1, -1) = 0x2 overflow 1 carry 1 +L_shr(0xa, -1) = 0x14 overflow 1 carry 1 +L_shr(0xfff, -10) = 0x3ffc00 overflow 1 carry 1 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr(0x1, 1) = 0 overflow 1 carry 1 +L_shr(0x14, 1) = 0xa overflow 1 carry 1 +L_shr(0xfff, 10) = 0x3 
overflow 1 carry 1 +L_shr(0xfff, 64) = 0 overflow 1 carry 1 +L_shr(0x12345678, 10) = 0x48d15 overflow 1 carry 1 +L_shr(0x12345678, 64) = 0 overflow 1 carry 1 +shr_r(0x1, -1) = 0x2 overflow 1 carry 1 +shr_r(0xa, -1) = 0x14 overflow 1 carry 1 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr_r(0x1, 1) = 0x1 overflow 1 carry 1 +shr_r(0x14, 1) = 0xa overflow 1 carry 1 +shr_r(0xfff, 10) = 0x4 overflow 1 carry 1 +shr_r(0xfff, 64) = 0 overflow 1 carry 1 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 1 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0x123456, 0x244, 0x522) = 0x29 overflow 1 carry 1 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 1 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 1 carry 1 +L_deposit_h(0x1) = 0x10000 overflow 1 carry 1 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 1 carry 1 +L_deposit_h(0xffff8000) = 0x80000000 overflow 1 carry 1 +L_deposit_h(0x1234) = 0x12340000 overflow 1 carry 1 +L_deposit_l(0x1) = 0x1 overflow 1 carry 1 +L_deposit_l(0xffffffff) = 0xffffffff overflow 1 carry 1 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 1 carry 1 +L_deposit_l(0x1234) = 0x1234 overflow 1 carry 1 +L_shr_r(0x1, -1) = 0x2 overflow 1 carry 1 +L_shr_r(0xa, -1) = 0x14 overflow 1 carry 1 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 1 carry 1 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x1, 1) = 0x1 overflow 1 carry 1 +L_shr_r(0x14, 1) = 0xa overflow 1 carry 1 +L_shr_r(0xfff, 10) = 0x4 overflow 1 carry 1 +L_shr_r(0xfff, 64) = 0 overflow 
1 carry 1 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 1 carry 1 +L_shr_r(0x12345678, 64) = 0 overflow 1 carry 1 +L_abs(0x1) = 0x1 overflow 1 carry 1 +L_abs(0xffffffff) = 0x1 overflow 1 carry 1 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 1 +L_sat(0x1) = 0x80000000 overflow 0 carry 0 +L_sat(0xffffffff) = 0x80000000 overflow 0 carry 0 +L_sat(0xffff8000) = 0x80000000 overflow 0 carry 0 +L_sat(0x8000) = 0x80000000 overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 1 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 1 diff --git a/ref-rvct-neon-nofp16.txt b/ref-rvct-neon-nofp16.txt new file mode 100644 index 0000000..f3656db --- /dev/null +++ b/ref-rvct-neon-nofp16.txt @@ -0,0 +1,7356 @@ + +VLD1/VLD1Q output: +VLD1/VLD1Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD1/VLD1Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD1/VLD1Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD1/VLD1Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD1/VLD1Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD1/VLD1Q:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD1/VLD1Q:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:14:result_int64x2 [] = { fffffffffffffff0, 
fffffffffffffff1, } +VLD1/VLD1Q:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD1/VLD1Q:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD1/VLD1Q:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:19:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD1/VLD1Q:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD1/VLD1Q:21:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } + +VADD/VADDQ output: +VADD/VADDQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, } +VADD/VADDQ:1:result_int16x4 [] = { ffffffec, ffffffed, ffffffee, ffffffef, } +VADD/VADDQ:2:result_int32x2 [] = { fffffff3, fffffff4, } +VADD/VADDQ:3:result_int64x1 [] = { 54, } +VADD/VADDQ:4:result_uint8x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:5:result_uint16x4 [] = { e, f, 10, 11, } +VADD/VADDQ:6:result_uint32x2 [] = { 18, 19, } +VADD/VADDQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VADD/VADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADD/VADDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADD/VADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VADD/VADDQ:11:result_int8x16 [] = { ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VADD/VADDQ:12:result_int16x8 [] = { ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VADD/VADDQ:13:result_int32x4 [] = { ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VADD/VADDQ:14:result_int64x2 [] = { 8, 9, } +VADD/VADDQ:15:result_uint8x16 [] = { fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:16:result_uint16x8 [] = { fff3, fff4, fff5, fff6, fff7, fff8, fff9, fffa, } 
+VADD/VADDQ:17:result_uint32x4 [] = { 27, 28, 29, 2a, } +VADD/VADDQ:18:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff4, } +VADD/VADDQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADD/VADDQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADD/VADDQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +float32: +VADD/VADDQ:22:result_float32x2 [] = { 40d9999a, 40d9999a, } +VADD/VADDQ:23:result_float32x4 [] = { 41100000, 41100000, 41100000, 41100000, } + +VLD1_LANE/VLD1_LANEQ output: +VLD1_LANE/VLD1_LANEQ:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, ffffffaa, } +VLD1_LANE/VLD1_LANEQ:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:2:result_int32x2 [] = { aaaaaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, f0, } +VLD1_LANE/VLD1_LANEQ:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, fff0, } +VLD1_LANE/VLD1_LANEQ:6:result_uint32x2 [] = { aaaaaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, f0, } +VLD1_LANE/VLD1_LANEQ:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, fff0, } +VLD1_LANE/VLD1_LANEQ:10:result_float32x2 [] = { aaaaaaaa, c1800000, } +VLD1_LANE/VLD1_LANEQ:11:result_int8x16 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:12:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, ffffaaaa, ffffaaaa, } +VLD1_LANE/VLD1_LANEQ:13:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:14:result_int64x2 [] = { aaaaaaaaaaaaaaaa, fffffffffffffff0, } 
+VLD1_LANE/VLD1_LANEQ:15:result_uint8x16 [] = { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, f0, aa, aa, aa, } +VLD1_LANE/VLD1_LANEQ:16:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, aaaa, } +VLD1_LANE/VLD1_LANEQ:17:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:18:result_uint64x2 [] = { fffffffffffffff0, aaaaaaaaaaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:19:result_poly8x16 [] = { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, f0, aa, aa, aa, } +VLD1_LANE/VLD1_LANEQ:20:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, aaaa, } +VLD1_LANE/VLD1_LANEQ:21:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, c1800000, aaaaaaaa, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1800000, c1800000, } +VLD1_DUP/VLD1_DUPQ:11:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:12:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:13:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } 
+VLD1_DUP/VLD1_DUPQ:14:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:15:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:16:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:17:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:19:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:20:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:21:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1700000, c1700000, } +VLD1_DUP/VLD1_DUPQ:11:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:12:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } 
+VLD1_DUP/VLD1_DUPQ:13:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:14:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:15:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:16:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:17:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:18:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:19:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:20:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:21:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1600000, c1600000, } +VLD1_DUP/VLD1_DUPQ:11:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:12:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, 
fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:13:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:14:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:15:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:16:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:17:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:18:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:19:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:20:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:21:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1800000, c1800000, } +VDUP/VDUPQ:11:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:12:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } 
+VDUP/VDUPQ:13:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:14:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:15:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:16:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:17:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:19:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:20:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:21:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1700000, c1700000, } +VDUP/VDUPQ:11:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:12:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:13:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:14:result_int64x2 [] = { 
fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:15:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:16:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:17:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:18:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:19:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:20:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:21:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1600000, c1600000, } +VDUP/VDUPQ:11:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:12:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:13:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:14:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:15:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, 
f2, f2, } +VDUP/VDUPQ:16:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:17:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:18:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:19:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:20:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:21:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1800000, c1800000, } +VMOV/VMOVQ:11:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:12:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:13:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:14:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:15:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:16:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:17:result_uint32x4 [] 
= { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:19:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:20:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:21:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1700000, c1700000, } +VMOV/VMOVQ:11:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:12:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:13:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:14:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:15:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:16:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:17:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:18:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } 
+VMOV/VMOVQ:19:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:20:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:21:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1600000, c1600000, } +VMOV/VMOVQ:11:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:12:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:13:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:14:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:15:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:16:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:17:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:18:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:19:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } 
+VMOV/VMOVQ:20:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:21:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } + +VGET_HIGH output: +VGET_HIGH:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VGET_HIGH:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VGET_HIGH:2:result_int32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:3:result_int64x1 [] = { fffffffffffffff1, } +VGET_HIGH:4:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VGET_HIGH:5:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VGET_HIGH:6:result_uint32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:7:result_uint64x1 [] = { fffffffffffffff1, } +VGET_HIGH:8:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VGET_HIGH:9:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VGET_HIGH:10:result_float32x2 [] = { c1600000, c1500000, } +VGET_HIGH:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VGET_LOW output: +VGET_LOW:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, 
fffffff6, fffffff7, } +VGET_LOW:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VGET_LOW:2:result_int32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:3:result_int64x1 [] = { fffffffffffffff0, } +VGET_LOW:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VGET_LOW:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VGET_LOW:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:7:result_uint64x1 [] = { fffffffffffffff0, } +VGET_LOW:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VGET_LOW:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VGET_LOW:10:result_float32x2 [] = { c1800000, c1700000, } +VGET_LOW:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLAL_LANE cumulative saturation output: +VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon cumulative saturation 0 + +VQDMLAL_LANE output: +VQDMLAL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:5:result_int64x1 
[] = { 3333333333333333, } +VQDMLAL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:14:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:15:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL_LANE:16:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL_LANE:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:23:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLAL_LANE (mul with input=0) cumulative saturation output: +VQDMLAL_LANE:24:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:25:vqdmlal_lane_s32 Neon cumulative saturation 0 + +VQDMLAL_LANE (mul with input=0) output: +VQDMLAL_LANE:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:28:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:29:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMLAL_LANE:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:33:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:34:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:35:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:36:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:37:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:38:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:39:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLAL_LANE:40:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLAL_LANE:41:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:42:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:43:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:44:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:47:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLAL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_LANE:48:vqdmlal_lane_s16 Neon cumulative saturation 1 +VQDMLAL_LANE:49:vqdmlal_lane_s32 Neon cumulative saturation 1 + +VQDMLAL_LANE (check mul cumulative saturation) output: +VQDMLAL_LANE:50:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:51:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:52:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:53:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:54:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMLAL_LANE:55:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:56:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:57:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:58:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:59:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:60:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:61:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:62:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:63:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_LANE:64:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL_LANE:65:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:66:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:67:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:68:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:69:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:70:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:71:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLSL_LANE cumulative saturation output: +VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon cumulative saturation 0 + +VQDMLSL_LANE output: +VQDMLSL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:8:result_uint32x2 [] = { 
33333333, 33333333, } +VQDMLSL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:14:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:15:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL_LANE:16:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL_LANE:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:23:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLSL_LANE (mul with input=0) cumulative saturation output: +VQDMLSL_LANE:24:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:25:vqdmlsl_lane_s32 Neon cumulative saturation 0 + +VQDMLSL_LANE (mul with input=0) output: +VQDMLSL_LANE:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:28:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:29:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:33:result_uint64x1 [] = 
{ 3333333333333333, } +VQDMLSL_LANE:34:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:35:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:36:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:37:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:38:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:39:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLSL_LANE:40:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLSL_LANE:41:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:42:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:43:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:44:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:47:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLSL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_LANE:48:vqdmlsl_lane_s16 Neon cumulative saturation 1 +VQDMLSL_LANE:49:vqdmlsl_lane_s32 Neon cumulative saturation 1 + +VQDMLSL_LANE (check mul cumulative saturation) output: +VQDMLSL_LANE:50:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:51:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:52:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:53:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:54:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:55:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:56:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:57:result_uint64x1 [] = { 3333333333333333, } 
+VQDMLSL_LANE:58:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:59:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:60:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:61:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:62:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:63:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_LANE:64:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_LANE:65:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:66:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:67:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:68:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:69:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:70:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:71:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLAL_N cumulative saturation output: +VQDMLAL_N:0:vqdmlal_n_s16 Neon cumulative saturation 0 +VQDMLAL_N:1:vqdmlal_n_s32 Neon cumulative saturation 0 + +VQDMLAL_N output: +VQDMLAL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VQDMLAL_N:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:14:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:15:result_int32x4 [] = { 1684, 1685, 1686, 1687, } +VQDMLAL_N:16:result_int64x2 [] = { 21ce, 21cf, } +VQDMLAL_N:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:23:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLAL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_N:24:vqdmlal_n_s16 Neon cumulative saturation 1 +VQDMLAL_N:25:vqdmlal_n_s32 Neon cumulative saturation 1 + +VQDMLAL_N (check mul cumulative saturation) output: +VQDMLAL_N:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:28:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:29:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:33:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:34:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:35:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:36:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:37:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, } +VQDMLAL_N:38:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:39:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_N:40:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL_N:41:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:42:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:43:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:44:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:47:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLSL_N cumulative saturation output: +VQDMLSL_N:0:vqdmlsl_n_s16 Neon cumulative saturation 0 +VQDMLSL_N:1:vqdmlsl_n_s32 Neon cumulative saturation 0 + +VQDMLSL_N output: +VQDMLSL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:14:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:15:result_int32x4 [] = { ffffe95c, ffffe95d, ffffe95e, ffffe95f, } 
+VQDMLSL_N:16:result_int64x2 [] = { ffffffffffffde12, ffffffffffffde13, } +VQDMLSL_N:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_N:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:23:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLSL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_N:24:vqdmlsl_n_s16 Neon cumulative saturation 1 +VQDMLSL_N:25:vqdmlsl_n_s32 Neon cumulative saturation 1 + +VQDMLSL_N (check mul cumulative saturation) output: +VQDMLSL_N:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:28:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:29:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:33:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:34:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:35:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:36:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:37:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:38:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:39:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_N:40:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_N:41:result_uint8x16 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:42:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:43:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:44:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_N:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:47:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VEXT/VEXTQ output: +VEXT/VEXTQ:0:result_int8x8 [] = { fffffff7, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:1:result_int16x4 [] = { fffffff3, 22, 22, 22, } +VEXT/VEXTQ:2:result_int32x2 [] = { fffffff1, 33, } +VEXT/VEXTQ:3:result_int64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:4:result_uint8x8 [] = { f6, f7, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:5:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VEXT/VEXTQ:6:result_uint32x2 [] = { fffffff1, 77, } +VEXT/VEXTQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:8:result_poly8x8 [] = { f6, f7, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:9:result_poly16x4 [] = { fff2, fff3, 66, 66, } +VEXT/VEXTQ:10:result_float32x2 [] = { c1700000, 42066666, } +VEXT/VEXTQ:11:result_int8x16 [] = { fffffffe, ffffffff, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:12:result_int16x8 [] = { fffffff7, 22, 22, 22, 22, 22, 22, 22, } +VEXT/VEXTQ:13:result_int32x4 [] = { fffffff3, 33, 33, 33, } +VEXT/VEXTQ:14:result_int64x2 [] = { fffffffffffffff1, 44, } +VEXT/VEXTQ:15:result_uint8x16 [] = { fc, fd, fe, ff, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:16:result_uint16x8 [] = { fff6, fff7, 66, 66, 66, 66, 66, 66, } +VEXT/VEXTQ:17:result_uint32x4 [] = { fffffff3, 77, 77, 77, } +VEXT/VEXTQ:18:result_uint64x2 [] = { fffffffffffffff1, 88, } +VEXT/VEXTQ:19:result_poly8x16 [] = { fc, fd, fe, ff, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } 
+VEXT/VEXTQ:20:result_poly16x8 [] = { fff6, fff7, 66, 66, 66, 66, 66, 66, } +VEXT/VEXTQ:21:result_float32x4 [] = { c1500000, 4204cccd, 4204cccd, 4204cccd, } + +VSHR_N output: +VSHR_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHR_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHR_N:3:result_int64x1 [] = { ffffffffffffffff, } +VSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, } +VSHR_N:5:result_uint16x4 [] = { 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:6:result_uint32x2 [] = { 7ffffff, 7ffffff, } +VSHR_N:7:result_uint64x1 [] = { 7fffffff, } +VSHR_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHR_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHR_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHR_N:11:result_int8x16 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, } +VSHR_N:12:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:13:result_int32x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VSHR_N:14:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHR_N:15:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHR_N:16:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:17:result_uint32x4 [] = { 7ffffff, 7ffffff, 7ffffff, 7ffffff, } +VSHR_N:18:result_uint64x2 [] = { 7fffffff, 7fffffff, } +VSHR_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHR_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHR_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSHRN_N output: +VSHRN_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, 
fffffffa, fffffffa, fffffffb, fffffffb, } +VSHRN_N:1:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHRN_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VSHRN_N:4:result_uint8x8 [] = { fc, fc, fc, fc, fd, fd, fd, fd, } +VSHRN_N:5:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VSHRN_N:6:result_uint32x2 [] = { fffffffe, fffffffe, } +VSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHRN_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHRN_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHRN_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSHRN_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSHRN_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHRN_N (with input = 0) output: +VRSHRN_N:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:2:result_int32x2 [] = { 0, 0, } +VRSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:6:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:7:result_uint64x1 
[] = { 3333333333333333, } +VRSHRN_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHRN_N output: +VRSHRN_N:22:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHRN_N:23:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VRSHRN_N:24:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHRN_N:25:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:26:result_uint8x8 [] = { fc, fc, fd, fd, fd, fd, fe, fe, } +VRSHRN_N:27:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VRSHRN_N:28:result_uint32x2 [] = { fffffffe, fffffffe, } +VRSHRN_N:29:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:32:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, } +VRSHRN_N:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHRN_N (with large shift amount) output: +VRSHRN_N:44:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:45:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:46:result_int32x2 [] = { 0, 0, } +VRSHRN_N:47:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:48:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:49:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:50:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:51:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:52:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:53:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:54:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:55:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:56:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:57:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:59:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:60:result_uint16x8 [] = { 3333, 
3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:61:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:64:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:65:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHRN_N cumulative saturation output: +VQRSHRN_N:0:vqrshrn_n_s16 Neon cumulative saturation 0 +VQRSHRN_N:1:vqrshrn_n_s32 Neon cumulative saturation 0 +VQRSHRN_N:2:vqrshrn_n_s64 Neon cumulative saturation 0 +VQRSHRN_N:3:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:4:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:5:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N output: +VQRSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VQRSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VQRSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQRSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:16:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:17:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:18:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:19:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:21:result_uint8x16 [] = { 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:22:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:23:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:24:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:27:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQRSHRN_N:28:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:29:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:30:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:31:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:32:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:33:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N (check saturation: shift by 3) output: +VQRSHRN_N:34:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:35:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:36:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:37:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:38:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:39:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:40:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:41:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:42:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:43:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:44:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VQRSHRN_N:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:50:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:53:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:54:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:55:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHRN_N (check saturation: shift by max) cumulative saturation output: +VQRSHRN_N:56:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:57:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:58:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:59:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:60:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:61:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N (check saturation: shift by max) output: +VQRSHRN_N:62:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:63:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:64:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:65:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:66:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:67:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:68:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:69:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:70:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:71:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:72:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:73:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:74:result_int16x8 [] = { 3333, 3333, 
3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:75:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:76:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:77:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:78:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:79:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:80:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:81:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:82:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:83:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSET_LANE/VSET_LANEQ output: +VSET_LANE/VSET_LANEQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, 11, } +VSET_LANE/VSET_LANEQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 22, } +VSET_LANE/VSET_LANEQ:2:result_int32x2 [] = { fffffff0, 33, } +VSET_LANE/VSET_LANEQ:3:result_int64x1 [] = { 44, } +VSET_LANE/VSET_LANEQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, 55, f7, } +VSET_LANE/VSET_LANEQ:5:result_uint16x4 [] = { fff0, fff1, 66, fff3, } +VSET_LANE/VSET_LANEQ:6:result_uint32x2 [] = { fffffff0, 77, } +VSET_LANE/VSET_LANEQ:7:result_uint64x1 [] = { 88, } +VSET_LANE/VSET_LANEQ:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, 55, f7, } +VSET_LANE/VSET_LANEQ:9:result_poly16x4 [] = { fff0, fff1, 66, fff3, } +VSET_LANE/VSET_LANEQ:10:result_float32x2 [] = { c1800000, 4204cccd, } +VSET_LANE/VSET_LANEQ:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffff99, } +VSET_LANE/VSET_LANEQ:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, aa, fffffff6, fffffff7, } 
+VSET_LANE/VSET_LANEQ:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, bb, } +VSET_LANE/VSET_LANEQ:14:result_int64x2 [] = { fffffffffffffff0, cc, } +VSET_LANE/VSET_LANEQ:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, dd, ff, } +VSET_LANE/VSET_LANEQ:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, ee, fff7, } +VSET_LANE/VSET_LANEQ:17:result_uint32x4 [] = { fffffff0, fffffff1, ff, fffffff3, } +VSET_LANE/VSET_LANEQ:18:result_uint64x2 [] = { fffffffffffffff0, 11, } +VSET_LANE/VSET_LANEQ:19:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, dd, ff, } +VSET_LANE/VSET_LANEQ:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, ee, fff7, } +VSET_LANE/VSET_LANEQ:21:result_float32x4 [] = { c1800000, c1700000, c1600000, 41333333, } + +VGET_LANE/VGETQ_LANE output: +vget_lane_s8: fffffff7 +vget_lane_s16: fffffff3 +vget_lane_s32: fffffff1 +vget_lane_s64: fffffffffffffff0 +vget_lane_u8: f6 +vget_lane_u16: fff2 +vget_lane_u32: fffffff1 +vget_lane_u64: fffffffffffffff0 +vget_lane_p8: f6 +vget_lane_p16: fff2 +vget_lane_f32: c1700000 +vgetq_lane_s8: ffffffff +vgetq_lane_s16: fffffff5 +vgetq_lane_s32: fffffff3 +vgetq_lane_s64: fffffffffffffff1 +vgetq_lane_u8: fe +vgetq_lane_u16: fff6 +vgetq_lane_u32: fffffff2 +vgetq_lane_u64: fffffffffffffff1 +vgetq_lane_p8: fe +vgetq_lane_p16: fff6 +vgetq_lane_f32: c1500000 + + +VQSUB/VQSUBQ cumulative saturation output: +VQSUB/VQSUBQ:0:vqsub_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:1:vqsub_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:2:vqsub_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:3:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:4:vqsub_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:5:vqsub_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:6:vqsub_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:7:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:8:vqsubq_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:9:vqsubq_s16 Neon cumulative saturation 0 
+VQSUB/VQSUBQ:10:vqsubq_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:11:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:12:vqsubq_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:13:vqsubq_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:14:vqsubq_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:15:vqsubq_u64 Neon cumulative saturation 0 + +VQSUB/VQSUBQ output: +VQSUB/VQSUBQ:16:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VQSUB/VQSUBQ:17:result_int16x4 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, } +VQSUB/VQSUBQ:18:result_int32x2 [] = { ffffffbd, ffffffbe, } +VQSUB/VQSUBQ:19:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:20:result_uint8x8 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, } +VQSUB/VQSUBQ:21:result_uint16x4 [] = { ff8a, ff8b, ff8c, ff8d, } +VQSUB/VQSUBQ:22:result_uint32x2 [] = { ffffff79, ffffff7a, } +VQSUB/VQSUBQ:23:result_uint64x1 [] = { ffffffffffffff68, } +VQSUB/VQSUBQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSUB/VQSUBQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSUB/VQSUBQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQSUB/VQSUBQ:27:result_int8x16 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, } +VQSUB/VQSUBQ:28:result_int16x8 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VQSUB/VQSUBQ:29:result_int32x4 [] = { ffffffbd, ffffffbe, ffffffbf, ffffffc0, } +VQSUB/VQSUBQ:30:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:31:result_uint8x16 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, } +VQSUB/VQSUBQ:32:result_uint16x8 [] = { ff8a, ff8b, ff8c, ff8d, ff8e, ff8f, ff90, ff91, } +VQSUB/VQSUBQ:33:result_uint32x4 [] = { ffffff79, ffffff7a, ffffff7b, ffffff7c, } +VQSUB/VQSUBQ:34:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } 
+VQSUB/VQSUBQ:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSUB/VQSUBQ:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSUB/VQSUBQ:37:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:38:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:39:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:40:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:41:vqsubq_u64 Neon cumulative saturation 0 + +64 bits saturation: +VQSUB/VQSUBQ:42:result_int64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:43:result_uint64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:44:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQSUB/VQSUBQ:45:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:46:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:47:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:48:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:49:vqsubq_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:50:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:51:result_uint64x1 [] = { ffffffffffffff68, } +VQSUB/VQSUBQ:52:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:53:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:54:vqsub_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:55:vqsub_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:56:vqsubq_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:57:vqsubq_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:58:result_int64x1 [] = { 8000000000000000, } +VQSUB/VQSUBQ:59:result_uint64x1 [] = { 0, } +VQSUB/VQSUBQ:60:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSUB/VQSUBQ:61:result_uint64x2 [] = { 0, 0, } + +less than 64 bits saturation: 
+VQSUB/VQSUBQ:62:vqsub_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:63:vqsub_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:64:vqsub_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:65:vqsubq_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:66:vqsubq_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:67:vqsubq_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:68:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:69:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:70:result_int32x2 [] = { 80000000, 80000000, } +VQSUB/VQSUBQ:71:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:72:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:73:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQSUB/VQSUBQ less than 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:74:vqsub_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:75:vqsub_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:76:vqsub_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:77:vqsubq_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:78:vqsubq_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:79:vqsubq_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:80:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:81:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSUB/VQSUBQ:82:result_uint32x2 [] = { 0, 0, } +VQSUB/VQSUBQ:83:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:84:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:85:result_uint32x4 [] = { 0, 0, 0, 0, } + +VQDMULH cumulative saturation output: +VQDMULH:0:vqdmulh_s16 Neon cumulative saturation 0 +VQDMULH:1:vqdmulh_s32 Neon cumulative saturation 0 +VQDMULH:2:vqdmulhq_s16 Neon cumulative saturation 0 
+VQDMULH:3:vqdmulhq_s32 Neon cumulative saturation 0 + +VQDMULH output: +VQDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:14:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:16:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:17:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMULH cumulative saturation output: +VQDMULH:26:vqdmulh_s16 Neon cumulative saturation 1 +VQDMULH:27:vqdmulh_s32 Neon cumulative saturation 1 +VQDMULH:28:vqdmulhq_s16 Neon cumulative saturation 1 +VQDMULH:29:vqdmulhq_s32 Neon cumulative saturation 1 + +VQDMULH output: 
+VQDMULH:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH:33:result_int64x1 [] = { 3333333333333333, } +VQDMULH:34:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:35:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:36:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:37:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:38:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:40:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH:41:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:42:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH:43:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH:44:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:45:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:46:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:47:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:48:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:49:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:50:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:51:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMULH_LANE cumulative saturation output: +VQDMULH_LANE:0:vqdmulh_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:1:vqdmulh_lane_s32 Neon cumulative saturation 0 +VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon cumulative saturation 0 + +VQDMULH_LANE output: +VQDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMULH_LANE:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:14:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:16:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:17:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULH_LANE:26:vqdmulh_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:27:vqdmulh_lane_s32 Neon cumulative saturation 1 +VQDMULH_LANE:28:vqdmulhq_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:29:vqdmulhq_lane_s32 Neon cumulative 
saturation 1 + +VQDMULH_LANE (check mul cumulative saturation) output: +VQDMULH_LANE:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH_LANE:33:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:34:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:35:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:36:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:37:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:38:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:40:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:41:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:42:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:43:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_LANE:44:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:45:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:46:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:47:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:48:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:49:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:50:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:51:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMULH_N cumulative saturation output: +VQDMULH_N:0:vqdmulh_n_s16 Neon cumulative saturation 0 +VQDMULH_N:1:vqdmulh_n_s32 Neon cumulative saturation 0 +VQDMULH_N:2:vqdmulhq_n_s16 Neon cumulative saturation 0 
+VQDMULH_N:3:vqdmulhq_n_s32 Neon cumulative saturation 0 + +VQDMULH_N output: +VQDMULH_N:4:result_int16x4 [] = { 19, 19, 19, 19, } +VQDMULH_N:5:result_int32x2 [] = { 4, 4, } +VQDMULH_N:6:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQDMULH_N:7:result_int32x4 [] = { a, a, a, a, } + +VQDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQDMULH_N:8:vqdmulh_n_s16 Neon cumulative saturation 1 +VQDMULH_N:9:vqdmulh_n_s32 Neon cumulative saturation 1 +VQDMULH_N:10:vqdmulhq_n_s16 Neon cumulative saturation 1 +VQDMULH_N:11:vqdmulhq_n_s32 Neon cumulative saturation 1 + +VQDMULH_N (check mul cumulative saturation) output: +VQDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_N:20:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:21:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_N:22:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_N:23:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:24:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:25:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_N:26:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:27:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:28:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_N:29:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_N:30:result_uint64x2 [] = { 
3333333333333333, 3333333333333333, } +VQDMULH_N:31:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:32:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_N:33:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMULL cumulative saturation output: +VQDMULL:0:vqdmull_s16 Neon cumulative saturation 0 +VQDMULL:1:vqdmull_s32 Neon cumulative saturation 0 + +VQDMULL output: +VQDMULL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:5:result_int64x1 [] = { 3333333333333333, } +VQDMULL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMULL:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:14:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:15:result_int32x4 [] = { 200, 1c2, 188, 152, } +VQDMULL:16:result_int64x2 [] = { 200, 1c2, } +VQDMULL:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:23:result_float32x4 [] = { 33333333, 33333333, 
33333333, 33333333, } + +VQDMULL (check mul cumulative saturation) cumulative saturation output: +VQDMULL:24:vqdmull_s16 Neon cumulative saturation 1 +VQDMULL:25:vqdmull_s32 Neon cumulative saturation 1 + +VQDMULL (check mul cumulative saturation) output: +VQDMULL:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:28:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:29:result_int64x1 [] = { 3333333333333333, } +VQDMULL:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:33:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:34:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:35:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:36:result_float32x2 [] = { 33333333, 33333333, } +VQDMULL:37:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:38:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:39:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL:40:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQDMULL:41:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:42:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:43:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:44:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:47:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLAL cumulative saturation output: +VQDMLAL:0:vqdmlal_s16 Neon cumulative saturation 0 +VQDMLAL:1:vqdmlal_s32 Neon cumulative saturation 0 + 
+VQDMLAL output: +VQDMLAL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:14:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:15:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL:16:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:23:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLAL (check mul cumulative saturation) cumulative saturation output: +VQDMLAL:24:vqdmlal_s16 Neon cumulative saturation 1 +VQDMLAL:25:vqdmlal_s32 Neon cumulative saturation 1 + +VQDMLAL (check mul cumulative saturation) output: +VQDMLAL:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:28:result_int32x2 [] = { 33333333, 33333333, } 
+VQDMLAL:29:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:33:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:34:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:35:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:36:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL:37:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:38:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:39:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL:40:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL:41:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:42:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:43:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:44:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:47:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLSL cumulative saturation output: +VQDMLSL:0:vqdmlsl_s16 Neon cumulative saturation 0 +VQDMLSL:1:vqdmlsl_s32 Neon cumulative saturation 0 + +VQDMLSL output: +VQDMLSL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:8:result_uint32x2 [] = { 33333333, 33333333, } 
+VQDMLSL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:14:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:15:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL:16:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:21:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:22:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:23:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQDMLSL (check mul cumulative saturation) cumulative saturation output: +VQDMLSL:24:vqdmlsl_s16 Neon cumulative saturation 1 +VQDMLSL:25:vqdmlsl_s32 Neon cumulative saturation 1 + +VQDMLSL (check mul cumulative saturation) output: +VQDMLSL:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:27:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:28:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:29:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:33:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL:34:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:35:result_poly16x4 [] = { 3333, 3333, 
3333, 3333, } +VQDMLSL:36:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL:37:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:38:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:39:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL:40:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL:41:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:42:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:43:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:44:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:45:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:46:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:47:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VCEQ/VCEQQ output: +VCEQ/VCEQQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, 0, } +VCEQ/VCEQQ:1:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:3:result_uint8x8 [] = { 0, 0, 0, ff, 0, 0, 0, 0, } +VCEQ/VCEQQ:4:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, 0, 0, 0, } +VCEQ/VCEQQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:8:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCEQ/VCEQQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:11:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:13:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:15:result_uint32x2 [] = { 0, 
ffffffff, } +VCEQ/VCEQQ:16:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCEQ/VCEQQ p8 output: +VCEQ/VCEQQ:0:result_uint8x8 [] = { 0, 0, 0, ff, 0, 0, 0, 0, } +VCEQ/VCEQQ:1:result_uint8x16 [] = { 0, 0, 0, 0, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } + +VCGE/VCGEQ output: +VCGE/VCGEQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, ff, } +VCGE/VCGEQ:1:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:3:result_uint8x8 [] = { 0, 0, 0, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:4:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, ff, } +VCGE/VCGEQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:8:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:11:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:13:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:14:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:15:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:16:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special 
(NaN):19:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (-inf):21:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (inf):22:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCLE/VCLEQ output: +VCLE/VCLEQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, 0, } +VCLE/VCLEQ:1:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:3:result_uint8x8 [] = { ff, ff, ff, ff, 0, 0, 0, 0, } +VCLE/VCLEQ:4:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:5:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, } +VCLE/VCLEQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 0, } +VCLE/VCLEQ:8:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:9:result_uint8x16 [] = { ff, ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLE/VCLEQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 0, } +VCLE/VCLEQ:11:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:13:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:15:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:16:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (inf):20:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, 
} + +VCGT/VCGTQ output: +VCGT/VCGTQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, ff, } +VCGT/VCGTQ:1:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:2:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:3:result_uint8x8 [] = { 0, 0, 0, 0, ff, ff, ff, ff, } +VCGT/VCGTQ:4:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:5:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, } +VCGT/VCGTQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:8:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:9:result_uint8x16 [] = { 0, 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGT/VCGTQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:11:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:12:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:13:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:14:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:15:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:16:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (-inf):21:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGT/VCGTQ FP special (inf):22:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGT/VCGTQ FP special (-0.0):23:result_uint32x2 [] = { 0, 0, } + +VCLT/VCLTQ output: +VCLT/VCLTQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, 0, 0, } +VCLT/VCLTQ:1:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:2:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:3:result_uint8x8 [] = { ff, ff, ff, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:4:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:5:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, 0, } 
+VCLT/VCLTQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } +VCLT/VCLTQ:8:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:9:result_uint8x16 [] = { ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } +VCLT/VCLTQ:11:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:12:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:13:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:14:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:15:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:16:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (inf):20:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLT/VCLTQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (-0.0):23:result_uint32x2 [] = { 0, 0, } + +VBSL/VBSLQ output: +VBSL/VBSLQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:3:result_int64x1 [] = { fffffffffffffffd, } +VBSL/VBSLQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:5:result_uint16x4 [] = { fff0, fff0, fff2, fff2, } +VBSL/VBSLQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:7:result_uint64x1 [] = { fffffff1, } +VBSL/VBSLQ:8:result_poly8x8 [] = { f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:9:result_poly16x4 [] = { fff0, fff0, fff2, fff2, } +VBSL/VBSLQ:10:result_float32x2 [] = { c1800004, c1700004, } +VBSL/VBSLQ:11:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, fffffff2, 
fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:12:result_int16x8 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VBSL/VBSLQ:13:result_int32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:14:result_int64x2 [] = { fffffffffffffffd, fffffffffffffffd, } +VBSL/VBSLQ:15:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:16:result_uint16x8 [] = { fff0, fff0, fff2, fff2, fff4, fff4, fff6, fff6, } +VBSL/VBSLQ:17:result_uint32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:18:result_uint64x2 [] = { fffffff1, fffffff1, } +VBSL/VBSLQ:19:result_poly8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:20:result_poly16x8 [] = { fff0, fff0, fff2, fff2, fff4, fff4, fff6, fff6, } +VBSL/VBSLQ:21:result_float32x4 [] = { c1800001, c1700001, c1600001, c1500001, } + +VSHL/VSHLQ output: +VSHL/VSHLQ:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL/VSHLQ:1:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VSHL/VSHLQ:2:result_int32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:3:result_int64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:4:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VSHL/VSHLQ:5:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VSHL/VSHLQ:6:result_uint32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:7:result_uint64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:11:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL/VSHLQ:12:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:13:result_int32x4 [] = { 0, 
40000000, 80000000, c0000000, } +VSHL/VSHLQ:14:result_int64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:15:result_uint8x16 [] = { 0, 20, 40, 60, 80, a0, c0, e0, 0, 20, 40, 60, 80, a0, c0, e0, } +VSHL/VSHLQ:16:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:17:result_uint32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:18:result_uint64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSHL/VSHLQ (large shift amount) output: +VSHL/VSHLQ:22:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:23:result_int16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:24:result_int32x2 [] = { 0, 0, } +VSHL/VSHLQ:25:result_int64x1 [] = { 0, } +VSHL/VSHLQ:26:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:27:result_uint16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:28:result_uint32x2 [] = { 0, 0, } +VSHL/VSHLQ:29:result_uint64x1 [] = { 0, } +VSHL/VSHLQ:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:32:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:33:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:34:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:35:result_int32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:36:result_int64x2 [] = { 0, 0, } +VSHL/VSHLQ:37:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:38:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:39:result_uint32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:40:result_uint64x2 [] = { 0, 0, } +VSHL/VSHLQ:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 
3333, 3333, 3333, } +VSHL/VSHLQ:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSHL/VSHLQ (negative shift amount) output: +VSHL/VSHLQ:44:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHL/VSHLQ:45:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHL/VSHLQ:46:result_int32x2 [] = { fffffffc, fffffffc, } +VSHL/VSHLQ:47:result_int64x1 [] = { ffffffffffffffff, } +VSHL/VSHLQ:48:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VSHL/VSHLQ:49:result_uint16x4 [] = { 7ff8, 7ff8, 7ff9, 7ff9, } +VSHL/VSHLQ:50:result_uint32x2 [] = { 3ffffffc, 3ffffffc, } +VSHL/VSHLQ:51:result_uint64x1 [] = { fffffffffffffff, } +VSHL/VSHLQ:52:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:53:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:54:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:55:result_int8x16 [] = { fffffffc, fffffffc, fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, fffffffe, fffffffe, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHL/VSHLQ:56:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHL/VSHLQ:57:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } +VSHL/VSHLQ:58:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHL/VSHLQ:59:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHL/VSHLQ:60:result_uint16x8 [] = { 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, } +VSHL/VSHLQ:61:result_uint32x4 [] = { 1ffffffe, 1ffffffe, 1ffffffe, 1ffffffe, } +VSHL/VSHLQ:62:result_uint64x2 [] = { 7ffffffffffffff, 7ffffffffffffff, } +VSHL/VSHLQ:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:64:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:65:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + 
+VSHL_N output: +VSHL_N:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:1:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHL_N:2:result_int32x2 [] = { ffffff80, ffffff88, } +VSHL_N:3:result_int64x1 [] = { ffffffffffffffc0, } +VSHL_N:4:result_uint8x8 [] = { c0, c4, c8, cc, d0, d4, d8, dc, } +VSHL_N:5:result_uint16x4 [] = { ff00, ff10, ff20, ff30, } +VSHL_N:6:result_uint32x2 [] = { ffffff80, ffffff88, } +VSHL_N:7:result_uint64x1 [] = { ffffffffffffffe0, } +VSHL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHL_N:11:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL_N:12:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:13:result_int32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:14:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VSHL_N:15:result_uint8x16 [] = { c0, c4, c8, cc, d0, d4, d8, dc, e0, e4, e8, ec, f0, f4, f8, fc, } +VSHL_N:16:result_uint16x8 [] = { ff80, ff88, ff90, ff98, ffa0, ffa8, ffb0, ffb8, } +VSHL_N:17:result_uint32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:18:result_uint64x2 [] = { ffffffffffffffe0, ffffffffffffffe2, } +VSHL_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL/VQSHLQ (with input = 0) cumulative saturation output: +VQSHL/VQSHLQ:0:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:1:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:2:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:3:vqshl_s64 Neon cumulative 
saturation 0 +VQSHL/VQSHLQ:4:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:5:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:6:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:7:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:8:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:9:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:10:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:11:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:12:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:13:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:14:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:15:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (with input = 0) output: +VQSHL/VQSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:18:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:19:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:23:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:29:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:30:result_int64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:31:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:34:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:36:result_poly16x8 
[] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:37:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL/VQSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:38:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:39:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:40:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:41:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:42:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:43:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:44:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:45:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:46:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:47:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:48:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:49:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:50:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:51:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:52:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:53:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (input 0 and negative shift amount) output: +VQSHL/VQSHLQ:54:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:55:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:56:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:57:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:58:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:59:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:60:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:61:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:62:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:63:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:64:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:65:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:66:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:67:result_int32x4 [] = { 0, 
0, 0, 0, } +VQSHL/VQSHLQ:68:result_int64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:69:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:70:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:71:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:72:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:73:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:74:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:75:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL/VQSHLQ cumulative saturation output: +VQSHL/VQSHLQ:76:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:77:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:78:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:79:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:80:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:81:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:82:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:83:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:84:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:85:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:86:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:87:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:88:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:89:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:90:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:91:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ output: +VQSHL/VQSHLQ:92:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL/VQSHLQ:93:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQSHL/VQSHLQ:94:result_int32x2 [] = { fffff000, fffff100, } +VQSHL/VQSHLQ:95:result_int64x1 [] = { fffffffffffffffe, } +VQSHL/VQSHLQ:96:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:97:result_uint16x4 [] = { 
ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:98:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:99:result_uint64x1 [] = { 1ffffffffffffffe, } +VQSHL/VQSHLQ:100:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:101:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:102:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:103:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:104:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:105:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:106:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:107:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:108:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:109:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:110:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:111:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:112:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:113:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL/VQSHLQ (negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:114:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:115:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:116:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:117:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:118:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:119:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:120:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:121:vqshl_u64 Neon cumulative 
saturation 0 +VQSHL/VQSHLQ:122:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:123:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:124:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:125:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:126:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:127:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:128:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:129:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (negative shift amount) output: +VQSHL/VQSHLQ:130:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHL/VQSHLQ:131:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VQSHL/VQSHLQ:132:result_int32x2 [] = { fffffffe, fffffffe, } +VQSHL/VQSHLQ:133:result_int64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:134:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VQSHL/VQSHLQ:135:result_uint16x4 [] = { 3ffc, 3ffc, 3ffc, 3ffc, } +VQSHL/VQSHLQ:136:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQSHL/VQSHLQ:137:result_uint64x1 [] = { fffffffffffffff, } +VQSHL/VQSHLQ:138:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:139:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:140:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:141:result_int8x16 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:142:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:143:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:144:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:145:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VQSHL/VQSHLQ:146:result_uint16x8 [] = { 1f, 1f, 1f, 1f, 1f, 1f, 1f, 1f, } 
+VQSHL/VQSHLQ:147:result_uint32x4 [] = { 7ffff, 7ffff, 7ffff, 7ffff, } +VQSHL/VQSHLQ:148:result_uint64x2 [] = { fffffffffff, fffffffffff, } +VQSHL/VQSHLQ:149:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:150:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:151:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL/VQSHLQ (large shift amount, negative input) cumulative saturation output: +VQSHL/VQSHLQ:152:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:153:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:154:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:155:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:156:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:157:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:158:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:159:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:160:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:161:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:162:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:163:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:164:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:165:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:166:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:167:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ (large shift amount, negative input) output: +VQSHL/VQSHLQ:168:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:169:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:170:result_int32x2 [] = { 80000000, 80000000, } +VQSHL/VQSHLQ:171:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:172:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:173:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:174:result_uint32x2 [] = { ffffffff, 
ffffffff, } +VQSHL/VQSHLQ:175:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:176:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:177:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:178:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:179:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:180:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:181:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:182:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:183:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:184:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:185:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:186:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:187:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:188:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:189:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL/VQSHLQ (check cumulative saturation) cumulative saturation output: +VQSHL/VQSHLQ:190:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:191:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:192:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:193:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:194:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:195:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:196:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:197:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:198:vqshlq_s8 Neon cumulative saturation 0 
+VQSHL/VQSHLQ:199:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:200:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:201:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:202:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:203:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:204:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:205:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (check cumulative saturation) output: +VQSHL/VQSHLQ:206:result_int8x8 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:207:result_int16x4 [] = { 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:208:result_int32x2 [] = { 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:209:result_int64x1 [] = { 3fffffffffffffff, } +VQSHL/VQSHLQ:210:result_uint8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:211:result_uint16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:212:result_uint32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:213:result_uint64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:214:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:215:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:216:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:217:result_int8x16 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:218:result_int16x8 [] = { 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:219:result_int32x4 [] = { 3fffffff, 3fffffff, 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:220:result_int64x2 [] = { 3fffffffffffffff, 3fffffffffffffff, } +VQSHL/VQSHLQ:221:result_uint8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:222:result_uint16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:223:result_uint32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:224:result_uint64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:225:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, } +VQSHL/VQSHLQ:226:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:227:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL/VQSHLQ (large shift amount, positive input) cumulative saturation output: +VQSHL/VQSHLQ:228:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:229:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:230:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:231:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:232:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:233:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:234:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:235:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:236:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:237:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:238:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:239:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:240:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:241:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:242:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:243:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ (large shift amount, positive input) output: +VQSHL/VQSHLQ:244:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:245:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:246:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:247:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:248:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:249:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:250:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:251:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:252:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:253:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:254:result_float32x2 [] = { 33333333, 33333333, } 
+VQSHL/VQSHLQ:255:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:256:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:257:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:258:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:259:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:260:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:261:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:262:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:263:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:264:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:265:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL/VQSHLQ (check saturation on 64 bits) cumulative saturation output: +VQSHL/VQSHLQ:266:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:267:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:268:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:269:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:270:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:271:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:272:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:273:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:274:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:275:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:276:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:277:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:278:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:279:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:280:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:281:vqshlq_u64 Neon cumulative saturation 1 + 
+VQSHL/VQSHLQ (check saturation on 64 bits) output: +VQSHL/VQSHLQ:282:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:283:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:284:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:285:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:286:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:287:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:288:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:289:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:290:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:291:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:292:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:293:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:294:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:295:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:296:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:297:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:298:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:299:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:300:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:301:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:302:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:303:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL_N/VQSHLQ_N cumulative saturation output: +VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon cumulative 
saturation 0 +VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon cumulative saturation 1 + +VQSHL_N/VQSHLQ_N output: +VQSHL_N/VQSHLQ_N:16:result_int8x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } +VQSHL_N/VQSHLQ_N:17:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:18:result_int32x2 [] = { ffffffe0, ffffffe2, } +VQSHL_N/VQSHLQ_N:19:result_int64x1 [] = { ffffffffffffffc0, } +VQSHL_N/VQSHLQ_N:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:23:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:26:result_float32x2 [] = { 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:27:result_int8x16 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, ffffffe0, ffffffe4, ffffffe8, ffffffec, fffffff0, fffffff4, fffffff8, fffffffc, } +VQSHL_N/VQSHLQ_N:28:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, 
ffffffee, } +VQSHL_N/VQSHLQ_N:29:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:30:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VQSHL_N/VQSHLQ_N:31:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:32:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:33:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:34:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:37:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) cumulative saturation output: +VQSHL_N/VQSHLQ_N:38:vqshl_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:39:vqshl_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:40:vqshl_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:41:vqshl_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:42:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:43:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:44:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:45:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:46:vqshlq_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:47:vqshlq_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:48:vqshlq_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:49:vqshlq_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:50:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:51:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:52:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:53:vqshlq_n_u64 Neon cumulative saturation 1 + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) 
output: +VQSHL_N/VQSHLQ_N:54:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:55:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:56:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:57:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:58:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:59:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:60:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:61:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:62:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:63:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:64:result_float32x2 [] = { 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:65:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:66:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:67:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:68:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:69:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:70:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:71:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:72:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:73:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:74:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:75:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ (with input = 0) output: +VRSHL/VRSHLQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:2:result_int32x2 [] = { 0, 
0, } +VRSHL/VRSHLQ:3:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:6:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:7:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:11:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:12:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:13:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:14:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:15:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:16:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:17:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:18:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ (input 0 and negative shift amount) output: +VRSHL/VRSHLQ:22:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:23:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:24:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:25:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:26:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:27:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:28:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:29:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:32:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:33:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:34:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:35:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:36:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:37:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:38:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:39:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:40:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ output: +VRSHL/VRSHLQ:44:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VRSHL/VRSHLQ:45:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VRSHL/VRSHLQ:46:result_int32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:47:result_int64x1 [] = { fffffffffffffffe, } +VRSHL/VRSHLQ:48:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VRSHL/VRSHLQ:49:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VRSHL/VRSHLQ:50:result_uint32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:51:result_uint64x1 [] = { 1ffffffffffffffe, } +VRSHL/VRSHLQ:52:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:53:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:54:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:55:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:56:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:57:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:58:result_int64x2 [] = { 0, 8000000000000000, } +VRSHL/VRSHLQ:59:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:60:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } 
+VRSHL/VRSHLQ:61:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:62:result_uint64x2 [] = { 0, 8000000000000000, } +VRSHL/VRSHLQ:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:64:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:65:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ (negative shift amount) output: +VRSHL/VRSHLQ:66:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHL/VRSHLQ:67:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHL/VRSHLQ:68:result_int32x2 [] = { fffffffe, fffffffe, } +VRSHL/VRSHLQ:69:result_int64x1 [] = { ffffffffffffffff, } +VRSHL/VRSHLQ:70:result_uint8x8 [] = { 78, 79, 79, 7a, 7a, 7b, 7b, 7c, } +VRSHL/VRSHLQ:71:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VRSHL/VRSHLQ:72:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VRSHL/VRSHLQ:73:result_uint64x1 [] = { fffffffffffffff, } +VRSHL/VRSHLQ:74:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:75:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:76:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:77:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:78:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:79:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:80:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:81:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VRSHL/VRSHLQ:82:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:83:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VRSHL/VRSHLQ:84:result_uint64x2 [] = { 100000000000, 100000000000, } +VRSHL/VRSHLQ:85:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:86:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VRSHL/VRSHLQ:87:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ (checking round_const overflow: shift by -1) output: +VRSHL/VRSHLQ:88:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:89:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:90:result_int32x2 [] = { 40000000, 40000000, } +VRSHL/VRSHLQ:91:result_int64x1 [] = { 4000000000000000, } +VRSHL/VRSHLQ:92:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:93:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:94:result_uint32x2 [] = { 80000000, 80000000, } +VRSHL/VRSHLQ:95:result_uint64x1 [] = { 8000000000000000, } +VRSHL/VRSHLQ:96:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:97:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:98:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:99:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:100:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:101:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHL/VRSHLQ:102:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHL/VRSHLQ:103:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:104:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:105:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHL/VRSHLQ:106:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHL/VRSHLQ:107:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:108:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:109:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ (checking round_const overflow: shift by -3) output: +VRSHL/VRSHLQ:110:result_int8x8 [] = { 10, 10, 10, 10, 10, 
10, 10, 10, } +VRSHL/VRSHLQ:111:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:112:result_int32x2 [] = { 10000000, 10000000, } +VRSHL/VRSHLQ:113:result_int64x1 [] = { 1000000000000000, } +VRSHL/VRSHLQ:114:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:115:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:116:result_uint32x2 [] = { 20000000, 20000000, } +VRSHL/VRSHLQ:117:result_uint64x1 [] = { 2000000000000000, } +VRSHL/VRSHLQ:118:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:119:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:120:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:121:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:122:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:123:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHL/VRSHLQ:124:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHL/VRSHLQ:125:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:126:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:127:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHL/VRSHLQ:128:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHL/VRSHLQ:129:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:130:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:131:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ (checking negative shift amount as large as input vector width) output: +VRSHL/VRSHLQ:132:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:133:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:134:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:135:result_int64x1 [] = { 0, } 
+VRSHL/VRSHLQ:136:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:137:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSHL/VRSHLQ:138:result_uint32x2 [] = { 1, 1, } +VRSHL/VRSHLQ:139:result_uint64x1 [] = { 1, } +VRSHL/VRSHLQ:140:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:141:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:142:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:143:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:144:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:145:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:146:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:147:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:148:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:149:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHL/VRSHLQ:150:result_uint64x2 [] = { 1, 1, } +VRSHL/VRSHLQ:151:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:152:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:153:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ (large shift amount) output: +VRSHL/VRSHLQ:154:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:155:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:156:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:157:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:158:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:159:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:160:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:161:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:162:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:163:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:164:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:165:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } 
+VRSHL/VRSHLQ:166:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:167:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:168:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:169:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:170:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:171:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:172:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:173:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:174:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:175:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHL/VRSHLQ (large negative shift amount) output: +VRSHL/VRSHLQ:176:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:177:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:178:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:179:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:180:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:181:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:182:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:183:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:184:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:185:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:186:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:187:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:188:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:189:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:190:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:191:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:192:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:193:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHL/VRSHLQ:194:result_uint64x2 [] = { 1, 1, } +VRSHL/VRSHLQ:195:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:196:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:197:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD2/VLD2Q chunk 0 output: +VLD2/VLD2Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD2/VLD2Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD2/VLD2Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD2/VLD2Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD2/VLD2Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD2/VLD2Q:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:19:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } 
+VLD2/VLD2Q:21:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } + +VLD2/VLD2Q chunk 1 output: +VLD2/VLD2Q:22:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:23:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:24:result_int32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:25:result_int64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:26:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:27:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:28:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:29:result_uint64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:30:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:31:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:32:result_float32x2 [] = { c1600000, c1500000, } +VLD2/VLD2Q:33:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:34:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:35:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:37:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:38:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD2/VLD2Q:39:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:41:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:42:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD2/VLD2Q:43:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } + +VLD3/VLD3Q chunk 0 output: +VLD3/VLD3Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:1:result_int16x4 
[] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD3/VLD3Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD3/VLD3Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD3/VLD3Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3/VLD3Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD3/VLD3Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD3/VLD3Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3/VLD3Q:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:19:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:21:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } + +VLD3/VLD3Q chunk 1 output: +VLD3/VLD3Q:22:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:23:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:24:result_int32x2 [] = { fffffff2, fffffff3, } 
+VLD3/VLD3Q:25:result_int64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:26:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:27:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:28:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:29:result_uint64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:30:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:31:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:32:result_float32x2 [] = { c1600000, c1500000, } +VLD3/VLD3Q:33:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:34:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:35:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:37:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:38:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD3/VLD3Q:39:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:41:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:42:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD3/VLD3Q:43:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } + +VLD3/VLD3Q chunk 2 output: +VLD3/VLD3Q:44:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:45:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:46:result_int32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:47:result_int64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:48:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:49:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD3/VLD3Q:50:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:51:result_uint64x1 [] = { fffffffffffffff2, } 
+VLD3/VLD3Q:52:result_poly8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:53:result_poly16x4 [] = { fff8, fff9, fffa, fffb, } +VLD3/VLD3Q:54:result_float32x2 [] = { c1400000, c1300000, } +VLD3/VLD3Q:55:result_int8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:56:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:57:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:59:result_uint8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:60:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:61:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:63:result_poly8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:64:result_poly16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:65:result_float32x4 [] = { c1000000, c0e00000, c0c00000, c0a00000, } + +VLD4/VLD4Q chunk 0 output: +VLD4/VLD4Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD4/VLD4Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4/VLD4Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD4/VLD4Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4/VLD4Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4/VLD4Q:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, 
fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:19:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:21:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } + +VLD4/VLD4Q chunk 1 output: +VLD4/VLD4Q:22:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:23:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:24:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:25:result_int64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:26:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:27:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:28:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:29:result_uint64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:30:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:31:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:32:result_float32x2 [] = { c1600000, c1500000, } +VLD4/VLD4Q:33:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:34:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:35:result_int32x4 [] = { 
fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:37:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:38:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:39:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:41:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:42:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:43:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } + +VLD4/VLD4Q chunk 2 output: +VLD4/VLD4Q:44:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:45:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:46:result_int32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:47:result_int64x1 [] = { fffffffffffffff2, } +VLD4/VLD4Q:48:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:49:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD4/VLD4Q:50:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:51:result_uint64x1 [] = { fffffffffffffff2, } +VLD4/VLD4Q:52:result_poly8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:53:result_poly16x4 [] = { fff8, fff9, fffa, fffb, } +VLD4/VLD4Q:54:result_float32x2 [] = { c1400000, c1300000, } +VLD4/VLD4Q:55:result_int8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:56:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:57:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:59:result_uint8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:60:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:61:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } 
+VLD4/VLD4Q:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:63:result_poly8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:64:result_poly16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:65:result_float32x4 [] = { c1000000, c0e00000, c0c00000, c0a00000, } + +VLD4/VLD4Q chunk 3 output: +VLD4/VLD4Q:66:result_int8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:67:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:68:result_int32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:69:result_int64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:70:result_uint8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:71:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:72:result_uint32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:73:result_uint64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:74:result_poly8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:75:result_poly16x4 [] = { fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:76:result_float32x2 [] = { c1200000, c1100000, } +VLD4/VLD4Q:77:result_int8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:78:result_int16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:79:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:80:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:81:result_uint8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:82:result_uint16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:83:result_uint32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:84:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:85:result_poly8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:86:result_poly16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:87:result_float32x4 [] = { c0800000, c0400000, c0000000, bf800000, } + +VDUP_LANE/VDUP_LANEQ output: 
+VDUP_LANE/VDUP_LANEQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:4:result_uint8x8 [] = { f7, f7, f7, f7, f7, f7, f7, f7, } +VDUP_LANE/VDUP_LANEQ:5:result_uint16x4 [] = { fff3, fff3, fff3, fff3, } +VDUP_LANE/VDUP_LANEQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:8:result_poly8x8 [] = { f7, f7, f7, f7, f7, f7, f7, f7, } +VDUP_LANE/VDUP_LANEQ:9:result_poly16x4 [] = { fff3, fff3, fff3, fff3, } +VDUP_LANE/VDUP_LANEQ:10:result_float32x2 [] = { c1700000, c1700000, } +VDUP_LANE/VDUP_LANEQ:11:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:12:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VDUP_LANE/VDUP_LANEQ:13:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:14:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:15:result_uint8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, } +VDUP_LANE/VDUP_LANEQ:16:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP_LANE/VDUP_LANEQ:17:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP_LANE/VDUP_LANEQ:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:19:result_poly8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, } +VDUP_LANE/VDUP_LANEQ:20:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } 
+VDUP_LANE/VDUP_LANEQ:21:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } + +VQDMULL_LANE cumulative saturation output: +VQDMULL_LANE:0:vqdmull_lane_s16 Neon cumulative saturation 0 +VQDMULL_LANE:1:vqdmull_lane_s32 Neon cumulative saturation 0 + +VQDMULL_LANE output: +VQDMULL_LANE:2:result_int32x4 [] = { 8000, 8000, 8000, 8000, } +VQDMULL_LANE:3:result_int64x2 [] = { 4000, 4000, } + +VQDMULL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULL_LANE:4:vqdmull_lane_s16 Neon cumulative saturation 1 +VQDMULL_LANE:5:vqdmull_lane_s32 Neon cumulative saturation 1 + +VQDMULL_LANE (check mul cumulative saturation) output: +VQDMULL_LANE:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_LANE:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VQDMULL_N cumulative saturation output: +VQDMULL_N:0:vqdmull_n_s16 Neon cumulative saturation 0 +VQDMULL_N:1:vqdmull_n_s32 Neon cumulative saturation 0 + +VQDMULL_N output: +VQDMULL_N:2:result_int32x4 [] = { 44000, 44000, 44000, 44000, } +VQDMULL_N:3:result_int64x2 [] = { aa000, aa000, } + +VQDMULL_N (check mul cumulative saturation) cumulative saturation output: +VQDMULL_N:4:vqdmull_n_s16 Neon cumulative saturation 1 +VQDMULL_N:5:vqdmull_n_s32 Neon cumulative saturation 1 + +VQDMULL_N (check mul cumulative saturation) output: +VQDMULL_N:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_N:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VST1_LANE/VST1_LANEQ output: +VST1_LANE/VST1_LANEQ:0:result_int8x8 [] = { fffffff7, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:1:result_int16x4 [] = { fffffff3, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:2:result_int32x2 [] = { fffffff1, 33333333, } +VST1_LANE/VST1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:4:result_uint8x8 [] = { f6, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:5:result_uint16x4 [] = { fff2, 3333, 3333, 3333, } 
+VST1_LANE/VST1_LANEQ:6:result_uint32x2 [] = { fffffff0, 33333333, } +VST1_LANE/VST1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:8:result_poly8x8 [] = { f6, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:9:result_poly16x4 [] = { fff2, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:10:result_float32x2 [] = { c1700000, 33333333, } +VST1_LANE/VST1_LANEQ:11:result_int8x16 [] = { ffffffff, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:12:result_int16x8 [] = { fffffff5, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:13:result_int32x4 [] = { fffffff1, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:14:result_int64x2 [] = { fffffffffffffff1, 3333333333333333, } +VST1_LANE/VST1_LANEQ:15:result_uint8x16 [] = { fa, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:16:result_uint16x8 [] = { fff4, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:17:result_uint32x4 [] = { fffffff3, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:18:result_uint64x2 [] = { fffffffffffffff0, 3333333333333333, } +VST1_LANE/VST1_LANEQ:19:result_poly8x16 [] = { fa, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:20:result_poly16x8 [] = { fff4, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:21:result_float32x4 [] = { c1700000, 33333333, 33333333, 33333333, } + +VSUB/VSUBQ output: +VSUB/VSUBQ:0:result_int8x8 [] = { ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VSUB/VSUBQ:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VSUB/VSUBQ:2:result_int32x2 [] = { ffffffed, ffffffee, } +VSUB/VSUBQ:3:result_int64x1 [] = { ffffffffffffff8c, } +VSUB/VSUBQ:4:result_uint8x8 [] = { dc, dd, de, df, e0, e1, e2, e3, } +VSUB/VSUBQ:5:result_uint16x4 [] = { ffd2, ffd3, ffd4, ffd5, } +VSUB/VSUBQ:6:result_uint32x2 [] = { ffffffc8, ffffffc9, } +VSUB/VSUBQ:7:result_uint64x1 
[] = { ffffffffffffffee, } +VSUB/VSUBQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUB/VSUBQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUB/VSUBQ:10:result_float32x2 [] = { 33333333, 33333333, } +VSUB/VSUBQ:11:result_int8x16 [] = { fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, } +VSUB/VSUBQ:12:result_int16x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VSUB/VSUBQ:13:result_int32x4 [] = { e, f, 10, 11, } +VSUB/VSUBQ:14:result_int64x2 [] = { ffffffffffffffd8, ffffffffffffffd9, } +VSUB/VSUBQ:15:result_uint8x16 [] = { e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, } +VSUB/VSUBQ:16:result_uint16x8 [] = { ffed, ffee, ffef, fff0, fff1, fff2, fff3, fff4, } +VSUB/VSUBQ:17:result_uint32x4 [] = { ffffffb9, ffffffba, ffffffbb, ffffffbc, } +VSUB/VSUBQ:18:result_uint64x2 [] = { ffffffffffffffed, ffffffffffffffee, } +VSUB/VSUBQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUB/VSUBQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUB/VSUBQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +float32: +VSUB/VSUBQ:22:result_float32x2 [] = { c00ccccd, c00ccccd, } +VSUB/VSUBQ:23:result_float32x4 [] = { c00ccccc, c00ccccc, c00ccccc, c00ccccc, } + +VQADD/VQADDQ cumulative saturation output: +VQADD/VQADDQ:0:vqadd_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:1:vqadd_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:2:vqadd_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:3:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:4:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:5:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:6:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:7:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:8:vqaddq_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:9:vqaddq_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:10:vqaddq_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:11:vqaddq_s64 
Neon cumulative saturation 0 +VQADD/VQADDQ:12:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:13:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:14:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:15:vqaddq_u64 Neon cumulative saturation 1 + +VQADD/VQADDQ output: +VQADD/VQADDQ:16:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VQADD/VQADDQ:17:result_int16x4 [] = { 12, 13, 14, 15, } +VQADD/VQADDQ:18:result_int32x2 [] = { 23, 24, } +VQADD/VQADDQ:19:result_int64x1 [] = { 34, } +VQADD/VQADDQ:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:23:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQADD/VQADDQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQADD/VQADDQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQADD/VQADDQ:27:result_int8x16 [] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, } +VQADD/VQADDQ:28:result_int16x8 [] = { 12, 13, 14, 15, 16, 17, 18, 19, } +VQADD/VQADDQ:29:result_int32x4 [] = { 23, 24, 25, 26, } +VQADD/VQADDQ:30:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:31:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:32:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:33:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQADD/VQADDQ:34:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQADD/VQADDQ:35:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQADD/VQADDQ:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQADD/VQADDQ:37:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:38:vqadd_s64 Neon cumulative saturation 0 
+VQADD/VQADDQ:39:vqadd_u64 Neon cumulative saturation 0 +VQADD/VQADDQ:40:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:41:vqaddq_u64 Neon cumulative saturation 0 + +64 bits saturation: +VQADD/VQADDQ:42:result_int64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:43:result_uint64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:44:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQADD/VQADDQ:45:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:46:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:47:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:48:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:49:vqaddq_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:50:result_int64x1 [] = { 34, } +VQADD/VQADDQ:51:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:52:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:53:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:54:vqadd_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:55:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:56:vqaddq_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:57:vqaddq_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:58:result_int64x1 [] = { 8000000000000000, } +VQADD/VQADDQ:59:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:60:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQADD/VQADDQ:61:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +less than 64 bits saturation: +VQADD/VQADDQ:62:vqadd_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:63:vqadd_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:64:vqadd_s32 Neon cumulative saturation 1 +VQADD/VQADDQ:65:vqaddq_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:66:vqaddq_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:67:vqaddq_s32 Neon cumulative saturation 1 +VQADD/VQADDQ:68:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, 
ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:69:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:70:result_int32x2 [] = { 80000000, 80000000, } +VQADD/VQADDQ:71:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:72:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:73:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQADD/VQADDQ less than 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:74:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:75:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:76:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:77:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:78:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:79:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:80:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:81:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:82:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:83:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:84:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:85:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VABS/VABSQ output: +VABS/VABSQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:1:result_int16x4 [] = { 10, f, e, d, } +VABS/VABSQ:2:result_int32x2 [] = { 10, f, } +VABS/VABSQ:3:result_int64x1 [] = { 3333333333333333, } +VABS/VABSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABS/VABSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VABS/VABSQ:7:result_uint64x1 [] = { 3333333333333333, } 
+VABS/VABSQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABS/VABSQ:10:result_float32x2 [] = { 33333333, 33333333, } +VABS/VABSQ:11:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VABS/VABSQ:12:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:13:result_int32x4 [] = { 10, f, e, d, } +VABS/VABSQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABS/VABSQ:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABS/VABSQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABS/VABSQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +float32: +VABS/VABSQ:22:result_float32x2 [] = { 40133333, 40133333, } +VABS/VABSQ:23:result_float32x4 [] = { 4059999a, 4059999a, 4059999a, 4059999a, } + +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 0 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 0 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:7:result_int16x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:8:result_int32x2 [] = { 10, f, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 
3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:17:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQABS/VQABSQ:18:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:19:result_int32x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:21:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:22:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:23:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:24:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:27:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 1 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 1 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:17:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:18:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:19:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQABS/VQABSQ:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:21:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:22:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:23:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:24:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:27:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VCOMBINE output: +VCOMBINE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:2:result_int32x2 [] = { 33333333, 33333333, } +VCOMBINE:3:result_int64x1 [] = { 3333333333333333, } +VCOMBINE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:6:result_uint32x2 [] = { 33333333, 33333333, } +VCOMBINE:7:result_uint64x1 [] = { 3333333333333333, } +VCOMBINE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:9:result_poly16x4 [] = { 3333, 3333, 3333, 
3333, } +VCOMBINE:10:result_float32x2 [] = { 33333333, 33333333, } +VCOMBINE:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, 11, 11, 11, 11, 11, 11, 11, } +VCOMBINE:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 22, 22, 22, 22, } +VCOMBINE:13:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VCOMBINE:14:result_int64x2 [] = { fffffffffffffff0, 44, } +VCOMBINE:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, 55, 55, 55, 55, 55, 55, 55, 55, } +VCOMBINE:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 66, 66, 66, 66, } +VCOMBINE:17:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VCOMBINE:18:result_uint64x2 [] = { fffffffffffffff0, 88, } +VCOMBINE:19:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, 55, 55, 55, 55, 55, 55, 55, 55, } +VCOMBINE:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, 66, 66, 66, 66, } +VCOMBINE:21:result_float32x4 [] = { c1800000, c1700000, 40533333, 40533333, } + +VMAX/VMAXQ output: +VMAX/VMAXQ:0:result_int8x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff3, } +VMAX/VMAXQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:3:result_int64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f4, f5, f6, f7, } +VMAX/VMAXQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff3, } +VMAX/VMAXQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:7:result_uint64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMAX/VMAXQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMAX/VMAXQ:10:result_float32x2 [] = { c1780000, c1700000, } +VMAX/VMAXQ:11:result_int8x16 [] = { fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } 
+VMAX/VMAXQ:12:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:13:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:15:result_uint8x16 [] = { f9, f9, f9, f9, f9, f9, f9, f9, f9, f9, fa, fb, fc, fd, fe, ff, } +VMAX/VMAXQ:16:result_uint16x8 [] = { fff2, fff2, fff2, fff3, fff4, fff5, fff6, fff7, } +VMAX/VMAXQ:17:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMAX/VMAXQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMAX/VMAXQ:21:result_float32x4 [] = { c1680000, c1680000, c1600000, c1500000, } +VMAX/VMAXQ FP special (NaN):22:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMAX/VMAXQ FP special (-NaN):23:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMAX/VMAXQ FP special (inf):24:result_float32x4 [] = { 7f800000, 7f800000, 7f800000, 7f800000, } +VMAX/VMAXQ FP special (-inf):25:result_float32x4 [] = { 3f800000, 3f800000, 3f800000, 3f800000, } +VMAX/VMAXQ FP special (-0.0):26:result_float32x4 [] = { 0, 0, 0, 0, } +VMAX/VMAXQ FP special (-0.0):27:result_float32x4 [] = { 0, 0, 0, 0, } + +VMIN/VMINQ output: +VMIN/VMINQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff2, } +VMIN/VMINQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:3:result_int64x1 [] = { 3333333333333333, } +VMIN/VMINQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f3, f3, f3, f3, } +VMIN/VMINQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff1, } +VMIN/VMINQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:7:result_uint64x1 [] = { 
3333333333333333, } +VMIN/VMINQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMIN/VMINQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMIN/VMINQ:10:result_float32x2 [] = { c1800000, c1780000, } +VMIN/VMINQ:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, } +VMIN/VMINQ:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f9, f9, f9, f9, f9, f9, } +VMIN/VMINQ:16:result_uint16x8 [] = { fff0, fff1, fff2, fff2, fff2, fff2, fff2, fff2, } +VMIN/VMINQ:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMIN/VMINQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMIN/VMINQ:21:result_float32x4 [] = { c1800000, c1700000, c1680000, c1680000, } +VMIN/VMINQ FP special (NaN):22:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMIN/VMINQ FP special (-NaN):23:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMIN/VMINQ FP special (inf):24:result_float32x4 [] = { 3f800000, 3f800000, 3f800000, 3f800000, } +VMIN/VMINQ FP special (-inf):25:result_float32x4 [] = { ff800000, ff800000, ff800000, ff800000, } +VMIN/VMINQ FP special (-0.0):26:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VMIN/VMINQ FP special (-0.0):27:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VNEG/VNEGQ output: +VNEG/VNEGQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } 
+VNEG/VNEGQ:1:result_int16x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:2:result_int32x2 [] = { 10, f, } +VNEG/VNEGQ:3:result_int64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VNEG/VNEGQ:7:result_uint64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:10:result_float32x2 [] = { 33333333, 33333333, } +VNEG/VNEGQ:11:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VNEG/VNEGQ:12:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:13:result_int32x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VNEG/VNEGQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +float32: +VNEG/VNEGQ:22:result_float32x2 [] = { c0133333, c0133333, } +VNEG/VNEGQ:23:result_float32x4 [] = { c059999a, c059999a, c059999a, c059999a, } + +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 0 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:5:vqnegq_s32 
Neon cumulative saturation 0 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 10, f, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:17:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQNEG/VQNEGQ:18:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:19:result_int32x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:21:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:22:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:23:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:24:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:27:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 1 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 1 
+VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 1 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:17:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:18:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:19:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:21:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:22:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:23:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:24:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:27:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMLAL output: +VMLAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL:3:result_int64x1 
[] = { 3333333333333333, } +VMLAL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:12:result_int16x8 [] = { ffffe907, ffffe908, ffffe909, ffffe90a, ffffe90b, ffffe90c, ffffe90d, ffffe90e, } +VMLAL:13:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:14:result_int64x2 [] = { 3e07, 3e08, } +VMLAL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:16:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLAL:17:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:18:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMLSL output: +VMLSL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL:3:result_int64x1 [] = { 3333333333333333, } +VMLSL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:10:result_float32x2 [] = { 33333333, 33333333, } +VMLSL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:12:result_int16x8 [] = { 16d9, 16da, 16db, 16dc, 16dd, 16de, 16df, 16e0, } +VMLSL:13:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:14:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:16:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLSL:17:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:18:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMLAL_LANE output: +VMLAL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:13:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:14:result_int64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMLAL_LANE:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:17:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:18:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMLSL_LANE output: +VMLSL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:13:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:14:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:17:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:18:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMLSL_LANE:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMLAL_N output: +VMLAL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:13:result_int32x4 [] = { 595, 596, 597, 598, } +VMLAL_N:14:result_int64x2 [] = { b3a, b3b, } +VMLAL_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:17:result_uint32x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLAL_N:18:result_uint64x2 [] = { 10df, 10e0, } +VMLAL_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMLSL_N output: +VMLSL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 
33, 33, } +VMLSL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VMLSL_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:13:result_int32x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLSL_N:14:result_int64x2 [] = { fffffffffffff4a6, fffffffffffff4a7, } +VMLSL_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:17:result_uint32x4 [] = { ffffef01, ffffef02, ffffef03, ffffef04, } +VMLSL_N:18:result_uint64x2 [] = { ffffffffffffef01, ffffffffffffef02, } +VMLSL_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMOVL output: +VMOVL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:2:result_int32x2 [] = { 33333333, 33333333, } +VMOVL:3:result_int64x1 [] = { 3333333333333333, } +VMOVL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMOVL:7:result_uint64x1 [] = { 3333333333333333, } +VMOVL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:10:result_float32x2 [] = { 33333333, 33333333, } +VMOVL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVL:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVL:14:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VMOVL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:16:result_uint16x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVL:17:result_uint32x4 [] = { fff0, fff1, fff2, fff3, } +VMOVL:18:result_uint64x2 [] = { fffffff0, fffffff1, } +VMOVL:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVL:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMOVN output: +VMOVN:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVN:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVN:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMOVN:3:result_int64x1 [] = { 3333333333333333, } +VMOVN:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVN:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VMOVN:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMOVN:7:result_uint64x1 [] = { 3333333333333333, } +VMOVN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVN:10:result_float32x2 [] = { 33333333, 33333333, } +VMOVN:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMOVN:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMULL output: +VMULL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL:3:result_int64x1 [] = { 3333333333333333, } +VMULL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL:7:result_uint64x1 [] = { 3333333333333333, } +VMULL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:10:result_float32x2 [] = { 33333333, 33333333, } +VMULL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:12:result_int16x8 [] = { 100, e1, c4, a9, 90, 79, 64, 51, } +VMULL:13:result_int32x4 [] = { 100, e1, c4, a9, } +VMULL:14:result_int64x2 [] = { 100, e1, } +VMULL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:16:result_uint16x8 [] = { e100, e2e1, e4c4, e6a9, e890, ea79, ec64, ee51, } +VMULL:17:result_uint32x4 [] = { ffe00100, ffe200e1, ffe400c4, ffe600a9, } +VMULL:18:result_uint64x2 [] = { ffffffe000000100, ffffffe2000000e1, } +VMULL:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:20:result_poly16x8 [] = { 5500, 5501, 5504, 5505, 5510, 5511, 5514, 5515, } +VMULL:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMULL_LANE 
output: +VMULL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMULL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMULL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMULL_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:13:result_int32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:14:result_int64x2 [] = { 2000, 2000, } +VMULL_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:17:result_uint32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:18:result_uint64x2 [] = { 2000, 2000, } +VMULL_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VREV16 output: +VREV16:0:result_int8x8 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV16:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:2:result_int32x2 [] = { 33333333, 33333333, } +VREV16:3:result_int64x1 [] = { 3333333333333333, } +VREV16:4:result_uint8x8 [] = { f1, f0, f3, f2, f5, f4, f7, f6, } +VREV16:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, 
} +VREV16:6:result_uint32x2 [] = { 33333333, 33333333, } +VREV16:7:result_uint64x1 [] = { 3333333333333333, } +VREV16:8:result_poly8x8 [] = { f1, f0, f3, f2, f5, f4, f7, f6, } +VREV16:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:10:result_float32x2 [] = { 33333333, 33333333, } +VREV16:11:result_int8x16 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, fffffff9, fffffff8, fffffffb, fffffffa, fffffffd, fffffffc, ffffffff, fffffffe, } +VREV16:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:15:result_uint8x16 [] = { f1, f0, f3, f2, f5, f4, f7, f6, f9, f8, fb, fa, fd, fc, ff, fe, } +VREV16:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:19:result_poly8x16 [] = { f1, f0, f3, f2, f5, f4, f7, f6, f9, f8, fb, fa, fd, fc, ff, fe, } +VREV16:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VREV32 output: +VREV32:22:result_int8x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV32:23:result_int16x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV32:24:result_int32x2 [] = { 33333333, 33333333, } +VREV32:25:result_int64x1 [] = { 3333333333333333, } +VREV32:26:result_uint8x8 [] = { f3, f2, f1, f0, f7, f6, f5, f4, } +VREV32:27:result_uint16x4 [] = { fff1, fff0, fff3, fff2, } +VREV32:28:result_uint32x2 [] = { 33333333, 33333333, } +VREV32:29:result_uint64x1 [] = { 3333333333333333, } +VREV32:30:result_poly8x8 [] = { f3, f2, f1, f0, f7, f6, f5, f4, } +VREV32:31:result_poly16x4 [] = { fff1, fff0, fff3, fff2, } 
+VREV32:32:result_float32x2 [] = { 33333333, 33333333, } +VREV32:33:result_int8x16 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, fffffffb, fffffffa, fffffff9, fffffff8, ffffffff, fffffffe, fffffffd, fffffffc, } +VREV32:34:result_int16x8 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV32:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:37:result_uint8x16 [] = { f3, f2, f1, f0, f7, f6, f5, f4, fb, fa, f9, f8, ff, fe, fd, fc, } +VREV32:38:result_uint16x8 [] = { fff1, fff0, fff3, fff2, fff5, fff4, fff7, fff6, } +VREV32:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:41:result_poly8x16 [] = { f3, f2, f1, f0, f7, f6, f5, f4, fb, fa, f9, f8, ff, fe, fd, fc, } +VREV32:42:result_poly16x8 [] = { fff1, fff0, fff3, fff2, fff5, fff4, fff7, fff6, } +VREV32:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VREV64 output: +VREV64:44:result_int8x8 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:45:result_int16x4 [] = { fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:46:result_int32x2 [] = { fffffff1, fffffff0, } +VREV64:47:result_int64x1 [] = { 3333333333333333, } +VREV64:48:result_uint8x8 [] = { f7, f6, f5, f4, f3, f2, f1, f0, } +VREV64:49:result_uint16x4 [] = { fff3, fff2, fff1, fff0, } +VREV64:50:result_uint32x2 [] = { fffffff1, fffffff0, } +VREV64:51:result_uint64x1 [] = { 3333333333333333, } +VREV64:52:result_poly8x8 [] = { f7, f6, f5, f4, f3, f2, f1, f0, } +VREV64:53:result_poly16x4 [] = { fff3, fff2, fff1, fff0, } +VREV64:54:result_float32x2 [] = { c1700000, c1800000, } +VREV64:55:result_int8x16 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, ffffffff, fffffffe, fffffffd, fffffffc, 
fffffffb, fffffffa, fffffff9, fffffff8, } +VREV64:56:result_int16x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV64:57:result_int32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:59:result_uint8x16 [] = { f7, f6, f5, f4, f3, f2, f1, f0, ff, fe, fd, fc, fb, fa, f9, f8, } +VREV64:60:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VREV64:61:result_uint32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:63:result_poly8x16 [] = { f7, f6, f5, f4, f3, f2, f1, f0, ff, fe, fd, fc, fb, fa, f9, f8, } +VREV64:64:result_poly16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VREV64:65:result_float32x4 [] = { c1700000, c1800000, c1500000, c1600000, } + +VSRA_N output: +VSRA_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRA_N:2:result_int32x2 [] = { fffffffc, fffffffd, } +VSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VSRA_N:5:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VSRA_N:6:result_uint32x2 [] = { fffffff3, fffffff4, } +VSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRA_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSRA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSRA_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSRA_N:11:result_int8x16 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, } +VSRA_N:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRA_N:13:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:14:result_int64x2 [] = { fffffffffffffff0, 
fffffffffffffff1, } +VSRA_N:15:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VSRA_N:16:result_uint16x8 [] = { fffc, fffd, fffe, ffff, 0, 1, 2, 3, } +VSRA_N:17:result_uint32x4 [] = { fffffff3, fffffff4, fffffff5, fffffff6, } +VSRA_N:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSRA_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSRA_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTRN/VTRNQ chunk 0 output: +VTRN/VTRNQ:0:result_int8x8 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, } +VTRN/VTRNQ:1:result_int16x4 [] = { fffffff0, fffffff1, 22, 22, } +VTRN/VTRNQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:3:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:4:result_uint8x8 [] = { f0, f1, 55, 55, f2, f3, 55, 55, } +VTRN/VTRNQ:5:result_uint16x4 [] = { fff0, fff1, 66, 66, } +VTRN/VTRNQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:7:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:8:result_poly8x8 [] = { f0, f1, 55, 55, f2, f3, 55, 55, } +VTRN/VTRNQ:9:result_poly16x4 [] = { fff0, fff1, 66, 66, } +VTRN/VTRNQ:10:result_float32x2 [] = { c1800000, c1700000, } +VTRN/VTRNQ:11:result_int8x16 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } +VTRN/VTRNQ:12:result_int16x8 [] = { fffffff0, fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:13:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VTRN/VTRNQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:15:result_uint8x16 [] = { f0, f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:16:result_uint16x8 [] = { fff0, fff1, 66, 66, fff2, fff3, 66, 66, } +VTRN/VTRNQ:17:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VTRN/VTRNQ:18:result_uint64x2 
[] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:19:result_poly8x16 [] = { f0, f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:20:result_poly16x8 [] = { fff0, fff1, 66, 66, fff2, fff3, 66, 66, } +VTRN/VTRNQ:21:result_float32x4 [] = { c1800000, c1700000, 42073333, 42073333, } + +VTRN/VTRNQ chunk 1 output: +VTRN/VTRNQ:22:result_int8x8 [] = { fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } +VTRN/VTRNQ:23:result_int16x4 [] = { fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:24:result_int32x2 [] = { 33, 33, } +VTRN/VTRNQ:25:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:26:result_uint8x8 [] = { f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:27:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VTRN/VTRNQ:28:result_uint32x2 [] = { 77, 77, } +VTRN/VTRNQ:29:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:30:result_poly8x8 [] = { f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:31:result_poly16x4 [] = { fff2, fff3, 66, 66, } +VTRN/VTRNQ:32:result_float32x2 [] = { 42066666, 42066666, } +VTRN/VTRNQ:33:result_int8x16 [] = { fffffff8, fffffff9, 11, 11, fffffffa, fffffffb, 11, 11, fffffffc, fffffffd, 11, 11, fffffffe, ffffffff, 11, 11, } +VTRN/VTRNQ:34:result_int16x8 [] = { fffffff4, fffffff5, 22, 22, fffffff6, fffffff7, 22, 22, } +VTRN/VTRNQ:35:result_int32x4 [] = { fffffff2, fffffff3, 33, 33, } +VTRN/VTRNQ:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:37:result_uint8x16 [] = { f8, f9, 55, 55, fa, fb, 55, 55, fc, fd, 55, 55, fe, ff, 55, 55, } +VTRN/VTRNQ:38:result_uint16x8 [] = { fff4, fff5, 66, 66, fff6, fff7, 66, 66, } +VTRN/VTRNQ:39:result_uint32x4 [] = { fffffff2, fffffff3, 77, 77, } +VTRN/VTRNQ:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:41:result_poly8x16 [] = { f8, f9, 55, 55, fa, fb, 55, 55, fc, fd, 55, 55, fe, ff, 55, 55, } +VTRN/VTRNQ:42:result_poly16x8 [] = { fff4, fff5, 66, 66, fff6, fff7, 66, 66, } +VTRN/VTRNQ:43:result_float32x4 [] = { c1600000, c1500000, 
42073333, 42073333, } + +VUZP/VUZPQ chunk 0 output: +VUZP/VUZPQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:3:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VUZP/VUZPQ:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VUZP/VUZPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:7:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VUZP/VUZPQ:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VUZP/VUZPQ:10:result_float32x2 [] = { c1800000, c1700000, } +VUZP/VUZPQ:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VUZP/VUZPQ:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VUZP/VUZPQ:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VUZP/VUZPQ:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:19:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VUZP/VUZPQ:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VUZP/VUZPQ:21:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } + +VUZP/VUZPQ chunk 1 output: +VUZP/VUZPQ:22:result_int8x8 [] = { 11, 11, 11, 11, 11, 11, 11, 11, } 
+VUZP/VUZPQ:23:result_int16x4 [] = { 22, 22, 22, 22, } +VUZP/VUZPQ:24:result_int32x2 [] = { 33, 33, } +VUZP/VUZPQ:25:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:26:result_uint8x8 [] = { 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:27:result_uint16x4 [] = { 66, 66, 66, 66, } +VUZP/VUZPQ:28:result_uint32x2 [] = { 77, 77, } +VUZP/VUZPQ:29:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:30:result_poly8x8 [] = { 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:31:result_poly16x4 [] = { 66, 66, 66, 66, } +VUZP/VUZPQ:32:result_float32x2 [] = { 42066666, 42066666, } +VUZP/VUZPQ:33:result_int8x16 [] = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VUZP/VUZPQ:34:result_int16x8 [] = { 22, 22, 22, 22, 22, 22, 22, 22, } +VUZP/VUZPQ:35:result_int32x4 [] = { 33, 33, 33, 33, } +VUZP/VUZPQ:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:37:result_uint8x16 [] = { 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:38:result_uint16x8 [] = { 66, 66, 66, 66, 66, 66, 66, 66, } +VUZP/VUZPQ:39:result_uint32x4 [] = { 77, 77, 77, 77, } +VUZP/VUZPQ:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:41:result_poly8x16 [] = { 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:42:result_poly16x8 [] = { 66, 66, 66, 66, 66, 66, 66, 66, } +VUZP/VUZPQ:43:result_float32x4 [] = { 42073333, 42073333, 42073333, 42073333, } + +VZIP/VZIPQ chunk 0 output: +VZIP/VZIPQ:0:result_int8x8 [] = { fffffff0, fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, } +VZIP/VZIPQ:1:result_int16x4 [] = { fffffff0, fffffff2, 22, 22, } +VZIP/VZIPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:3:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:4:result_uint8x8 [] = { f0, f4, 55, 55, f1, f5, 55, 55, } +VZIP/VZIPQ:5:result_uint16x4 [] = { fff0, fff2, 66, 66, } +VZIP/VZIPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:7:result_uint64x1 [] = { 3333333333333333, } 
+VZIP/VZIPQ:8:result_poly8x8 [] = { f0, f4, 55, 55, f1, f5, 55, 55, } +VZIP/VZIPQ:9:result_poly16x4 [] = { fff0, fff2, 66, 66, } +VZIP/VZIPQ:10:result_float32x2 [] = { c1800000, c1700000, } +VZIP/VZIPQ:11:result_int8x16 [] = { fffffff0, fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, } +VZIP/VZIPQ:12:result_int16x8 [] = { fffffff0, fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, } +VZIP/VZIPQ:13:result_int32x4 [] = { fffffff0, fffffff2, 33, 33, } +VZIP/VZIPQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:15:result_uint8x16 [] = { f0, f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, } +VZIP/VZIPQ:16:result_uint16x8 [] = { fff0, fff4, 66, 66, fff1, fff5, 66, 66, } +VZIP/VZIPQ:17:result_uint32x4 [] = { fffffff0, fffffff2, 77, 77, } +VZIP/VZIPQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:19:result_poly8x16 [] = { f0, f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, } +VZIP/VZIPQ:20:result_poly16x8 [] = { fff0, fff4, 66, 66, fff1, fff5, 66, 66, } +VZIP/VZIPQ:21:result_float32x4 [] = { c1800000, c1600000, 42073333, 42073333, } + +VZIP/VZIPQ chunk 1 output: +VZIP/VZIPQ:22:result_int8x8 [] = { fffffff2, fffffff6, 11, 11, fffffff3, fffffff7, 11, 11, } +VZIP/VZIPQ:23:result_int16x4 [] = { fffffff1, fffffff3, 22, 22, } +VZIP/VZIPQ:24:result_int32x2 [] = { 33, 33, } +VZIP/VZIPQ:25:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:26:result_uint8x8 [] = { f2, f6, 55, 55, f3, f7, 55, 55, } +VZIP/VZIPQ:27:result_uint16x4 [] = { fff1, fff3, 66, 66, } +VZIP/VZIPQ:28:result_uint32x2 [] = { 77, 77, } +VZIP/VZIPQ:29:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:30:result_poly8x8 [] = { f2, f6, 55, 55, f3, f7, 55, 55, } +VZIP/VZIPQ:31:result_poly16x4 [] = { fff1, fff3, 66, 66, } +VZIP/VZIPQ:32:result_float32x2 [] = { 42066666, 42066666, } +VZIP/VZIPQ:33:result_int8x16 [] = { fffffff4, fffffffc, 11, 11, fffffff5, fffffffd, 11, 11, fffffff6, 
fffffffe, 11, 11, fffffff7, ffffffff, 11, 11, } +VZIP/VZIPQ:34:result_int16x8 [] = { fffffff2, fffffff6, 22, 22, fffffff3, fffffff7, 22, 22, } +VZIP/VZIPQ:35:result_int32x4 [] = { fffffff1, fffffff3, 33, 33, } +VZIP/VZIPQ:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:37:result_uint8x16 [] = { f4, fc, 55, 55, f5, fd, 55, 55, f6, fe, 55, 55, f7, ff, 55, 55, } +VZIP/VZIPQ:38:result_uint16x8 [] = { fff2, fff6, 66, 66, fff3, fff7, 66, 66, } +VZIP/VZIPQ:39:result_uint32x4 [] = { fffffff1, fffffff3, 77, 77, } +VZIP/VZIPQ:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:41:result_poly8x16 [] = { f4, fc, 55, 55, f5, fd, 55, 55, f6, fe, 55, 55, f7, ff, 55, 55, } +VZIP/VZIPQ:42:result_poly16x8 [] = { fff2, fff6, 66, 66, fff3, fff7, 66, 66, } +VZIP/VZIPQ:43:result_float32x4 [] = { c1700000, c1500000, 42073333, 42073333, } + +VREINTERPRET/VREINTERPRETQ output: +VREINTERPRET/VREINTERPRETQ:0:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:1:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:2:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:3:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:4:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:5:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:6:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:7:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } 
+VREINTERPRET/VREINTERPRETQ:8:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:9:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:10:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:11:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:12:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:13:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:14:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:15:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:16:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:17:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:18:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:19:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:20:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:21:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:22:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:23:result_int32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:24:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:25:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:26:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:27:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:28:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:29:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:30:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:31:result_int64x1 
[] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:32:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:33:result_int64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:34:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:35:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:36:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:37:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:38:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:39:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:40:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:41:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:42:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:43:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:44:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:45:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:46:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:47:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:48:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:49:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:50:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:51:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:52:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:53:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:54:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:55:result_uint32x2 [] = { fff1fff0, fff3fff2, } 
+VREINTERPRET/VREINTERPRETQ:56:result_uint32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:57:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:58:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:59:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:60:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:61:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:62:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:63:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:64:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:65:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:66:result_uint64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:67:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:68:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:69:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:70:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:71:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:72:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:73:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:74:result_poly8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:75:result_poly8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:76:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:77:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:78:result_poly8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:79:result_poly8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:80:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } 
+VREINTERPRET/VREINTERPRETQ:81:result_poly16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:82:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:83:result_poly16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:84:result_poly16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:85:result_poly16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:86:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:87:result_poly16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:88:result_poly16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:89:result_poly16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:90:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:91:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:92:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:93:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VREINTERPRET/VREINTERPRETQ:94:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:95:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, 
ffffffff, } +VREINTERPRET/VREINTERPRETQ:96:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:97:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VREINTERPRET/VREINTERPRETQ:98:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:99:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:100:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:101:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:102:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:103:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:104:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:105:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:106:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:107:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:108:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } 
+VREINTERPRET/VREINTERPRETQ:109:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:110:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:111:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:112:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:113:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:114:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:115:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:116:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:117:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:118:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:119:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:120:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:121:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:122:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:123:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:124:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:125:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:126:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:127:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:128:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:129:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, 
ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:130:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:131:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:132:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:133:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:134:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:135:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:136:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:137:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:138:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:139:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:140:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:141:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:142:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:143:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:144:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:145:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:146:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:147:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:148:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:149:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } 
+VREINTERPRET/VREINTERPRETQ:150:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:151:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:152:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:153:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VREINTERPRET/VREINTERPRETQ:154:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:155:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:156:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:157:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:158:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:159:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:160:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VREINTERPRET/VREINTERPRETQ:161:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:162:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:163:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:164:result_float32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:165:result_float32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:166:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:167:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:168:result_float32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:169:result_float32x2 [] = { fffffff0, ffffffff, } 
+VREINTERPRET/VREINTERPRETQ:170:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:171:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:172:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:173:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:174:result_float32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:175:result_float32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:176:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:177:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:178:result_float32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:179:result_float32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:180:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:181:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:182:result_int8x8 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, } +VREINTERPRET/VREINTERPRETQ:183:result_int16x4 [] = { 0, ffffc180, 0, ffffc170, } +VREINTERPRET/VREINTERPRETQ:184:result_int32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:185:result_int64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:186:result_uint8x8 [] = { 0, 0, 80, c1, 0, 0, 70, c1, } +VREINTERPRET/VREINTERPRETQ:187:result_uint16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:188:result_uint32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:189:result_uint64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:190:result_poly8x8 [] = { 0, 0, 80, c1, 0, 0, 70, c1, } +VREINTERPRET/VREINTERPRETQ:191:result_poly16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:192:result_int8x16 [] = { 0, 0, 
ffffff80, ffffffc1, 0, 0, 70, ffffffc1, 0, 0, 60, ffffffc1, 0, 0, 50, ffffffc1, } +VREINTERPRET/VREINTERPRETQ:193:result_int16x8 [] = { 0, ffffc180, 0, ffffc170, 0, ffffc160, 0, ffffc150, } +VREINTERPRET/VREINTERPRETQ:194:result_int32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:195:result_int64x2 [] = { c1700000c1800000, c1500000c1600000, } +VREINTERPRET/VREINTERPRETQ:196:result_uint8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } +VREINTERPRET/VREINTERPRETQ:197:result_uint16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:198:result_uint32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:199:result_uint64x2 [] = { c1700000c1800000, c1500000c1600000, } +VREINTERPRET/VREINTERPRETQ:200:result_poly8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } +VREINTERPRET/VREINTERPRETQ:201:result_poly16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } + +VQRDMULH cumulative saturation output: +VQRDMULH:0:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:1:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:2:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:3:vqrdmulhq_s32 Neon cumulative saturation 0 + +VQRDMULH output: +VQRDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:5:result_int16x4 [] = { fffffff5, fffffff6, fffffff7, fffffff7, } +VQRDMULH:6:result_int32x2 [] = { 0, 0, } +VQRDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:14:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:16:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRDMULH:17:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRDMULH (check mul cumulative saturation) cumulative saturation output: +VQRDMULH:26:vqrdmulh_s16 Neon cumulative saturation 1 +VQRDMULH:27:vqrdmulh_s32 Neon cumulative saturation 1 +VQRDMULH:28:vqrdmulhq_s16 Neon cumulative saturation 1 +VQRDMULH:29:vqrdmulhq_s32 Neon cumulative saturation 1 + +VQRDMULH (check mul cumulative saturation) output: +VQRDMULH:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:33:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:34:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:35:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:36:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:37:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:38:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:40:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:41:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:42:result_int16x8 [] = { 7fff, 7fff, 
7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:43:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:44:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:45:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:46:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:47:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:48:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:49:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:50:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:51:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRDMULH (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH:52:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:53:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:54:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:55:vqrdmulhq_s32 Neon cumulative saturation 0 + +VQRDMULH (check rounding cumulative saturation) output: +VQRDMULH:56:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:57:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:58:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:59:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:60:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:61:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:62:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:63:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:64:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:65:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:66:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:67:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:68:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 
7fff, 7fff, 7fff, } +VQRDMULH:69:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:70:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:71:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:72:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:73:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:74:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:75:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:76:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:77:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRDMULH_LANE cumulative saturation output: +VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon cumulative saturation 0 + +VQRDMULH_LANE output: +VQRDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:5:result_int16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:6:result_int32x2 [] = { 0, 0, } +VQRDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:14:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:16:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VQRDMULH_LANE:17:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:26:vqrdmulh_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:27:vqrdmulh_lane_s32 Neon cumulative saturation 1 +VQRDMULH_LANE:28:vqrdmulhq_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:29:vqrdmulhq_lane_s32 Neon cumulative saturation 1 + +VQRDMULH_LANE (check mul cumulative saturation) output: +VQRDMULH_LANE:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:33:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:34:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:35:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:36:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:37:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:38:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:40:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:41:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, } +VQRDMULH_LANE:42:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:43:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:44:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:45:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:46:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:47:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:48:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:49:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:50:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:51:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRDMULH_LANE (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:52:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:53:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:54:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:55:vqrdmulhq_lane_s32 Neon cumulative saturation 0 + +VQRDMULH_LANE (check rounding cumulative saturation) output: +VQRDMULH_LANE:56:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:57:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:58:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:59:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:60:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:61:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:62:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:63:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:64:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:65:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VQRDMULH_LANE:66:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:67:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:68:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:69:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:70:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:71:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:72:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:73:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:74:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:75:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:76:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:77:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRDMULH_N cumulative saturation output: +VQRDMULH_N:0:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:1:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:2:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:3:vqrdmulhq_n_s32 Neon cumulative saturation 0 + +VQRDMULH_N output: +VQRDMULH_N:4:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffd, } +VQRDMULH_N:5:result_int32x2 [] = { fffffffe, fffffffe, } +VQRDMULH_N:6:result_int16x8 [] = { 6, 6, 6, 5, 5, 4, 4, 4, } +VQRDMULH_N:7:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } + +VQRDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_N:8:vqrdmulh_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:9:vqrdmulh_n_s32 Neon cumulative saturation 1 +VQRDMULH_N:10:vqrdmulhq_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:11:vqrdmulhq_n_s32 Neon cumulative saturation 1 + +VQRDMULH_N (check mul cumulative saturation) 
output: +VQRDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:20:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:21:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:22:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:23:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:24:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:25:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:26:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:27:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:28:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:29:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:30:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:31:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:32:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:33:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRDMULH_N (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_N:34:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:35:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:36:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:37:vqrdmulhq_n_s32 Neon cumulative saturation 0 + 
+VQRDMULH_N (check rounding cumulative saturation) output: +VQRDMULH_N:38:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:39:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:40:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:41:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:42:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:43:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:44:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:45:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:46:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:47:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:48:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:49:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:50:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:51:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:52:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:53:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:54:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:55:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:56:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:57:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:58:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:59:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (with input = 0) cumulative saturation output: +VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon 
cumulative saturation 0 +VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (with input = 0) output: +VQRSHL/VQRSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:18:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:19:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:23:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:29:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:30:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:31:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:34:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:35:result_poly8x16 [] = { 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:36:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:37:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:38:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:39:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:40:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:41:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:42:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:43:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:44:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:45:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:46:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:47:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:48:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:49:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:50:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:51:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:52:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:53:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: +VQRSHL/VQRSHLQ:54:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:55:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:56:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:57:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:58:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:59:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:60:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:61:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:62:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:63:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:64:result_float32x2 [] = { 33333333, 33333333, } 
+VQRSHL/VQRSHLQ:65:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:66:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:67:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:68:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:69:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:70:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:71:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:72:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:73:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:74:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:75:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ cumulative saturation output: +VQRSHL/VQRSHLQ:76:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:77:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:78:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:79:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:80:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:81:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:82:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:83:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:84:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:85:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:86:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:87:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:88:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:89:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:90:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:91:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ output: +VQRSHL/VQRSHLQ:92:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQRSHL/VQRSHLQ:93:result_int16x4 [] = { 
ffffff80, ffffff88, ffffff90, ffffff98, } +VQRSHL/VQRSHLQ:94:result_int32x2 [] = { fffff000, fffff100, } +VQRSHL/VQRSHLQ:95:result_int64x1 [] = { ffffffffffffff80, } +VQRSHL/VQRSHLQ:96:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:97:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:98:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:99:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:100:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:101:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:102:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:103:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:104:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:105:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:106:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:107:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:108:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:109:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:110:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:111:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:112:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:113:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:114:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:115:vqrshl_s16 Neon cumulative saturation 0 
+VQRSHL/VQRSHLQ:116:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:117:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:118:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:119:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:120:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:121:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:122:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:123:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:124:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:125:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:126:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:127:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:128:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:129:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (negative shift amount) output: +VQRSHL/VQRSHLQ:130:result_int8x8 [] = { fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:131:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VQRSHL/VQRSHLQ:132:result_int32x2 [] = { fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:133:result_int64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:134:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, } +VQRSHL/VQRSHLQ:135:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VQRSHL/VQRSHLQ:136:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQRSHL/VQRSHLQ:137:result_uint64x1 [] = { fffffffffffffff, } +VQRSHL/VQRSHLQ:138:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:139:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:140:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:141:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:142:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:143:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:144:result_int64x2 [] = { 0, 0, } 
+VQRSHL/VQRSHLQ:145:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VQRSHL/VQRSHLQ:146:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:147:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VQRSHL/VQRSHLQ:148:result_uint64x2 [] = { 100000000000, 100000000000, } +VQRSHL/VQRSHLQ:149:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:150:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:151:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) cumulative saturation output: +VQRSHL/VQRSHLQ:152:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:153:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:154:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:155:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:156:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:157:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:158:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:159:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:160:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:161:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:162:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:163:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:164:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:165:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:166:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:167:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) output: +VQRSHL/VQRSHLQ:168:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:169:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:170:result_int32x2 [] = { 40000000, 40000000, } +VQRSHL/VQRSHLQ:171:result_int64x1 [] = { 
4000000000000000, } +VQRSHL/VQRSHLQ:172:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:173:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:174:result_uint32x2 [] = { 80000000, 80000000, } +VQRSHL/VQRSHLQ:175:result_uint64x1 [] = { 8000000000000000, } +VQRSHL/VQRSHLQ:176:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:177:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:178:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:179:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:180:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:181:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VQRSHL/VQRSHLQ:182:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VQRSHL/VQRSHLQ:183:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:184:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:185:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:186:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:187:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:188:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:189:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) cumulative saturation output: +VQRSHL/VQRSHLQ:190:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:191:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:192:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:193:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:194:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:195:vqrshl_u16 Neon cumulative saturation 0 
+VQRSHL/VQRSHLQ:196:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:197:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:198:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:199:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:200:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:201:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:202:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:203:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:204:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:205:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) output: +VQRSHL/VQRSHLQ:206:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:207:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:208:result_int32x2 [] = { 10000000, 10000000, } +VQRSHL/VQRSHLQ:209:result_int64x1 [] = { 1000000000000000, } +VQRSHL/VQRSHLQ:210:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:211:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:212:result_uint32x2 [] = { 20000000, 20000000, } +VQRSHL/VQRSHLQ:213:result_uint64x1 [] = { 2000000000000000, } +VQRSHL/VQRSHLQ:214:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:215:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:216:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:217:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:218:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:219:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VQRSHL/VQRSHLQ:220:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VQRSHL/VQRSHLQ:221:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:222:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } 
+VQRSHL/VQRSHLQ:223:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VQRSHL/VQRSHLQ:224:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VQRSHL/VQRSHLQ:225:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:226:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:227:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:228:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:229:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:230:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:231:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:232:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:233:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:234:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:235:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:236:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:237:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:238:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:239:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:240:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:241:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:242:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:243:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) output: +VQRSHL/VQRSHLQ:244:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHL/VQRSHLQ:245:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHL/VQRSHLQ:246:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHL/VQRSHLQ:247:result_int64x1 [] = { 7fffffffffffffff, } +VQRSHL/VQRSHLQ:248:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:249:result_uint16x4 [] = { ffff, ffff, 
ffff, ffff, } +VQRSHL/VQRSHLQ:250:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:251:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:252:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:253:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:254:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:255:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHL/VQRSHLQ:256:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRSHL/VQRSHLQ:257:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRSHL/VQRSHLQ:258:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQRSHL/VQRSHLQ:259:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:260:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:261:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:262:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:263:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:264:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:265:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) cumulative saturation output: +VQRSHL/VQRSHLQ:266:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:267:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:268:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:269:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:270:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:271:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:272:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:273:vqrshl_u64 Neon cumulative saturation 1 
+VQRSHL/VQRSHLQ:274:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:275:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:276:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:277:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:278:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:279:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:280:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:281:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) output: +VQRSHL/VQRSHLQ:282:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:283:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:284:result_int32x2 [] = { 80000000, 80000000, } +VQRSHL/VQRSHLQ:285:result_int64x1 [] = { 8000000000000000, } +VQRSHL/VQRSHLQ:286:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:287:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:288:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:289:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:290:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:291:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:292:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:293:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:294:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:295:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:296:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:297:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 
ff, } +VQRSHL/VQRSHLQ:298:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:299:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:300:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:301:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:302:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:303:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:304:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:305:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:306:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:307:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:308:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:309:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:310:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:311:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:312:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:313:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:314:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:315:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:316:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:317:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:318:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:319:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) output: +VQRSHL/VQRSHLQ:320:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:321:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:322:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:323:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:324:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 
0, } +VQRSHL/VQRSHLQ:325:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:326:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:327:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:328:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:329:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:330:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:331:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:332:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:333:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:334:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:335:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:336:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:337:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:338:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:339:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:340:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:341:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) cumulative saturation output: +VQRSHL/VQRSHLQ:342:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:343:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:344:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:345:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:346:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:347:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:348:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:349:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:350:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:351:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:352:vqrshlq_s32 Neon cumulative saturation 0 
+VQRSHL/VQRSHLQ:353:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:354:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:355:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:356:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:357:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) output: +VQRSHL/VQRSHLQ:358:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:359:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:360:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:361:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:362:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:363:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:364:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:365:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:366:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:367:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:368:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:369:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:370:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:371:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:372:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:373:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:374:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:375:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:376:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:377:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:378:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:379:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VABA/VABAQ output: +VABA/VABAQ:0:result_int8x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, 
fffffffc, fffffffd, } +VABA/VABAQ:1:result_int16x4 [] = { 16, 17, 18, 19, } +VABA/VABAQ:2:result_int32x2 [] = { 20, 21, } +VABA/VABAQ:3:result_int64x1 [] = { 3333333333333333, } +VABA/VABAQ:4:result_uint8x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } +VABA/VABAQ:5:result_uint16x4 [] = { 907, 908, 909, 90a, } +VABA/VABAQ:6:result_uint32x2 [] = { ffffffe7, ffffffe8, } +VABA/VABAQ:7:result_uint64x1 [] = { 3333333333333333, } +VABA/VABAQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABA/VABAQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABA/VABAQ:10:result_float32x2 [] = { 33333333, 33333333, } +VABA/VABAQ:11:result_int8x16 [] = { 5e, 5f, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 6a, 6b, 6c, 6d, } +VABA/VABAQ:12:result_int16x8 [] = { b9c, b9d, b9e, b9f, ba0, ba1, ba2, ba3, } +VABA/VABAQ:13:result_int32x4 [] = { 26e0, 26e1, 26e2, 26e3, } +VABA/VABAQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:15:result_uint8x16 [] = { f8, f9, fa, fb, fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, } +VABA/VABAQ:16:result_uint16x8 [] = { fff9, fffa, fffb, fffc, fffd, fffe, ffff, 0, } +VABA/VABAQ:17:result_uint32x4 [] = { c, d, e, f, } +VABA/VABAQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABA/VABAQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABA/VABAQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VABAL output: +VABAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:2:result_int32x2 [] = { 33333333, 33333333, } +VABAL:3:result_int64x1 [] = { 3333333333333333, } +VABAL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABAL:7:result_uint64x1 [] = { 3333333333333333, } 
+VABAL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:10:result_float32x2 [] = { 33333333, 33333333, } +VABAL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:12:result_int16x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABAL:13:result_int32x4 [] = { 16, 17, 18, 19, } +VABAL:14:result_int64x2 [] = { 20, 21, } +VABAL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:16:result_uint16x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } +VABAL:17:result_uint32x4 [] = { 907, 908, 909, 90a, } +VABAL:18:result_uint64x2 [] = { ffffffe7, ffffffe8, } +VABAL:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABAL:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VABAL test intermediate overflow output: +VABAL:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:24:result_int32x2 [] = { 33333333, 33333333, } +VABAL:25:result_int64x1 [] = { 3333333333333333, } +VABAL:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:28:result_uint32x2 [] = { 33333333, 33333333, } +VABAL:29:result_uint64x1 [] = { 3333333333333333, } +VABAL:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:32:result_float32x2 [] = { 33333333, 33333333, } +VABAL:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:34:result_int16x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABAL:35:result_int32x4 [] = { ffef, fff0, fff1, fff2, } +VABAL:36:result_int64x2 [] = { ffffffef, fffffff0, } 
+VABAL:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:38:result_uint16x8 [] = { ee, ef, f0, f1, f2, f3, f4, f5, } +VABAL:39:result_uint32x4 [] = { ffe2, ffe3, ffe4, ffe5, } +VABAL:40:result_uint64x2 [] = { ffffffe7, ffffffe8, } +VABAL:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABAL:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VABD/VABDQ output: +VABD/VABDQ:0:result_int8x8 [] = { 11, 10, f, e, d, c, b, a, } +VABD/VABDQ:1:result_int16x4 [] = { 3, 2, 1, 0, } +VABD/VABDQ:2:result_int32x2 [] = { 18, 17, } +VABD/VABDQ:3:result_int64x1 [] = { 3333333333333333, } +VABD/VABDQ:4:result_uint8x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABD/VABDQ:5:result_uint16x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABD/VABDQ:6:result_uint32x2 [] = { ffffffe8, ffffffe9, } +VABD/VABDQ:7:result_uint64x1 [] = { 3333333333333333, } +VABD/VABDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABD/VABDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABD/VABDQ:10:result_float32x2 [] = { 41c26666, 41ba6666, } +VABD/VABDQ:11:result_int8x16 [] = { 1a, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, f, e, d, c, b, } +VABD/VABDQ:12:result_int16x8 [] = { 4, 3, 2, 1, 0, 1, 2, 3, } +VABD/VABDQ:13:result_int32x4 [] = { 30, 2f, 2e, 2d, } +VABD/VABDQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABD/VABDQ:15:result_uint8x16 [] = { e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, f4, f5, } +VABD/VABDQ:16:result_uint16x8 [] = { ffe4, ffe5, ffe6, ffe7, ffe8, ffe9, ffea, ffeb, } +VABD/VABDQ:17:result_uint32x4 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, } +VABD/VABDQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABD/VABDQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABD/VABDQ:20:result_poly16x8 [] = { 
3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABD/VABDQ:21:result_float32x4 [] = { 42407ae1, 423c7ae1, 42387ae1, 42347ae1, } +VABD/VABDQ FP special (-0.0):22:result_float32x4 [] = { 0, 0, 0, 0, } +VABD/VABDQ FP special (-0.0):23:result_float32x4 [] = { 0, 0, 0, 0, } + +VABDL output: +VABDL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:2:result_int32x2 [] = { 33333333, 33333333, } +VABDL:3:result_int64x1 [] = { 3333333333333333, } +VABDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABDL:7:result_uint64x1 [] = { 3333333333333333, } +VABDL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:10:result_float32x2 [] = { 33333333, 33333333, } +VABDL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:12:result_int16x8 [] = { 11, 10, f, e, d, c, b, a, } +VABDL:13:result_int32x4 [] = { 3, 2, 1, 0, } +VABDL:14:result_int64x2 [] = { 18, 17, } +VABDL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:16:result_uint16x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABDL:17:result_uint32x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABDL:18:result_uint64x2 [] = { ffffffe8, ffffffe9, } +VABDL:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABDL:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VAND/VANDQ output: +VAND/VANDQ:0:result_int8x8 [] = { 0, 0, 2, 2, 0, 0, 2, 2, } +VAND/VANDQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VAND/VANDQ:2:result_int32x2 [] = { 0, 1, } +VAND/VANDQ:3:result_int64x1 [] = { 60, } +VAND/VANDQ:4:result_uint8x8 [] = { 10, 10, 
10, 10, 14, 14, 14, 14, } +VAND/VANDQ:5:result_uint16x4 [] = { 10, 10, 12, 12, } +VAND/VANDQ:6:result_uint32x2 [] = { 20, 20, } +VAND/VANDQ:7:result_uint64x1 [] = { 0, } +VAND/VANDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VAND/VANDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VAND/VANDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VAND/VANDQ:11:result_int8x16 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VAND/VANDQ:12:result_int16x8 [] = { ffffffe0, ffffffe0, ffffffe0, ffffffe0, ffffffe4, ffffffe4, ffffffe4, ffffffe4, } +VAND/VANDQ:13:result_int32x4 [] = { ffffffe0, ffffffe0, ffffffe2, ffffffe2, } +VAND/VANDQ:14:result_int64x2 [] = { 10, 10, } +VAND/VANDQ:15:result_uint8x16 [] = { 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, c, c, c, c, } +VAND/VANDQ:16:result_uint16x8 [] = { 0, 1, 2, 3, 0, 1, 2, 3, } +VAND/VANDQ:17:result_uint32x4 [] = { 30, 31, 32, 33, } +VAND/VANDQ:18:result_uint64x2 [] = { 0, 1, } +VAND/VANDQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VAND/VANDQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VAND/VANDQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VORR/VORRQ output: +VORR/VORRQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff2, fffffff3, fffffff6, fffffff7, fffffff6, fffffff7, } +VORR/VORRQ:1:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VORR/VORRQ:2:result_int32x2 [] = { fffffff3, fffffff3, } +VORR/VORRQ:3:result_int64x1 [] = { fffffffffffffff4, } +VORR/VORRQ:4:result_uint8x8 [] = { f4, f5, f6, f7, f4, f5, f6, f7, } +VORR/VORRQ:5:result_uint16x4 [] = { fffe, ffff, fffe, ffff, } +VORR/VORRQ:6:result_uint32x2 [] = { fffffff8, fffffff9, } +VORR/VORRQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VORR/VORRQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VORR/VORRQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VORR/VORRQ:10:result_float32x2 [] = { 33333333, 33333333, } +VORR/VORRQ:11:result_int8x16 [] = { fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, } +VORR/VORRQ:12:result_int16x8 [] = { fffffffc, fffffffd, fffffffe, ffffffff, fffffffc, fffffffd, fffffffe, ffffffff, } +VORR/VORRQ:13:result_int32x4 [] = { fffffff2, fffffff3, fffffff2, fffffff3, } +VORR/VORRQ:14:result_int64x2 [] = { fffffffffffffff8, fffffffffffffff9, } +VORR/VORRQ:15:result_uint8x16 [] = { fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, } +VORR/VORRQ:16:result_uint16x8 [] = { fff3, fff3, fff3, fff3, fff7, fff7, fff7, fff7, } +VORR/VORRQ:17:result_uint32x4 [] = { fffffff7, fffffff7, fffffff7, fffffff7, } +VORR/VORRQ:18:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff3, } +VORR/VORRQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VORR/VORRQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VORR/VORRQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VORN/VORNQ output: +VORN/VORNQ:0:result_int8x8 [] = { fffffffd, fffffffd, ffffffff, ffffffff, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:1:result_int16x4 [] = { fffffff3, fffffff3, fffffff3, fffffff3, } +VORN/VORNQ:2:result_int32x2 [] = { fffffffc, fffffffd, } +VORN/VORNQ:3:result_int64x1 [] = { fffffffffffffffb, } +VORN/VORNQ:4:result_uint8x8 [] = { fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:5:result_uint16x4 [] = { fff1, fff1, fff3, fff3, } +VORN/VORNQ:6:result_uint32x2 [] = { fffffff7, fffffff7, } +VORN/VORNQ:7:result_uint64x1 [] = { fffffffffffffffd, } +VORN/VORNQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VORN/VORNQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VORN/VORNQ:10:result_float32x2 [] = { 33333333, 33333333, } 
+VORN/VORNQ:11:result_int8x16 [] = { fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:12:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff7, fffffff7, fffffff7, fffffff7, } +VORN/VORNQ:13:result_int32x4 [] = { fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:14:result_int64x2 [] = { fffffffffffffff7, fffffffffffffff7, } +VORN/VORNQ:15:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:16:result_uint16x8 [] = { fffc, fffd, fffe, ffff, fffc, fffd, fffe, ffff, } +VORN/VORNQ:17:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VORN/VORNQ:18:result_uint64x2 [] = { fffffffffffffffc, fffffffffffffffd, } +VORN/VORNQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VORN/VORNQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VORN/VORNQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VEOR/VEORQ output: +VEOR/VEORQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff0, fffffff1, fffffff6, fffffff7, fffffff4, fffffff5, } +VEOR/VEORQ:1:result_int16x4 [] = { c, d, e, f, } +VEOR/VEORQ:2:result_int32x2 [] = { fffffff3, fffffff2, } +VEOR/VEORQ:3:result_int64x1 [] = { ffffffffffffff94, } +VEOR/VEORQ:4:result_uint8x8 [] = { e4, e5, e6, e7, e0, e1, e2, e3, } +VEOR/VEORQ:5:result_uint16x4 [] = { ffee, ffef, ffec, ffed, } +VEOR/VEORQ:6:result_uint32x2 [] = { ffffffd8, ffffffd9, } +VEOR/VEORQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VEOR/VEORQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VEOR/VEORQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VEOR/VEORQ:10:result_float32x2 [] = { 33333333, 33333333, } +VEOR/VEORQ:11:result_int8x16 [] = { 6, 7, 4, 5, 2, 3, 0, 1, e, f, c, d, a, b, 8, 9, } +VEOR/VEORQ:12:result_int16x8 [] = { 1c, 1d, 1e, 1f, 18, 19, 1a, 1b, } 
+VEOR/VEORQ:13:result_int32x4 [] = { 12, 13, 10, 11, } +VEOR/VEORQ:14:result_int64x2 [] = { ffffffffffffffe8, ffffffffffffffe9, } +VEOR/VEORQ:15:result_uint8x16 [] = { fc, fd, fe, ff, f8, f9, fa, fb, f4, f5, f6, f7, f0, f1, f2, f3, } +VEOR/VEORQ:16:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VEOR/VEORQ:17:result_uint32x4 [] = { ffffffc7, ffffffc6, ffffffc5, ffffffc4, } +VEOR/VEORQ:18:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff2, } +VEOR/VEORQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VEOR/VEORQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VEOR/VEORQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VBIC/VBICQ output: +VBIC/VBICQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff4, fffffff5, fffffff4, fffffff5, } +VBIC/VBICQ:1:result_int16x4 [] = { 0, 1, 2, 3, } +VBIC/VBICQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBIC/VBICQ:3:result_int64x1 [] = { ffffffffffffff90, } +VBIC/VBICQ:4:result_uint8x8 [] = { e0, e1, e2, e3, e0, e1, e2, e3, } +VBIC/VBICQ:5:result_uint16x4 [] = { ffe0, ffe1, ffe0, ffe1, } +VBIC/VBICQ:6:result_uint32x2 [] = { ffffffd0, ffffffd1, } +VBIC/VBICQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VBIC/VBICQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VBIC/VBICQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VBIC/VBICQ:10:result_float32x2 [] = { 33333333, 33333333, } +VBIC/VBICQ:11:result_int8x16 [] = { 0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9, } +VBIC/VBICQ:12:result_int16x8 [] = { 10, 11, 12, 13, 10, 11, 12, 13, } +VBIC/VBICQ:13:result_int32x4 [] = { 10, 11, 10, 11, } +VBIC/VBICQ:14:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VBIC/VBICQ:15:result_uint8x16 [] = { f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, } +VBIC/VBICQ:16:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff4, fff4, fff4, fff4, } 
+VBIC/VBICQ:17:result_uint32x4 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VBIC/VBICQ:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VBIC/VBICQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VBIC/VBICQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VBIC/VBICQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VCREATE output: +VCREATE:0:result_int8x8 [] = { fffffff0, ffffffde, ffffffbc, ffffff9a, 78, 56, 34, 12, } +VCREATE:1:result_int16x4 [] = { ffffdef0, ffff9abc, 5678, 1234, } +VCREATE:2:result_int32x2 [] = { 9abcdef0, 12345678, } +VCREATE:3:result_int64x1 [] = { 123456789abcdef0, } +VCREATE:4:result_uint8x8 [] = { f0, de, bc, 9a, 78, 56, 34, 12, } +VCREATE:5:result_uint16x4 [] = { def0, 9abc, 5678, 1234, } +VCREATE:6:result_uint32x2 [] = { 9abcdef0, 12345678, } +VCREATE:7:result_uint64x1 [] = { 123456789abcdef0, } +VCREATE:8:result_poly8x8 [] = { f0, de, bc, 9a, 78, 56, 34, 12, } +VCREATE:9:result_poly16x4 [] = { def0, 9abc, 5678, 1234, } +VCREATE:10:result_float32x2 [] = { 9abcdef0, 12345678, } +VCREATE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VCREATE:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD2_LANE/VLD2Q_LANE chunk 0 output: +VLD2_LANE/VLD2Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD2_LANE/VLD2Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VLD2_LANE/VLD2Q_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:12:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:13:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:16:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:17:result_uint32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:20:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } 
+VLD2_LANE/VLD2Q_LANE:21:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } + +VLD2_LANE/VLD2Q_LANE chunk 1 output: +VLD2_LANE/VLD2Q_LANE:22:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:23:result_int16x4 [] = { fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:24:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:25:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:26:result_uint8x8 [] = { f0, f1, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:27:result_uint16x4 [] = { aaaa, aaaa, fff0, fff1, } +VLD2_LANE/VLD2Q_LANE:28:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:30:result_poly8x8 [] = { f0, f1, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:31:result_poly16x4 [] = { aaaa, aaaa, fff0, fff1, } +VLD2_LANE/VLD2Q_LANE:32:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:34:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:35:result_int32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:38:result_uint16x8 [] = { aaaa, aaaa, fff0, fff1, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:39:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:42:result_poly16x8 [] = { aaaa, aaaa, fff0, fff1, aaaa, 
aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:43:result_float32x4 [] = { c1800000, c1700000, aaaaaaaa, aaaaaaaa, } + +VLD3_LANE/VLD3Q_LANE chunk 0 output: +VLD3_LANE/VLD3Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3_LANE/VLD3Q_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:12:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:13:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:16:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:20:result_poly16x8 [] = { aaaa, aaaa, aaaa, 
aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:21:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } + +VLD3_LANE/VLD3Q_LANE chunk 1 output: +VLD3_LANE/VLD3Q_LANE:22:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:23:result_int16x4 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:24:result_int32x2 [] = { fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:25:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:26:result_uint8x8 [] = { aa, aa, aa, aa, f0, f1, f2, aa, } +VLD3_LANE/VLD3Q_LANE:27:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:28:result_uint32x2 [] = { aaaaaaaa, fffffff0, } +VLD3_LANE/VLD3Q_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:30:result_poly8x8 [] = { aa, aa, aa, aa, f0, f1, f2, aa, } +VLD3_LANE/VLD3Q_LANE:31:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:32:result_float32x2 [] = { c1600000, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:34:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:35:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:38:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, } +VLD3_LANE/VLD3Q_LANE:39:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:42:result_poly16x8 [] = 
{ aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, } +VLD3_LANE/VLD3Q_LANE:43:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, c1800000, c1700000, } + +VLD3_LANE/VLD3Q_LANE chunk 2 output: +VLD3_LANE/VLD3Q_LANE:44:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:45:result_int16x4 [] = { fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:46:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:47:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:48:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:49:result_uint16x4 [] = { aaaa, fff0, fff1, fff2, } +VLD3_LANE/VLD3Q_LANE:50:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:51:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:52:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:53:result_poly16x4 [] = { aaaa, fff0, fff1, fff2, } +VLD3_LANE/VLD3Q_LANE:54:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:55:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:56:result_int16x8 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:57:result_int32x4 [] = { fffffff2, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:59:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:60:result_uint16x8 [] = { fff1, fff2, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:61:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VLD3_LANE/VLD3Q_LANE:64:result_poly16x8 [] = { fff1, fff2, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:65:result_float32x4 [] = { c1600000, aaaaaaaa, aaaaaaaa, aaaaaaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 0 output: +VLD4_LANE/VLD4Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4_LANE/VLD4Q_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:12:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:13:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:16:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, } +VLD4_LANE/VLD4Q_LANE:20:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:21:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 1 output: +VLD4_LANE/VLD4Q_LANE:22:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:23:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:24:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:25:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:26:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:27:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:28:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:30:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:31:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:32:result_float32x2 [] = { c1600000, c1500000, } +VLD4_LANE/VLD4Q_LANE:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:34:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:35:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:38:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:39:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:42:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:43:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 2 output: +VLD4_LANE/VLD4Q_LANE:44:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:45:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:46:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:47:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:48:result_uint8x8 [] = { f0, f1, f2, f3, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:49:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:50:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:51:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:52:result_poly8x8 [] = { f0, f1, f2, f3, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:53:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:54:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:55:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:56:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:57:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:59:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:60:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:61:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:63:result_poly8x16 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:64:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:65:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } + +VLD4_LANE/VLD4Q_LANE chunk 3 output: +VLD4_LANE/VLD4Q_LANE:66:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:67:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:68:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:69:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:70:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:71:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:72:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:73:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:74:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:75:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:76:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:77:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:78:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:79:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:80:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:81:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:82:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:83:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:84:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:85:result_poly8x16 [] 
= { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:86:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:87:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } + +VLD2_DUP/VLD2Q_DUP chunk 0 output: +VLD2_DUP/VLD2Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:4:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:8:result_poly8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD2_DUP/VLD2Q_DUP:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, } +VLD2_DUP/VLD2Q_DUP:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD2_DUP/VLD2Q_DUP chunk 1 output: +VLD2_DUP/VLD2Q_DUP:22:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:23:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:24:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:25:result_int64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:26:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:27:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:28:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:29:result_uint64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:30:result_poly8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:31:result_poly16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:32:result_float32x2 [] = { c1800000, c1700000, } +VLD2_DUP/VLD2Q_DUP:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:42:result_poly16x8 [] = { 3333, 
3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD3_DUP/VLD3Q_DUP chunk 0 output: +VLD3_DUP/VLD3Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f0, f1, f2, f0, f1, } +VLD3_DUP/VLD3Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff0, } +VLD3_DUP/VLD3Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:8:result_poly8x8 [] = { f0, f1, f2, f0, f1, f2, f0, f1, } +VLD3_DUP/VLD3Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff2, fff0, } +VLD3_DUP/VLD3Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3_DUP/VLD3Q_DUP:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VLD3_DUP/VLD3Q_DUP:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD3_DUP/VLD3Q_DUP chunk 1 output: +VLD3_DUP/VLD3Q_DUP:22:result_int8x8 [] = { fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:23:result_int16x4 [] = { fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:24:result_int32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:25:result_int64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:26:result_uint8x8 [] = { f2, f0, f1, f2, f0, f1, f2, f0, } +VLD3_DUP/VLD3Q_DUP:27:result_uint16x4 [] = { fff1, fff2, fff0, fff1, } +VLD3_DUP/VLD3Q_DUP:28:result_uint32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:29:result_uint64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:30:result_poly8x8 [] = { f2, f0, f1, f2, f0, f1, f2, f0, } +VLD3_DUP/VLD3Q_DUP:31:result_poly16x4 [] = { fff1, fff2, fff0, fff1, } +VLD3_DUP/VLD3Q_DUP:32:result_float32x2 [] = { c1600000, c1800000, } +VLD3_DUP/VLD3Q_DUP:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:43:result_float32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } + +VLD3_DUP/VLD3Q_DUP chunk 2 output: +VLD3_DUP/VLD3Q_DUP:44:result_int8x8 [] = { fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:45:result_int16x4 [] = { fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:46:result_int32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:47:result_int64x1 [] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:48:result_uint8x8 [] = { f1, f2, f0, f1, f2, f0, f1, f2, } +VLD3_DUP/VLD3Q_DUP:49:result_uint16x4 [] = { fff2, fff0, fff1, fff2, } +VLD3_DUP/VLD3Q_DUP:50:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:51:result_uint64x1 [] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:52:result_poly8x8 [] = { f1, f2, f0, f1, f2, f0, f1, f2, } +VLD3_DUP/VLD3Q_DUP:53:result_poly16x4 [] = { fff2, fff0, fff1, fff2, } +VLD3_DUP/VLD3Q_DUP:54:result_float32x2 [] = { c1700000, c1600000, } +VLD3_DUP/VLD3Q_DUP:55:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:56:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:57:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:59:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:60:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:61:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:64:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:65:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD4_DUP/VLD4Q_DUP 
chunk 0 output: +VLD4_DUP/VLD4Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:8:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4_DUP/VLD4Q_DUP:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD4_DUP/VLD4Q_DUP chunk 1 output: +VLD4_DUP/VLD4Q_DUP:22:result_int8x8 [] = { 
fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:23:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:24:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:25:result_int64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:26:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:27:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:28:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:29:result_uint64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:30:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:31:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:32:result_float32x2 [] = { c1600000, c1500000, } +VLD4_DUP/VLD4Q_DUP:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD4_DUP/VLD4Q_DUP chunk 2 output: +VLD4_DUP/VLD4Q_DUP:44:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, 
fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:45:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:46:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:47:result_int64x1 [] = { fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:48:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:49:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:50:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:51:result_uint64x1 [] = { fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:52:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:53:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:54:result_float32x2 [] = { c1800000, c1700000, } +VLD4_DUP/VLD4Q_DUP:55:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:56:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:57:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:59:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:60:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:61:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:64:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:65:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VLD4_DUP/VLD4Q_DUP chunk 3 output: +VLD4_DUP/VLD4Q_DUP:66:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } 
+VLD4_DUP/VLD4Q_DUP:67:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:68:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:69:result_int64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:70:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:71:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:72:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:73:result_uint64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:74:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:75:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:76:result_float32x2 [] = { c1600000, c1500000, } +VLD4_DUP/VLD4Q_DUP:77:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:78:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:79:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:80:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:81:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:82:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:83:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:84:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:85:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:86:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:87:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMLA output: +VMLA:0:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VMLA:1:result_int16x4 [] = { 1f8c, 1f8d, 1f8e, 1f8f, } +VMLA:2:result_int32x2 [] = { 2bf7, 
2bf8, } +VMLA:3:result_int64x1 [] = { 3333333333333333, } +VMLA:4:result_uint8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VMLA:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA:6:result_uint32x2 [] = { 43ac, 43ad, } +VMLA:7:result_uint64x1 [] = { 3333333333333333, } +VMLA:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA:10:result_float32x2 [] = { 43a14e76, 43a1ce76, } +VMLA:11:result_int8x16 [] = { f, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, } +VMLA:12:result_int16x8 [] = { 4830, 4831, 4832, 4833, 4834, 4835, 4836, 4837, } +VMLA:13:result_int32x4 [] = { 470f, 4710, 4711, 4712, } +VMLA:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:15:result_uint8x16 [] = { ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, } +VMLA:16:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA:17:result_uint32x4 [] = { 3620, 3621, 3622, 3623, } +VMLA:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA:21:result_float32x4 [] = { 45f0ae15, 45f0b615, 45f0be15, 45f0c615, } + +VMLS output: +VMLS:0:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VMLS:1:result_int16x4 [] = { ffffe054, ffffe055, ffffe056, ffffe057, } +VMLS:2:result_int32x2 [] = { ffffd3e9, ffffd3ea, } +VMLS:3:result_int64x1 [] = { 3333333333333333, } +VMLS:4:result_uint8x8 [] = { c0, c1, c2, c3, c4, c5, c6, c7, } +VMLS:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS:6:result_uint32x2 [] = { ffffbc34, ffffbc35, } +VMLS:7:result_uint64x1 [] = { 3333333333333333, } +VMLS:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS:10:result_float32x2 [] = { c3b14e76, c3b0ce76, } +VMLS:11:result_int8x16 [] = { 
ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, } +VMLS:12:result_int16x8 [] = { ffffb7b0, ffffb7b1, ffffb7b2, ffffb7b3, ffffb7b4, ffffb7b5, ffffb7b6, ffffb7b7, } +VMLS:13:result_int32x4 [] = { ffffb8d1, ffffb8d2, ffffb8d3, ffffb8d4, } +VMLS:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:15:result_uint8x16 [] = { 34, 35, 36, 37, 38, 39, 3a, 3b, 3c, 3d, 3e, 3f, 40, 41, 42, 43, } +VMLS:16:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS:17:result_uint32x4 [] = { ffffc9c0, ffffc9c1, ffffc9c2, ffffc9c3, } +VMLS:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS:21:result_float32x4 [] = { c5f1ae15, c5f1a615, c5f19e15, c5f19615, } + +VMUL output: +VMUL:0:result_int8x8 [] = { fffffff0, 1, 12, 23, 34, 45, 56, 67, } +VMUL:1:result_int16x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL:2:result_int32x2 [] = { fffffcd0, fffffd03, } +VMUL:3:result_int64x1 [] = { 3333333333333333, } +VMUL:4:result_uint8x8 [] = { c0, 4, 48, 8c, d0, 14, 58, 9c, } +VMUL:5:result_uint16x4 [] = { fab0, fb05, fb5a, fbaf, } +VMUL:6:result_uint32x2 [] = { fffff9a0, fffffa06, } +VMUL:7:result_uint64x1 [] = { 3333333333333333, } +VMUL:8:result_poly8x8 [] = { c0, 84, 48, c, d0, 94, 58, 1c, } +VMUL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL:10:result_float32x2 [] = { c4053333, c3f9c000, } +VMUL:11:result_int8x16 [] = { ffffff90, 7, 7e, fffffff5, 6c, ffffffe3, 5a, ffffffd1, 48, ffffffbf, 36, ffffffad, 24, ffffff9b, 12, ffffff89, } +VMUL:12:result_int16x8 [] = { fffff780, fffff808, fffff890, fffff918, fffff9a0, fffffa28, fffffab0, fffffb38, } +VMUL:13:result_int32x4 [] = { fffff670, fffff709, fffff7a2, fffff83b, } 
+VMUL:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL:15:result_uint8x16 [] = { 60, a, b4, 5e, 8, b2, 5c, 6, b0, 5a, 4, ae, 58, 2, ac, 56, } +VMUL:16:result_uint16x8 [] = { f450, f50b, f5c6, f681, f73c, f7f7, f8b2, f96d, } +VMUL:17:result_uint32x4 [] = { fffff340, fffff40c, fffff4d8, fffff5a4, } +VMUL:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL:19:result_poly8x16 [] = { 60, ca, 34, 9e, c8, 62, 9c, 36, 30, 9a, 64, ce, 98, 32, cc, 66, } +VMUL:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL:21:result_float32x4 [] = { c4c73333, c4bac000, c4ae4ccd, c4a1d999, } + +VMUL_LANE output: +VMUL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:1:result_int16x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VMUL_LANE:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMUL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:5:result_uint16x4 [] = { bbc0, c004, c448, c88c, } +VMUL_LANE:6:result_uint32x2 [] = { fffface0, ffffb212, } +VMUL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL_LANE:10:result_float32x2 [] = { c3b66666, c3ab0000, } +VMUL_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:12:result_int16x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } +VMUL_LANE:13:result_int32x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:16:result_uint16x8 [] = { bbc0, c004, c448, c88c, ccd0, d114, d558, d99c, } +VMUL_LANE:17:result_uint32x4 [] = { fffface0, ffffb212, ffffb744, ffffbc76, } 
+VMUL_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL_LANE:21:result_float32x4 [] = { c3b66666, c3ab0000, c39f9999, c3943333, } + +VMUL_N output: +VMUL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:1:result_int16x4 [] = { fffffef0, ffffff01, ffffff12, ffffff23, } +VMUL_N:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_N:3:result_int64x1 [] = { 3333333333333333, } +VMUL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:5:result_uint16x4 [] = { fcd0, fd03, fd36, fd69, } +VMUL_N:6:result_uint32x2 [] = { fffffbc0, fffffc04, } +VMUL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL_N:10:result_float32x2 [] = { c3b26666, c3a74000, } +VMUL_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:12:result_int16x8 [] = { fffffab0, fffffb05, fffffb5a, fffffbaf, fffffc04, fffffc59, fffffcae, fffffd03, } +VMUL_N:13:result_int32x4 [] = { fffff9a0, fffffa06, fffffa6c, fffffad2, } +VMUL_N:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:16:result_uint16x8 [] = { f890, f907, f97e, f9f5, fa6c, fae3, fb5a, fbd1, } +VMUL_N:17:result_uint32x4 [] = { fffff780, fffff808, fffff890, fffff918, } +VMUL_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL_N:21:result_float32x4 [] = { c4b1cccd, c4a6b000, c49b9333, c4907667, } + +VMULL_N output: 
+VMULL_N:0:result_int32x4 [] = { 11000, 11000, 11000, 11000, } +VMULL_N:1:result_int64x2 [] = { 22000, 22000, } +VMULL_N:2:result_uint32x4 [] = { 33000, 33000, 33000, 33000, } +VMULL_N:3:result_uint64x2 [] = { 44000, 44000, } + +VMLA_LANE output: +VMLA_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:1:result_int16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:2:result_int32x2 [] = { 3e07, 3e08, } +VMLA_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLA_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:6:result_uint32x2 [] = { 3e07, 3e08, } +VMLA_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA_LANE:10:result_float32x2 [] = { 4418c687, 44190687, } +VMLA_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:12:result_int16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:13:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:16:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:17:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA_LANE:21:result_float32x4 [] = { 441a3168, 441a7168, 441ab168, 441af168, } + +VMLS_LANE output: +VMLS_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:1:result_int16x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } 
+VMLS_LANE:2:result_int32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLS_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS_LANE:6:result_uint32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS_LANE:10:result_float32x2 [] = { c420c687, c4208687, } +VMLS_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:12:result_int16x8 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, ffffc1dd, ffffc1de, ffffc1df, ffffc1e0, } +VMLS_LANE:13:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:16:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS_LANE:17:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS_LANE:21:result_float32x4 [] = { c4223168, c421f168, c421b168, c4217168, } + +VMLA_N output: +VMLA_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:1:result_int16x4 [] = { 595, 596, 597, 598, } +VMLA_N:2:result_int32x2 [] = { b3a, b3b, } +VMLA_N:3:result_int64x1 [] = { 3333333333333333, } +VMLA_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:5:result_uint16x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLA_N:6:result_uint32x2 [] = { 1684, 1685, } +VMLA_N:7:result_uint64x1 [] = { 3333333333333333, } 
+VMLA_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA_N:10:result_float32x2 [] = { 4497deb8, 4497feb8, } +VMLA_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:12:result_int16x8 [] = { 1c29, 1c2a, 1c2b, 1c2c, 1c2d, 1c2e, 1c2f, 1c30, } +VMLA_N:13:result_int32x4 [] = { 21ce, 21cf, 21d0, 21d1, } +VMLA_N:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:16:result_uint16x8 [] = { 2773, 2774, 2775, 2776, 2777, 2778, 2779, 277a, } +VMLA_N:17:result_uint32x4 [] = { 2d18, 2d19, 2d1a, 2d1b, } +VMLA_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA_N:21:result_float32x4 [] = { 4568087b, 4568187b, 4568287b, 4568387b, } + +VMLS_N output: +VMLS_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:1:result_int16x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLS_N:2:result_int32x2 [] = { fffff4a6, fffff4a7, } +VMLS_N:3:result_int64x1 [] = { 3333333333333333, } +VMLS_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:5:result_uint16x4 [] = { ef01, ef02, ef03, ef04, } +VMLS_N:6:result_uint32x2 [] = { ffffe95c, ffffe95d, } +VMLS_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS_N:10:result_float32x2 [] = { c49bdeb8, c49bbeb8, } +VMLS_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:12:result_int16x8 [] = { ffffe3b7, ffffe3b8, ffffe3b9, ffffe3ba, ffffe3bb, ffffe3bc, ffffe3bd, ffffe3be, } +VMLS_N:13:result_int32x4 [] = { 
ffffde12, ffffde13, ffffde14, ffffde15, } +VMLS_N:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:16:result_uint16x8 [] = { d86d, d86e, d86f, d870, d871, d872, d873, d874, } +VMLS_N:17:result_uint32x4 [] = { ffffd2c8, ffffd2c9, ffffd2ca, ffffd2cb, } +VMLS_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS_N:21:result_float32x4 [] = { c56a087b, c569f87b, c569e87b, c569d87b, } + +VSLI_N output: +VSLI_N:0:result_int8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VSLI_N:1:result_int16x4 [] = { ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VSLI_N:2:result_int32x2 [] = { 6, 7, } +VSLI_N:3:result_int64x1 [] = { 64fffffff0, } +VSLI_N:4:result_uint8x8 [] = { 50, 51, 52, 53, 50, 51, 52, 53, } +VSLI_N:5:result_uint16x4 [] = { 7bf0, 7bf1, 7bf2, 7bf3, } +VSLI_N:6:result_uint32x2 [] = { 3ffffff0, 3ffffff1, } +VSLI_N:7:result_uint64x1 [] = { 10, } +VSLI_N:8:result_poly8x8 [] = { 50, 51, 52, 53, 50, 51, 52, 53, } +VSLI_N:9:result_poly16x4 [] = { 7bf0, 7bf1, 7bf2, 7bf3, } +VSLI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSLI_N:11:result_int8x16 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, } +VSLI_N:12:result_int16x8 [] = { ffffff60, ffffff61, ffffff62, ffffff63, ffffff64, ffffff65, ffffff66, ffffff67, } +VSLI_N:13:result_int32x4 [] = { fe2ffff0, fe2ffff1, fe2ffff2, fe2ffff3, } +VSLI_N:14:result_int64x2 [] = { 18fff0, 18fff1, } +VSLI_N:15:result_uint8x16 [] = { 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, } +VSLI_N:16:result_uint16x8 [] = { 3ff0, 3ff1, 3ff2, 3ff3, 3ff4, 3ff5, 3ff6, 3ff7, } +VSLI_N:17:result_uint32x4 [] 
= { 1bfffff0, 1bfffff1, 1bfffff2, 1bfffff3, } +VSLI_N:18:result_uint64x2 [] = { 7ffffffffffff0, 7ffffffffffff1, } +VSLI_N:19:result_poly8x16 [] = { 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, } +VSLI_N:20:result_poly16x8 [] = { 3ff0, 3ff1, 3ff2, 3ff3, 3ff4, 3ff5, 3ff6, 3ff7, } +VSLI_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSLI_Nmax shift amount output: +VSLI_N:0:result_int8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:1:result_int16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:2:result_int32x2 [] = { fffffff0, fffffff1, } +VSLI_N:3:result_int64x1 [] = { 7ffffffffffffff0, } +VSLI_N:4:result_uint8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:5:result_uint16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:6:result_uint32x2 [] = { 7ffffff0, 7ffffff1, } +VSLI_N:7:result_uint64x1 [] = { 7ffffffffffffff0, } +VSLI_N:8:result_poly8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:9:result_poly16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSLI_N:11:result_int8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:12:result_int16x8 [] = { 7ff0, 7ff1, 7ff2, 7ff3, 7ff4, 7ff5, 7ff6, 7ff7, } +VSLI_N:13:result_int32x4 [] = { 7ffffff0, 7ffffff1, 7ffffff2, 7ffffff3, } +VSLI_N:14:result_int64x2 [] = { 7ffffffffffffff0, 7ffffffffffffff1, } +VSLI_N:15:result_uint8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSLI_N:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSLI_N:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSLI_N:19:result_poly8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSLI_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSRI_N output: 
+VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VSRI_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:2:result_int32x2 [] = { 80000001, 80000001, } +VSRI_N:3:result_int64x1 [] = { ffffffff00000000, } +VSRI_N:4:result_uint8x8 [] = { c5, c5, c5, c5, c5, c5, c5, c5, } +VSRI_N:5:result_uint16x4 [] = { ffc0, ffc0, ffc0, ffc0, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VSRI_N:7:result_uint64x1 [] = { e000000000000000, } +VSRI_N:8:result_poly8x8 [] = { c5, c5, c5, c5, c5, c5, c5, c5, } +VSRI_N:9:result_poly16x4 [] = { ffc0, ffc0, ffc0, ffc0, } +VSRI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSRI_N:11:result_int8x16 [] = { fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:12:result_int16x8 [] = { fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, } +VSRI_N:13:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:14:result_int64x2 [] = { ffff000000000000, ffff000000000000, } +VSRI_N:15:result_uint8x16 [] = { e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, } +VSRI_N:16:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VSRI_N:17:result_uint32x4 [] = { fffffe00, fffffe00, fffffe00, fffffe00, } +VSRI_N:18:result_uint64x2 [] = { fffffffffffff800, fffffffffffff800, } +VSRI_N:19:result_poly8x16 [] = { e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, } +VSRI_N:20:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VSRI_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSRI_N max shift amount output: +VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRI_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } 
+VSRI_N:2:result_int32x2 [] = { fffffff0, fffffff1, } +VSRI_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRI_N:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VSRI_N:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VSRI_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRI_N:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VSRI_N:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VSRI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSRI_N:11:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRI_N:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRI_N:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:14:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRI_N:15:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VSRI_N:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSRI_N:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRI_N:19:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VSRI_N:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSRI_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTST/VTSTQ (signed input) output: +VTST/VTSTQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:3:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:4:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:5:result_uint16x4 [] = { 0, ffff, 0, ffff, } 
+VTST/VTSTQ:6:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:7:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:10:result_float32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:15:result_uint8x16 [] = { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:16:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:17:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTST/VTSTQ (unsigned input) output: +VTST/VTSTQ:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:24:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:25:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:26:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:27:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:28:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:29:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:32:result_float32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:33:result_int8x16 
[] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:37:result_uint8x16 [] = { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:38:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:39:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VADDHN output: +VADDHN:0:result_int8x8 [] = { 32, 32, 32, 32, 32, 32, 32, 32, } +VADDHN:1:result_int16x4 [] = { 32, 32, 32, 32, } +VADDHN:2:result_int32x2 [] = { 18, 18, } +VADDHN:3:result_int64x1 [] = { 3333333333333333, } +VADDHN:4:result_uint8x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VADDHN:5:result_uint16x4 [] = { 37, 37, 37, 37, } +VADDHN:6:result_uint32x2 [] = { 3, 3, } +VADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VADDHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDHN:10:result_float32x2 [] = { 33333333, 33333333, } +VADDHN:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:16:result_uint16x8 [] = { 3333, 3333, 
3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRADDHN output: +VRADDHN:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:1:result_int16x4 [] = { 33, 33, 33, 33, } +VRADDHN:2:result_int32x2 [] = { 19, 19, } +VRADDHN:3:result_int64x1 [] = { 3333333333333333, } +VRADDHN:4:result_uint8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VRADDHN:5:result_uint16x4 [] = { 38, 38, 38, 38, } +VRADDHN:6:result_uint32x2 [] = { 4, 4, } +VRADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VRADDHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRADDHN:10:result_float32x2 [] = { 33333333, 33333333, } +VRADDHN:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 
33333333, } + +VADDL output: +VADDL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:2:result_int32x2 [] = { 33333333, 33333333, } +VADDL:3:result_int64x1 [] = { 3333333333333333, } +VADDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDL:7:result_uint64x1 [] = { 3333333333333333, } +VADDL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:10:result_float32x2 [] = { 33333333, 33333333, } +VADDL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:12:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDL:13:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDL:14:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:16:result_uint16x8 [] = { 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1ea, } +VADDL:17:result_uint32x4 [] = { 1ffe1, 1ffe2, 1ffe3, 1ffe4, } +VADDL:18:result_uint64x2 [] = { 1ffffffe0, 1ffffffe1, } +VADDL:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDL:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VADDW output: +VADDW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:2:result_int32x2 [] = { 33333333, 33333333, } +VADDW:3:result_int64x1 [] = { 3333333333333333, } +VADDW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:6:result_uint32x2 [] = { 33333333, 33333333, } 
+VADDW:7:result_uint64x1 [] = { 3333333333333333, } +VADDW:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:10:result_float32x2 [] = { 33333333, 33333333, } +VADDW:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:12:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDW:13:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDW:14:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDW:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:16:result_uint16x8 [] = { e3, e4, e5, e6, e7, e8, e9, ea, } +VADDW:17:result_uint32x4 [] = { ffe1, ffe2, ffe3, ffe4, } +VADDW:18:result_uint64x2 [] = { ffffffe0, ffffffe1, } +VADDW:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDW:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VHADD/VHADDQ output: +VHADD/VHADDQ:0:result_int8x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VHADD/VHADDQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:4:result_uint8x8 [] = { f1, f2, f2, f3, f3, f4, f4, f5, } +VHADD/VHADDQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff2, } +VHADD/VHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VHADD/VHADDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VHADD/VHADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VHADD/VHADDQ:11:result_int8x16 [] = { fffffff2, fffffff2, fffffff3, 
fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, } +VHADD/VHADDQ:12:result_int16x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:15:result_uint8x16 [] = { f4, f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, } +VHADD/VHADDQ:16:result_uint16x8 [] = { fff1, fff1, fff2, fff2, fff3, fff3, fff4, fff4, } +VHADD/VHADDQ:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VHADD/VHADDQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VHADD/VHADDQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRHADD/VRHADDQ output: +VRHADD/VRHADDQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:1:result_int16x4 [] = { fffffff1, fffffff2, fffffff2, fffffff3, } +VRHADD/VRHADDQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:4:result_uint8x8 [] = { f2, f2, f3, f3, f4, f4, f5, f5, } +VRHADD/VRHADDQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff2, } +VRHADD/VRHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRHADD/VRHADDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRHADD/VRHADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VRHADD/VRHADDQ:11:result_int8x16 [] = { fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, 
fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, } +VRHADD/VRHADDQ:12:result_int16x8 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:13:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:15:result_uint8x16 [] = { f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, fc, } +VRHADD/VRHADDQ:16:result_uint16x8 [] = { fff1, fff2, fff2, fff3, fff3, fff4, fff4, fff5, } +VRHADD/VRHADDQ:17:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRHADD/VRHADDQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRHADD/VRHADDQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VHSUB/VHSUBQ output: +VHSUB/VHSUBQ:0:result_int8x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:1:result_int16x4 [] = { ffffffff, ffffffff, 0, 0, } +VHSUB/VHSUBQ:2:result_int32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:3:result_int64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:4:result_uint8x8 [] = { fe, ff, ff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:5:result_uint16x4 [] = { ffff, 0, 0, 1, } +VHSUB/VHSUBQ:6:result_uint32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:7:result_uint64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VHSUB/VHSUBQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VHSUB/VHSUBQ:10:result_float32x2 [] = { 33333333, 33333333, } +VHSUB/VHSUBQ:11:result_int8x16 [] = { fffffffe, fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, } +VHSUB/VHSUBQ:12:result_int16x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:13:result_int32x4 [] = { ffffffff, 0, 0, 1, } 
+VHSUB/VHSUBQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:15:result_uint8x16 [] = { fb, fc, fc, fd, fd, fe, fe, ff, ff, 0, 0, 1, 1, 2, 2, 3, } +VHSUB/VHSUBQ:16:result_uint16x8 [] = { ffff, ffff, 0, 0, 1, 1, 2, 2, } +VHSUB/VHSUBQ:17:result_uint32x4 [] = { ffffffff, 0, 0, 1, } +VHSUB/VHSUBQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VHSUB/VHSUBQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VHSUB/VHSUBQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSUBL output: +VSUBL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBL:3:result_int64x1 [] = { 3333333333333333, } +VSUBL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:6:result_uint32x2 [] = { 33333333, 33333333, } +VSUBL:7:result_uint64x1 [] = { 3333333333333333, } +VSUBL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:12:result_int16x8 [] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBL:13:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBL:14:result_int64x2 [] = { 0, 1, } +VSUBL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:16:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, } +VSUBL:17:result_uint32x4 [] = { ffffffff, 0, 1, 2, } +VSUBL:18:result_uint64x2 [] = { 0, 1, } +VSUBL:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:20:result_poly16x8 [] = { 3333, 3333, 
3333, 3333, 3333, 3333, 3333, 3333, } +VSUBL:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSUBW output: +VSUBW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBW:3:result_int64x1 [] = { 3333333333333333, } +VSUBW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:6:result_uint32x2 [] = { 33333333, 33333333, } +VSUBW:7:result_uint64x1 [] = { 3333333333333333, } +VSUBW:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBW:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:12:result_int16x8 [] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBW:13:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBW:14:result_int64x2 [] = { 0, 1, } +VSUBW:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:16:result_uint16x8 [] = { fefd, fefe, feff, ff00, ff01, ff02, ff03, ff04, } +VSUBW:17:result_uint32x4 [] = { fffeffff, ffff0000, ffff0001, ffff0002, } +VSUBW:18:result_uint64x2 [] = { ffffffff00000000, ffffffff00000001, } +VSUBW:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBW:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSUBHN output: +VSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VSUBHN:2:result_int32x2 [] = { 17, 17, } +VSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VSUBHN:6:result_uint32x2 [] = { 2, 
2, } +VSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VSUBHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBHN:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBHN:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSUBHN output: +VRSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VRSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VRSUBHN:2:result_int32x2 [] = { 17, 17, } +VRSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VRSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VRSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VRSUBHN:6:result_uint32x2 [] = { 2, 2, } +VRSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VRSUBHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSUBHN:10:result_float32x2 [] = { 33333333, 33333333, } +VRSUBHN:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:13:result_int32x4 [] 
= { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VMVN/VMVNQ output: +VMVN/VMVNQ:0:result_int8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:1:result_int16x4 [] = { f, e, d, c, } +VMVN/VMVNQ:2:result_int32x2 [] = { f, e, } +VMVN/VMVNQ:3:result_int64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:4:result_uint8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:5:result_uint16x4 [] = { f, e, d, c, } +VMVN/VMVNQ:6:result_uint32x2 [] = { f, e, } +VMVN/VMVNQ:7:result_uint64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:8:result_poly8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMVN/VMVNQ:10:result_float32x2 [] = { 33333333, 33333333, } +VMVN/VMVNQ:11:result_int8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:12:result_int16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:13:result_int32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:15:result_uint8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:16:result_uint16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:17:result_uint32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:19:result_poly8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } 
+VMVN/VMVNQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMVN/VMVNQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQMOVN cumulative saturation output: +VQMOVN:0:vqmovn_s16 Neon cumulative saturation 0 +VQMOVN:1:vqmovn_s32 Neon cumulative saturation 0 +VQMOVN:2:vqmovn_s64 Neon cumulative saturation 0 +VQMOVN:3:vqmovn_u16 Neon cumulative saturation 0 +VQMOVN:4:vqmovn_u32 Neon cumulative saturation 0 +VQMOVN:5:vqmovn_u64 Neon cumulative saturation 0 + +VQMOVN output: +VQMOVN:6:result_int8x8 [] = { 12, 12, 12, 12, 12, 12, 12, 12, } +VQMOVN:7:result_int16x4 [] = { 1278, 1278, 1278, 1278, } +VQMOVN:8:result_int32x2 [] = { 12345678, 12345678, } +VQMOVN:9:result_int64x1 [] = { 3333333333333333, } +VQMOVN:10:result_uint8x8 [] = { 82, 82, 82, 82, 82, 82, 82, 82, } +VQMOVN:11:result_uint16x4 [] = { 8765, 8765, 8765, 8765, } +VQMOVN:12:result_uint32x2 [] = { 87654321, 87654321, } +VQMOVN:13:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVN:16:result_float32x2 [] = { 33333333, 33333333, } +VQMOVN:17:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:18:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:19:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:21:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:22:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:23:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:24:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 
3333, 3333, 3333, 3333, } +VQMOVN:27:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQMOVN cumulative saturation output: +VQMOVN:28:vqmovn_s16 Neon cumulative saturation 1 +VQMOVN:29:vqmovn_s32 Neon cumulative saturation 1 +VQMOVN:30:vqmovn_s64 Neon cumulative saturation 1 +VQMOVN:31:vqmovn_u16 Neon cumulative saturation 1 +VQMOVN:32:vqmovn_u32 Neon cumulative saturation 1 +VQMOVN:33:vqmovn_u64 Neon cumulative saturation 1 + +VQMOVN output: +VQMOVN:34:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQMOVN:35:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQMOVN:36:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQMOVN:37:result_int64x1 [] = { 3333333333333333, } +VQMOVN:38:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQMOVN:39:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQMOVN:40:result_uint32x2 [] = { ffffffff, ffffffff, } +VQMOVN:41:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:42:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:43:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVN:44:result_float32x2 [] = { 33333333, 33333333, } +VQMOVN:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:50:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:53:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:54:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:55:result_float32x4 [] = { 
33333333, 33333333, 33333333, 33333333, } + +VQMOVUN cumulative saturation output: +VQMOVUN:0:vqmovun_s16 Neon cumulative saturation 0 +VQMOVUN:1:vqmovun_s32 Neon cumulative saturation 0 +VQMOVUN:2:vqmovun_s64 Neon cumulative saturation 0 + +VQMOVUN output: +VQMOVUN:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:5:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:6:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:7:result_uint8x8 [] = { 34, 34, 34, 34, 34, 34, 34, 34, } +VQMOVUN:8:result_uint16x4 [] = { 5678, 5678, 5678, 5678, } +VQMOVUN:9:result_uint32x2 [] = { 12345678, 12345678, } +VQMOVUN:10:result_uint64x1 [] = { 3333333333333333, } +VQMOVUN:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:13:result_float32x2 [] = { 33333333, 33333333, } +VQMOVUN:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:16:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:17:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQMOVUN (negative input) cumulative saturation output: +VQMOVUN:25:vqmovun_s16 Neon cumulative saturation 1 +VQMOVUN:26:vqmovun_s32 Neon cumulative 
saturation 1 +VQMOVUN:27:vqmovun_s64 Neon cumulative saturation 1 + +VQMOVUN (negative input) output: +VQMOVUN:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:30:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:31:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:32:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQMOVUN:33:result_uint16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:34:result_uint32x2 [] = { 0, 0, } +VQMOVUN:35:result_uint64x1 [] = { 3333333333333333, } +VQMOVUN:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:38:result_float32x2 [] = { 33333333, 33333333, } +VQMOVUN:39:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:41:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:44:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:47:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:48:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:49:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHR_N output: +VRSHR_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHR_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHR_N:3:result_int64x1 [] = { 0, } +VRSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 
3e, } +VRSHR_N:5:result_uint16x4 [] = { 1ffe, 1ffe, 1ffe, 1ffe, } +VRSHR_N:6:result_uint32x2 [] = { 8000000, 8000000, } +VRSHR_N:7:result_uint64x1 [] = { 80000000, } +VRSHR_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:11:result_int8x16 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, 0, } +VRSHR_N:12:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:13:result_int32x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHR_N:14:result_int64x2 [] = { 0, 0, } +VRSHR_N:15:result_uint8x16 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, 40, 40, } +VRSHR_N:16:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1fff, 1fff, 1fff, 1fff, } +VRSHR_N:17:result_uint32x4 [] = { 8000000, 8000000, 8000000, 8000000, } +VRSHR_N:18:result_uint64x2 [] = { 80000000, 80000000, } +VRSHR_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHR_N (overflow test: max shift amount, positive input) output: +VRSHR_N:22:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:23:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:24:result_int32x2 [] = { 0, 0, } +VRSHR_N:25:result_int64x1 [] = { 0, } +VRSHR_N:26:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:27:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSHR_N:28:result_uint32x2 [] = { 1, 1, } +VRSHR_N:29:result_uint64x1 [] = { 1, } +VRSHR_N:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:32:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:33:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, } +VRSHR_N:34:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:35:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHR_N:36:result_int64x2 [] = { 0, 0, } +VRSHR_N:37:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:38:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:39:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHR_N:40:result_uint64x2 [] = { 1, 1, } +VRSHR_N:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VRSHR_N:44:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:45:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHR_N:46:result_int32x2 [] = { 40000000, 40000000, } +VRSHR_N:47:result_int64x1 [] = { 4000000000000000, } +VRSHR_N:48:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:49:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:50:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:51:result_uint64x1 [] = { 8000000000000000, } +VRSHR_N:52:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:53:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:54:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:55:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:56:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHR_N:57:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHR_N:58:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHR_N:59:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:60:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:61:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } 
+VRSHR_N:62:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:64:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:65:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHR_N (overflow test: shift by 3, positive input) output: +VRSHR_N:66:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHR_N:67:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHR_N:68:result_int32x2 [] = { 10000000, 10000000, } +VRSHR_N:69:result_int64x1 [] = { 1000000000000000, } +VRSHR_N:70:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:71:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:72:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:73:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:74:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:75:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:76:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:77:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHR_N:78:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHR_N:79:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHR_N:80:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHR_N:81:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:82:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:83:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:84:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:85:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:86:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:87:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + 
+VRSHR_N (overflow test: shift by 1, with negative input) output: +VRSHR_N:88:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:89:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:90:result_int32x2 [] = { c0000000, c0000000, } +VRSHR_N:91:result_int64x1 [] = { c000000000000000, } +VRSHR_N:92:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:93:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:94:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:95:result_uint64x1 [] = { 8000000000000000, } +VRSHR_N:96:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:97:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:98:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:99:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:100:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:101:result_int32x4 [] = { c0000000, c0000000, c0000000, c0000000, } +VRSHR_N:102:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSHR_N:103:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:104:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:105:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:106:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:107:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:108:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:109:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSHR_N (overflow test: shift by 3, with negative input) output: +VRSHR_N:110:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, 
fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSHR_N:111:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:112:result_int32x2 [] = { f0000000, f0000000, } +VRSHR_N:113:result_int64x1 [] = { f000000000000000, } +VRSHR_N:114:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:115:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:116:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:117:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:118:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:119:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:120:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:121:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSHR_N:122:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:123:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSHR_N:124:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSHR_N:125:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:126:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:127:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:128:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:129:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:130:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:131:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSRA_N output: +VRSRA_N:0:result_int8x8 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VRSRA_N:2:result_int32x2 [] = { fffffffd, 
fffffffe, } +VRSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VRSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VRSRA_N:5:result_uint16x4 [] = { fffd, fffe, ffff, 0, } +VRSRA_N:6:result_uint32x2 [] = { fffffff4, fffffff5, } +VRSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VRSRA_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:11:result_int8x16 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, } +VRSRA_N:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:13:result_int32x4 [] = { fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:14:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:15:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VRSRA_N:16:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, } +VRSRA_N:17:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:18:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSRA_N (checking overflow: shift by 1, positive input) output: +VRSRA_N:22:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:23:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSRA_N:24:result_int32x2 [] = { 40000000, 40000000, } +VRSRA_N:25:result_int64x1 [] = { 4000000000000000, } +VRSRA_N:26:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:27:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSRA_N:28:result_uint32x2 [] = { 80000000, 80000000, } +VRSRA_N:29:result_uint64x1 [] = { 8000000000000000, } 
+VRSRA_N:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:32:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:33:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:34:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSRA_N:35:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSRA_N:36:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSRA_N:37:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:38:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSRA_N:39:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSRA_N:40:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSRA_N:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSRA_N (checking overflow: shift by 3, positive input) output: +VRSRA_N:44:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSRA_N:45:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSRA_N:46:result_int32x2 [] = { 10000000, 10000000, } +VRSRA_N:47:result_int64x1 [] = { 1000000000000000, } +VRSRA_N:48:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:49:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSRA_N:50:result_uint32x2 [] = { 20000000, 20000000, } +VRSRA_N:51:result_uint64x1 [] = { 2000000000000000, } +VRSRA_N:52:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:53:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:54:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:55:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSRA_N:56:result_int16x8 [] = { 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, } +VRSRA_N:57:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSRA_N:58:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSRA_N:59:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:60:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSRA_N:61:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSRA_N:62:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSRA_N:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:64:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:65:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSRA_N (checking overflow: shift by max, positive input) output: +VRSRA_N:66:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:67:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:68:result_int32x2 [] = { 0, 0, } +VRSRA_N:69:result_int64x1 [] = { 0, } +VRSRA_N:70:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:71:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:72:result_uint32x2 [] = { 1, 1, } +VRSRA_N:73:result_uint64x1 [] = { 1, } +VRSRA_N:74:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:75:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:76:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:77:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:78:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:79:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:80:result_int64x2 [] = { 0, 0, } +VRSRA_N:81:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:82:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:83:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:84:result_uint64x2 [] = { 1, 1, } +VRSRA_N:85:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VRSRA_N:86:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:87:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSRA_N (checking overflow: shift by 1, negative input) output: +VRSRA_N:88:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:89:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:90:result_int32x2 [] = { c0000000, c0000000, } +VRSRA_N:91:result_int64x1 [] = { c000000000000000, } +VRSRA_N:92:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:93:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:94:result_uint32x2 [] = { 1, 1, } +VRSRA_N:95:result_uint64x1 [] = { 1, } +VRSRA_N:96:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:97:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:98:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:99:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:100:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:101:result_int32x4 [] = { c0000000, c0000000, c0000000, c0000000, } +VRSRA_N:102:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSRA_N:103:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:104:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:105:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:106:result_uint64x2 [] = { 1, 1, } +VRSRA_N:107:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:108:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:109:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:110:result_int8x8 [] = { fffffff0, 
fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:111:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:112:result_int32x2 [] = { f0000000, f0000000, } +VRSRA_N:113:result_int64x1 [] = { f000000000000000, } +VRSRA_N:114:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:115:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:116:result_uint32x2 [] = { 1, 1, } +VRSRA_N:117:result_uint64x1 [] = { 1, } +VRSRA_N:118:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:119:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:120:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:121:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:122:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:123:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSRA_N:124:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSRA_N:125:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:126:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:127:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:128:result_uint64x2 [] = { 1, 1, } +VRSRA_N:129:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:130:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:131:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:132:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:133:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:134:result_int32x2 [] = { 0, 0, } +VRSRA_N:135:result_int64x1 [] = { 0, } +VRSRA_N:136:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:137:result_uint16x4 [] = { 1, 1, 1, 1, } 
+VRSRA_N:138:result_uint32x2 [] = { 1, 1, } +VRSRA_N:139:result_uint64x1 [] = { 1, } +VRSRA_N:140:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:141:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:142:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:143:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:144:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:145:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:146:result_int64x2 [] = { 0, 0, } +VRSRA_N:147:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:148:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:149:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:150:result_uint64x2 [] = { 1, 1, } +VRSRA_N:151:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:152:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:153:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VSHLL_N output: +VSHLL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VSHLL_N:3:result_int64x1 [] = { 3333333333333333, } +VSHLL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VSHLL_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHLL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHLL_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:12:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHLL_N:13:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } 
+VSHLL_N:14:result_int64x2 [] = { ffffffffffffff80, ffffffffffffff88, } +VSHLL_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:16:result_uint16x8 [] = { 3c0, 3c4, 3c8, 3cc, 3d0, 3d4, 3d8, 3dc, } +VSHLL_N:17:result_uint32x4 [] = { fff00, fff10, fff20, fff30, } +VSHLL_N:18:result_uint64x2 [] = { 7ffffff80, 7ffffff88, } +VSHLL_N:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHLL_N:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VPADDL/VPADDLQ output: +VPADDL/VPADDLQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:2:result_int32x2 [] = { ffffffe1, ffffffe5, } +VPADDL/VPADDLQ:3:result_int64x1 [] = { ffffffffffffffe1, } +VPADDL/VPADDLQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:5:result_uint16x4 [] = { 1e1, 1e5, 1e9, 1ed, } +VPADDL/VPADDLQ:6:result_uint32x2 [] = { 1ffe1, 1ffe5, } +VPADDL/VPADDLQ:7:result_uint64x1 [] = { 1ffffffe1, } +VPADDL/VPADDLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADDL/VPADDLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VPADDL/VPADDLQ:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:12:result_int16x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, fffffff1, fffffff5, fffffff9, fffffffd, } +VPADDL/VPADDLQ:13:result_int32x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:14:result_int64x2 [] = { ffffffffffffffe1, ffffffffffffffe5, } +VPADDL/VPADDLQ:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:16:result_uint16x8 [] = { 1e1, 1e5, 1e9, 1ed, 1f1, 1f5, 1f9, 1fd, } 
+VPADDL/VPADDLQ:17:result_uint32x4 [] = { 1ffe1, 1ffe5, 1ffe9, 1ffed, } +VPADDL/VPADDLQ:18:result_uint64x2 [] = { 1ffffffe1, 1ffffffe5, } +VPADDL/VPADDLQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADDL/VPADDLQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VPADD output: +VPADD:0:result_int8x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADD:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe1, ffffffe5, } +VPADD:2:result_int32x2 [] = { ffffffe1, ffffffe1, } +VPADD:3:result_int64x1 [] = { 3333333333333333, } +VPADD:4:result_uint8x8 [] = { e1, e5, e9, ed, e1, e5, e9, ed, } +VPADD:5:result_uint16x4 [] = { ffe1, ffe5, ffe1, ffe5, } +VPADD:6:result_uint32x2 [] = { ffffffe1, ffffffe1, } +VPADD:7:result_uint64x1 [] = { 3333333333333333, } +VPADD:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADD:10:result_float32x2 [] = { c1f80000, c1f80000, } +VPADD:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:21:result_float32x4 
[] = { 33333333, 33333333, 33333333, 33333333, } + +VPADAL/VPADALQ output: +VPADAL/VPADALQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:1:result_int16x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:2:result_int32x2 [] = { ffffffd1, ffffffd6, } +VPADAL/VPADALQ:3:result_int64x1 [] = { ffffffffffffffd1, } +VPADAL/VPADALQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:5:result_uint16x4 [] = { 1d1, 1d6, 1db, 1e0, } +VPADAL/VPADALQ:6:result_uint32x2 [] = { 1ffd1, 1ffd6, } +VPADAL/VPADALQ:7:result_uint64x1 [] = { 1ffffffd1, } +VPADAL/VPADALQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADAL/VPADALQ:10:result_float32x2 [] = { 33333333, 33333333, } +VPADAL/VPADALQ:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:12:result_int16x8 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, ffffffe5, ffffffea, ffffffef, fffffff4, } +VPADAL/VPADALQ:13:result_int32x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:14:result_int64x2 [] = { ffffffffffffffd1, ffffffffffffffd6, } +VPADAL/VPADALQ:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:16:result_uint16x8 [] = { 1d1, 1d6, 1db, 1e0, 1e5, 1ea, 1ef, 1f4, } +VPADAL/VPADALQ:17:result_uint32x4 [] = { 1ffd1, 1ffd6, 1ffdb, 1ffe0, } +VPADAL/VPADALQ:18:result_uint64x2 [] = { 1ffffffd1, 1ffffffd6, } +VPADAL/VPADALQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADAL/VPADALQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHLU_N/VQSHLUQ_N (negative input) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon cumulative 
saturation 1 +VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (negative input) output: +VQSHLU_N/VQSHLUQ_N:8:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:9:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:10:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:11:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:12:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:13:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:14:result_uint32x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:15:result_uint64x1 [] = { 0, } +VQSHLU_N/VQSHLUQ_N:16:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:17:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:18:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:19:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:20:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:21:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:22:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:23:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:24:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:25:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:26:result_uint64x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:27:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:28:result_poly16x8 [] = { 
3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:29:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:30:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:31:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:34:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:35:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) output: +VQSHLU_N/VQSHLUQ_N:38:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:39:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:40:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:41:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:42:result_uint8x8 [] = { fe, fe, fe, fe, fe, fe, fe, fe, } +VQSHLU_N/VQSHLUQ_N:43:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:44:result_uint32x2 [] = { fffffffe, fffffffe, } +VQSHLU_N/VQSHLUQ_N:45:result_uint64x1 [] = { fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:46:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:47:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:48:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:49:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:50:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:51:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:52:result_int64x2 [] = { 3333333333333333, 3333333333333333, } 
+VQSHLU_N/VQSHLUQ_N:53:result_uint8x16 [] = { fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, } +VQSHLU_N/VQSHLUQ_N:54:result_uint16x8 [] = { fffe, fffe, fffe, fffe, fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:55:result_uint32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } +VQSHLU_N/VQSHLUQ_N:56:result_uint64x2 [] = { fffffffffffffffe, fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:57:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:58:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:59:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:60:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:61:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:62:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:63:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:64:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:65:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:66:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:67:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) output: +VQSHLU_N/VQSHLUQ_N:68:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:69:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:70:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:71:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:72:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:73:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:74:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:75:result_uint64x1 [] = { ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:76:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 
33, 33, } +VQSHLU_N/VQSHLUQ_N:77:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:78:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:79:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:80:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:81:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:82:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:83:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:84:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:85:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:86:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:87:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:88:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:89:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHLU_N/VQSHLUQ_N cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:90:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:91:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:92:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:93:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:94:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:95:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:96:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:97:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N output: +VQSHLU_N/VQSHLUQ_N:98:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:99:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:100:result_int32x2 [] 
= { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:101:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:102:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VQSHLU_N/VQSHLUQ_N:103:result_uint16x4 [] = { 8, 8, 8, 8, } +VQSHLU_N/VQSHLUQ_N:104:result_uint32x2 [] = { 18, 18, } +VQSHLU_N/VQSHLUQ_N:105:result_uint64x1 [] = { 40, } +VQSHLU_N/VQSHLUQ_N:106:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:107:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:108:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:109:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:110:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:111:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:112:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:113:result_uint8x16 [] = { a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, } +VQSHLU_N/VQSHLUQ_N:114:result_uint16x8 [] = { 180, 180, 180, 180, 180, 180, 180, 180, } +VQSHLU_N/VQSHLUQ_N:115:result_uint32x4 [] = { 380, 380, 380, 380, } +VQSHLU_N/VQSHLUQ_N:116:result_uint64x2 [] = { 800, 800, } +VQSHLU_N/VQSHLUQ_N:117:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:118:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:119:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VCLZ/VCLZQ output: +VCLZ/VCLZQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VCLZ/VCLZQ:1:result_int16x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:2:result_int32x2 [] = { 11, 11, } +VCLZ/VCLZQ:3:result_int64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VCLZ/VCLZQ:6:result_uint32x2 [] = { 5, 5, } +VCLZ/VCLZQ:7:result_uint64x1 [] = { 
3333333333333333, } +VCLZ/VCLZQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCLZ/VCLZQ:11:result_int8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:12:result_int16x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:13:result_int32x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:15:result_uint8x16 [] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:16:result_uint16x8 [] = { d, d, d, d, d, d, d, d, } +VCLZ/VCLZQ:17:result_uint32x4 [] = { 1f, 1f, 1f, 1f, } +VCLZ/VCLZQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VCLZ/VCLZQ (input=0) output: +VCLZ/VCLZQ:22:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:23:result_int16x4 [] = { 10, 10, 10, 10, } +VCLZ/VCLZQ:24:result_int32x2 [] = { 20, 20, } +VCLZ/VCLZQ:25:result_int64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:26:result_uint8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:27:result_uint16x4 [] = { 10, 10, 10, 10, } +VCLZ/VCLZQ:28:result_uint32x2 [] = { 20, 20, } +VCLZ/VCLZQ:29:result_uint64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:32:result_float32x2 [] = { 33333333, 33333333, } +VCLZ/VCLZQ:33:result_int8x16 [] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:34:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VCLZ/VCLZQ:35:result_int32x4 [] = { 20, 20, 20, 20, } +VCLZ/VCLZQ:36:result_int64x2 [] = { 3333333333333333, 
3333333333333333, } +VCLZ/VCLZQ:37:result_uint8x16 [] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:38:result_uint16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VCLZ/VCLZQ:39:result_uint32x4 [] = { 20, 20, 20, 20, } +VCLZ/VCLZQ:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VCLS/VCLSQ (positive input) output: +VCLS/VCLSQ:0:result_int8x8 [] = { 6, 6, 6, 6, 6, 6, 6, 6, } +VCLS/VCLSQ:1:result_int16x4 [] = { 2, 2, 2, 2, } +VCLS/VCLSQ:2:result_int32x2 [] = { 19, 19, } +VCLS/VCLSQ:3:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:7:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:11:result_int8x16 [] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:12:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:13:result_int32x4 [] = { 14, 14, 14, 14, } +VCLS/VCLSQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, } +VCLS/VCLSQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VCLS/VCLSQ (negative input) output: +VCLS/VCLSQ:22:result_int8x8 [] = { 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:23:result_int16x4 [] = { 1, 1, 1, 1, } +VCLS/VCLSQ:24:result_int32x2 [] = { 1, 1, } +VCLS/VCLSQ:25:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:28:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:29:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:30:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:31:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:32:result_float32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:33:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLS/VCLSQ:34:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:35:result_int32x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:42:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:43:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VCNT/VCNTQ output: +VCNT/VCNTQ:0:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCNT/VCNTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:2:result_int32x2 [] = { 33333333, 33333333, } 
+VCNT/VCNTQ:3:result_int64x1 [] = { 3333333333333333, } +VCNT/VCNTQ:4:result_uint8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VCNT/VCNTQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:7:result_uint64x1 [] = { 3333333333333333, } +VCNT/VCNTQ:8:result_poly8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VCNT/VCNTQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:11:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCNT/VCNTQ:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:15:result_uint8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, } +VCNT/VCNTQ:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:19:result_poly8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, } +VCNT/VCNTQ:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHRN_N cumulative saturation output: +VQSHRN_N:0:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:1:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:2:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:3:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:4:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:5:vqshrn_n_u64 Neon cumulative saturation 1 + +VQSHRN_N output: +VQSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } 
+VQSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:16:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:17:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:18:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:19:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:21:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:22:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:23:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:24:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:25:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:26:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:27:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQSHRN_N:28:vqshrn_n_s16 Neon cumulative saturation 1 +VQSHRN_N:29:vqshrn_n_s32 Neon cumulative saturation 1 +VQSHRN_N:30:vqshrn_n_s64 Neon cumulative saturation 1 +VQSHRN_N:31:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:32:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:33:vqshrn_n_u64 Neon cumulative saturation 1 + +VQSHRN_N (check saturation: shift by 3) output: +VQSHRN_N:34:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 
7f, 7f, } +VQSHRN_N:35:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:36:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:37:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:38:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:39:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:40:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:41:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:42:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:43:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:44:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:50:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:53:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:54:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:55:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHRN_N (check saturation: shift by max) cumulative saturation output: +VQSHRN_N:56:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:57:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:58:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:59:vqshrn_n_u16 Neon cumulative saturation 0 +VQSHRN_N:60:vqshrn_n_u32 Neon cumulative saturation 0 +VQSHRN_N:61:vqshrn_n_u64 Neon cumulative saturation 0 + +VQSHRN_N (check saturation: 
shift by max) output: +VQSHRN_N:62:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:63:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:64:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:65:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:66:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:67:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:68:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:69:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:70:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:71:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:72:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:73:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:74:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:75:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:76:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:77:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:78:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:79:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:80:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:81:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:82:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:83:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VPMAX output: +VPMAX:0:result_int8x8 [] = { fffffff1, fffffff3, fffffff5, fffffff7, fffffff1, fffffff3, fffffff5, fffffff7, } +VPMAX:1:result_int16x4 [] = { fffffff1, fffffff3, fffffff1, fffffff3, } +VPMAX:2:result_int32x2 [] = { fffffff1, fffffff1, } +VPMAX:3:result_int64x1 [] = { 3333333333333333, } +VPMAX:4:result_uint8x8 [] = { f1, f3, f5, f7, f1, 
f3, f5, f7, } +VPMAX:5:result_uint16x4 [] = { fff1, fff3, fff1, fff3, } +VPMAX:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VPMAX:7:result_uint64x1 [] = { 3333333333333333, } +VPMAX:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPMAX:10:result_float32x2 [] = { c1700000, c1700000, } +VPMAX:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VPMIN output: +VPMIN:0:result_int8x8 [] = { fffffff0, fffffff2, fffffff4, fffffff6, fffffff0, fffffff2, fffffff4, fffffff6, } +VPMIN:1:result_int16x4 [] = { fffffff0, fffffff2, fffffff0, fffffff2, } +VPMIN:2:result_int32x2 [] = { fffffff0, fffffff0, } +VPMIN:3:result_int64x1 [] = { 3333333333333333, } +VPMIN:4:result_uint8x8 [] = { f0, f2, f4, f6, f0, f2, f4, f6, } +VPMIN:5:result_uint16x4 [] = { fff0, fff2, fff0, fff2, } +VPMIN:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VPMIN:7:result_uint64x1 [] = { 3333333333333333, } +VPMIN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPMIN:10:result_float32x2 [] = { c1800000, c1800000, } 
+VPMIN:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHRUN_N (negative input) cumulative saturation output: +VQSHRUN_N:0:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:1:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:2:vqshrun_n_s64 Neon cumulative saturation 1 + +VQSHRUN_N (negative input) output: +VQSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:13:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 
3333, 3333, } +VQSHRUN_N:16:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:17:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHRUN_N (check cumulative saturation) cumulative saturation output: +VQSHRUN_N:25:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:26:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:27:vqshrun_n_s64 Neon cumulative saturation 1 + +VQSHRUN_N (check cumulative saturation) output: +VQSHRUN_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:30:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:31:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:32:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRUN_N:33:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRUN_N:34:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRUN_N:35:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:38:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:39:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:41:result_int32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VQSHRUN_N:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:44:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:47:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:48:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:49:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQSHRUN_N cumulative saturation output: +VQSHRUN_N:50:vqshrun_n_s16 Neon cumulative saturation 0 +VQSHRUN_N:51:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:52:vqshrun_n_s64 Neon cumulative saturation 0 + +VQSHRUN_N output: +VQSHRUN_N:53:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:54:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:55:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:56:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:57:result_uint8x8 [] = { 48, 48, 48, 48, 48, 48, 48, 48, } +VQSHRUN_N:58:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:59:result_uint32x2 [] = { deadbe, deadbe, } +VQSHRUN_N:60:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:61:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:62:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:63:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:64:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:65:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:66:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:67:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:68:result_uint8x16 [] 
= { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:69:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:70:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:71:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:72:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:73:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:74:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHRUN_N (negative input) cumulative saturation output: +VQRSHRUN_N:0:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:1:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:2:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (negative input) output: +VQRSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:13:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:16:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:17:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:19:result_uint16x8 [] = { 
3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHRUN_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQRSHRUN_N:25:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:26:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:27:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (check cumulative saturation: shift by 1) output: +VQRSHRUN_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:30:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:31:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:32:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRUN_N:33:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRUN_N:34:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRUN_N:35:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:38:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:39:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:41:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:44:result_uint16x8 [] = { 3333, 
3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:47:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:48:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:49:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) cumulative saturation output: +VQRSHRUN_N:50:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:51:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:52:vqrshrun_n_s64 Neon cumulative saturation 0 + +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) output: +VQRSHRUN_N:53:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:54:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:55:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:56:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:57:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHRUN_N:58:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHRUN_N:59:result_uint32x2 [] = { 80000000, 80000000, } +VQRSHRUN_N:60:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:61:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:62:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:63:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:64:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:65:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:66:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:67:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:68:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQRSHRUN_N:69:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:70:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:71:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:72:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:73:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:74:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) cumulative saturation output: +VQRSHRUN_N:75:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:76:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:77:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) output: +VQRSHRUN_N:78:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:79:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:80:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:81:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:82:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:83:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:84:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:85:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:86:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:87:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:88:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:89:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:90:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:91:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:92:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:93:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQRSHRUN_N:94:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:95:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:96:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:97:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:98:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:99:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VQRSHRUN_N cumulative saturation output: +VQRSHRUN_N:100:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:101:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:102:vqrshrun_n_s64 Neon cumulative saturation 0 + +VQRSHRUN_N output: +VQRSHRUN_N:103:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:104:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:105:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:106:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:107:result_uint8x8 [] = { 49, 49, 49, 49, 49, 49, 49, 49, } +VQRSHRUN_N:108:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:109:result_uint32x2 [] = { deadbf, deadbf, } +VQRSHRUN_N:110:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:111:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:112:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:113:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:114:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:115:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:116:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:117:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:118:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:119:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQRSHRUN_N:120:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:121:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:122:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:123:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:124:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VST2_LANE/VST2Q_LANE chunk 0 output: +VST2_LANE/VST2Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:4:result_uint8x8 [] = { f0, f1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:5:result_uint16x4 [] = { fff0, fff1, 0, 0, } +VST2_LANE/VST2Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:8:result_poly8x8 [] = { f0, f1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:9:result_poly16x4 [] = { fff0, fff1, 0, 0, } +VST2_LANE/VST2Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST2_LANE/VST2Q_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:12:result_int16x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:13:result_int32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:16:result_uint16x8 [] = { fff0, fff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:17:result_uint32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } 
+VST2_LANE/VST2Q_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:20:result_poly16x8 [] = { fff0, fff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:21:result_float32x4 [] = { c1800000, c1700000, 0, 0, } + +VST2_LANE/VST2Q_LANE chunk 1 output: +VST2_LANE/VST2Q_LANE:22:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:23:result_int16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:24:result_int32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:25:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:26:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:27:result_uint16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:28:result_uint32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:30:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:31:result_poly16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:32:result_float32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:34:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:35:result_int32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:38:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:39:result_uint32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:42:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:43:result_float32x4 [] = { 0, 0, 0, 0, } + +VST3_LANE/VST3Q_LANE chunk 0 output: 
+VST3_LANE/VST3Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, 0, } +VST3_LANE/VST3Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:8:result_poly8x8 [] = { f0, f1, f2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:9:result_poly16x4 [] = { fff0, fff1, fff2, 0, } +VST3_LANE/VST3Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST3_LANE/VST3Q_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:16:result_uint16x8 [] = { fff0, fff1, fff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:20:result_poly16x8 [] = { fff0, fff1, fff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:21:result_float32x4 [] = { c1800000, c1700000, c1600000, 0, } + +VST3_LANE/VST3Q_LANE chunk 1 output: +VST3_LANE/VST3Q_LANE:22:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:23:result_int16x4 [] = { 0, 0, 0, 0, } 
+VST3_LANE/VST3Q_LANE:24:result_int32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:25:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:26:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:27:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:28:result_uint32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:30:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:31:result_poly16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:32:result_float32x2 [] = { c1600000, 0, } +VST3_LANE/VST3Q_LANE:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:34:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:35:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:38:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:39:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:42:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:43:result_float32x4 [] = { 0, 0, 0, 0, } + +VST3_LANE/VST3Q_LANE chunk 2 output: +VST3_LANE/VST3Q_LANE:44:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:45:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:46:result_int32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:47:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:48:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:49:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:50:result_uint32x2 [] = { 0, 0, } 
+VST3_LANE/VST3Q_LANE:51:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:52:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:53:result_poly16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:54:result_float32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:55:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:56:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:57:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:59:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:60:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:61:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:64:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:65:result_float32x4 [] = { 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 0 output: +VST4_LANE/VST4Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, f3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VST4_LANE/VST4Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:8:result_poly8x8 [] = { f0, f1, f2, f3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } 
+VST4_LANE/VST4Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST4_LANE/VST4Q_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:12:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:13:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:16:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:17:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:20:result_poly16x8 [] = { fff0, fff1, fff2, fff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:21:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } + +VST4_LANE/VST4Q_LANE chunk 1 output: +VST4_LANE/VST4Q_LANE:22:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:23:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:24:result_int32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:25:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:26:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:27:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:28:result_uint32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:30:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:31:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:32:result_float32x2 [] = { c1600000, c1500000, } +VST4_LANE/VST4Q_LANE:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:34:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:35:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:38:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:39:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:41:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:42:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:43:result_float32x4 [] = { 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 2 output: +VST4_LANE/VST4Q_LANE:44:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:45:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:46:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:47:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:48:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:49:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:50:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:51:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:52:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:53:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:54:result_float32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:55:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:56:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:57:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:58:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:59:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:60:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:61:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:62:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:63:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:64:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:65:result_float32x4 [] = { 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 3 output: +VST4_LANE/VST4Q_LANE:66:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:67:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:68:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:69:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:70:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:71:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:72:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:73:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:74:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:75:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:76:result_float32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:77:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:78:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:79:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:80:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:81:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:82:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:83:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:84:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:85:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:86:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:87:result_float32x4 [] = { 0, 0, 0, 0, } + +VTBL1 output: +VTBL1:0:result_int8x8 [] = { 0, fffffff2, fffffff2, fffffff2, 0, 0, fffffff2, fffffff2, } +VTBL1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL1:3:result_int64x1 [] = { 3333333333333333, } +VTBL1:4:result_uint8x8 [] = { 0, f3, f3, f3, 0, 0, f3, f3, } +VTBL1:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL1:7:result_uint64x1 [] = { 3333333333333333, } +VTBL1:8:result_poly8x8 [] = { 0, f3, f3, f3, 0, 0, f3, f3, } +VTBL1:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL1:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTBL2 output: +VTBL2:0:result_int8x8 [] = { fffffff6, fffffff3, fffffff3, fffffff3, 0, 0, fffffff3, fffffff3, } +VTBL2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:2:result_int32x2 [] = { 33333333, 33333333, } 
+VTBL2:3:result_int64x1 [] = { 3333333333333333, } +VTBL2:4:result_uint8x8 [] = { f6, f5, f5, f5, 0, 0, f5, f5, } +VTBL2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL2:7:result_uint64x1 [] = { 3333333333333333, } +VTBL2:8:result_poly8x8 [] = { f6, f5, f5, f5, 0, 0, f5, f5, } +VTBL2:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL2:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTBL3 output: +VTBL3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 0, fffffff4, fffffff4, } +VTBL3:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL3:3:result_int64x1 [] = { 3333333333333333, } +VTBL3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, 0, f7, f7, } +VTBL3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL3:7:result_uint64x1 [] = { 3333333333333333, } +VTBL3:8:result_poly8x8 [] = { f8, f7, f7, f7, ff, 0, f7, f7, } +VTBL3:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VTBL3:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL3:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTBL4 output: +VTBL4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 0, fffffff5, fffffff5, } +VTBL4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL4:3:result_int64x1 [] = { 3333333333333333, } +VTBL4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, 0, f9, f9, } +VTBL4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL4:7:result_uint64x1 [] = { 3333333333333333, } +VTBL4:8:result_poly8x8 [] = { fa, f9, f9, f9, 3, 0, f9, f9, } +VTBL4:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL4:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL4:15:result_uint8x16 [] 
= { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL4:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTBX1 output: +VTBX1:0:result_int8x8 [] = { 33, fffffff2, fffffff2, fffffff2, 33, 33, fffffff2, fffffff2, } +VTBX1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX1:3:result_int64x1 [] = { 3333333333333333, } +VTBX1:4:result_uint8x8 [] = { cc, f3, f3, f3, cc, cc, f3, f3, } +VTBX1:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX1:7:result_uint64x1 [] = { 3333333333333333, } +VTBX1:8:result_poly8x8 [] = { cc, f3, f3, f3, cc, cc, f3, f3, } +VTBX1:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX1:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VTBX1:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTBX2 output: +VTBX2:0:result_int8x8 [] = { fffffff6, fffffff3, fffffff3, fffffff3, 33, 33, fffffff3, fffffff3, } +VTBX2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX2:3:result_int64x1 [] = { 3333333333333333, } +VTBX2:4:result_uint8x8 [] = { f6, f5, f5, f5, cc, cc, f5, f5, } +VTBX2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX2:7:result_uint64x1 [] = { 3333333333333333, } +VTBX2:8:result_poly8x8 [] = { f6, f5, f5, f5, cc, cc, f5, f5, } +VTBX2:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX2:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTBX3 output: +VTBX3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 33, fffffff4, fffffff4, } +VTBX3:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:2:result_int32x2 [] = { 33333333, 33333333, } 
+VTBX3:3:result_int64x1 [] = { 3333333333333333, } +VTBX3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, cc, f7, f7, } +VTBX3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX3:7:result_uint64x1 [] = { 3333333333333333, } +VTBX3:8:result_poly8x8 [] = { f8, f7, f7, f7, ff, cc, f7, f7, } +VTBX3:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX3:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VTBX4 output: +VTBX4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 33, fffffff5, fffffff5, } +VTBX4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX4:3:result_int64x1 [] = { 3333333333333333, } +VTBX4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, cc, f9, f9, } +VTBX4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX4:7:result_uint64x1 [] = { 3333333333333333, } +VTBX4:8:result_poly8x8 [] = { fa, f9, f9, f9, 3, cc, f9, f9, } +VTBX4:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VTBX4:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX4:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:13:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:14:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:19:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:20:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:21:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } + +VRECPE/VRECPEQ (positive input) output: +VRECPE/VRECPEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRECPE/VRECPEQ:1:result_uint32x4 [] = { bf000000, bf000000, bf000000, bf000000, } +VRECPE/VRECPEQ:2:result_float32x2 [] = { 3f068000, 3f068000, } +VRECPE/VRECPEQ:3:result_float32x4 [] = { 3c030000, 3c030000, 3c030000, 3c030000, } + +VRECPE/VRECPEQ (negative input) output: +VRECPE/VRECPEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRECPE/VRECPEQ:5:result_uint32x4 [] = { ee800000, ee800000, ee800000, ee800000, } +VRECPE/VRECPEQ:6:result_float32x2 [] = { bdcc8000, bdcc8000, } +VRECPE/VRECPEQ:7:result_float32x4 [] = { bc030000, bc030000, bc030000, bc030000, } + +VRECPE/VRECPEQ FP special (NaN, infinity) output: +VRECPE/VRECPEQ:8:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRECPE/VRECPEQ:9:result_float32x4 [] = { 0, 0, 0, 0, } + +VRECPE/VRECPEQ FP special (zero, large value) output: +VRECPE/VRECPEQ:10:result_float32x2 [] = { 7f800000, 7f800000, } +VRECPE/VRECPEQ:11:result_float32x4 [] = { 0, 0, 0, 0, } + 
+VRECPE/VRECPEQ FP special (-0, -infinity) output: +VRECPE/VRECPEQ:12:result_float32x2 [] = { ff800000, ff800000, } +VRECPE/VRECPEQ:13:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VRECPE/VRECPEQ FP special (large negative value) output: +VRECPE/VRECPEQ:14:result_float32x2 [] = { 80000000, 80000000, } + +VRSQRTE/VRSQRTEQ output: +VRSQRTE/VRSQRTEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRSQRTE/VRSQRTEQ:1:result_uint32x4 [] = { 9c800000, 9c800000, 9c800000, 9c800000, } +VRSQRTE/VRSQRTEQ:2:result_float32x2 [] = { 3e498000, 3e498000, } +VRSQRTE/VRSQRTEQ:3:result_float32x4 [] = { 3e700000, 3e700000, 3e700000, 3e700000, } + +VRSQRTE/VRSQRTEQ (2) output: +VRSQRTE/VRSQRTEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRSQRTE/VRSQRTEQ:5:result_uint32x4 [] = { ae800000, ae800000, ae800000, ae800000, } + +VRSQRTE/VRSQRTEQ (3) output: +VRSQRTE/VRSQRTEQ:6:result_uint32x2 [] = { b4800000, b4800000, } +VRSQRTE/VRSQRTEQ:7:result_uint32x4 [] = { ed000000, ed000000, ed000000, ed000000, } + +VRSQRTE/VRSQRTEQ FP special (NaN, 0) output: +VRSQRTE/VRSQRTEQ:8:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTE/VRSQRTEQ:9:result_float32x4 [] = { 7f800000, 7f800000, 7f800000, 7f800000, } + +VRSQRTE/VRSQRTEQ FP special (negative, infinity) output: +VRSQRTE/VRSQRTEQ:10:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTE/VRSQRTEQ:11:result_float32x4 [] = { 0, 0, 0, 0, } + +VRSQRTE/VRSQRTEQ FP special (-0, -infinity) output: +VRSQRTE/VRSQRTEQ:12:result_float32x2 [] = { ff800000, ff800000, } +VRSQRTE/VRSQRTEQ:13:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:0:result_uint32x2 [] = { ffffffff, 0, } +VCAGE/VCAGEQ:1:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGE/VCAGEQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:0:result_uint32x2 [] = { 
ffffffff, ffffffff, } +VCALE/VCALEQ:1:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:2:result_uint32x2 [] = { 0, 0, } +VCALE/VCALEQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:0:result_uint32x2 [] = { 0, 0, } +VCAGT/VCAGTQ:1:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGT/VCAGTQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:0:result_uint32x2 [] = { 0, ffffffff, } +VCALT/VCALTQ:1:result_uint32x4 [] = { 0, 0, 0, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:2:result_uint32x2 [] = { 0, 0, } +VCALT/VCALTQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCVT/VCVTQ output: +VCVT/VCVTQ:0:result_float32x2 [] = { c1800000, c1700000, } +VCVT/VCVTQ:1:result_float32x2 [] = { 4f800000, 4f800000, } +VCVT/VCVTQ:2:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VCVT/VCVTQ:3:result_float32x4 [] = { 4f800000, 4f800000, 4f800000, 4f800000, } +VCVT/VCVTQ:4:result_int32x2 [] = { fffffff1, 5, } +VCVT/VCVTQ:5:result_uint32x2 [] = { 0, 5, } +VCVT/VCVTQ:6:result_int32x4 [] = { 0, 0, f, fffffff1, } +VCVT/VCVTQ:7:result_uint32x4 [] = { 0, 0, f, 0, } + +VCVT_N/VCVTQ_N output: +VCVT_N/VCVTQ_N:8:result_float32x2 [] = { c0800000, c0700000, } +VCVT_N/VCVTQ_N:9:result_float32x2 [] = { 4c000000, 4c000000, } +VCVT_N/VCVTQ_N:10:result_float32x4 [] = { b2800000, b2700000, b2600000, b2500000, } +VCVT_N/VCVTQ_N:11:result_float32x4 [] = { 49800000, 49800000, 49800000, 49800000, } +VCVT_N/VCVTQ_N:12:result_int32x2 [] = { ff0b3333, 54cccd, } +VCVT_N/VCVTQ_N:13:result_uint32x2 [] = { 0, 15, } +VCVT_N/VCVTQ_N:14:result_int32x4 [] = { 0, 0, 1e3d7, fffe1c29, } +VCVT_N/VCVTQ_N:15:result_uint32x4 [] = { 0, 0, 1e, 0, } + +VCVT/VCVTQ (check rounding) output: +VCVT/VCVTQ:16:result_int32x2 [] = { a, a, } +VCVT/VCVTQ:17:result_uint32x2 [] = { a, a, } 
+VCVT/VCVTQ:18:result_int32x4 [] = { 7d, 7d, 7d, 7d, } +VCVT/VCVTQ:19:result_uint32x4 [] = { 7d, 7d, 7d, 7d, } + +VCVT_N/VCVTQ_N (check rounding) output: +VCVT_N/VCVTQ_N:20:result_int32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:21:result_uint32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:22:result_int32x4 [] = { fbccc, fbccc, fbccc, fbccc, } +VCVT_N/VCVTQ_N:23:result_uint32x4 [] = { fbccc, fbccc, fbccc, fbccc, } + +VCVT_N/VCVTQ_N (check saturation) output: +VCVT_N/VCVTQ_N:24:result_int32x2 [] = { 7fffffff, 7fffffff, } +VCVT_N/VCVTQ_N:25:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } + +VRECPS/VRECPSQ output: +VRECPS/VRECPSQ:0:result_float32x2 [] = { c2e19eb7, c2e19eb7, } +VRECPS/VRECPSQ:1:result_float32x4 [] = { c1db851f, c1db851f, c1db851f, c1db851f, } + +VRECPS/VRECPSQ FP special (NAN) and normal values output: +VRECPS/VRECPSQ:2:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRECPS/VRECPSQ:3:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VRECPS/VRECPSQ FP special (infinity, 0) and normal values output: +VRECPS/VRECPSQ:4:result_float32x2 [] = { ff800000, ff800000, } +VRECPS/VRECPSQ:5:result_float32x4 [] = { 40000000, 40000000, 40000000, 40000000, } + +VRECPS/VRECPSQ FP special (infinity, 0) output: +VRECPS/VRECPSQ:6:result_float32x2 [] = { 40000000, 40000000, } +VRECPS/VRECPSQ:7:result_float32x4 [] = { 40000000, 40000000, 40000000, 40000000, } + +VRSQRTS/VRSQRTSQ output: +VRSQRTS/VRSQRTSQ:0:result_float32x2 [] = { c2796b84, c2796b84, } +VRSQRTS/VRSQRTSQ:1:result_float32x4 [] = { c0e4a3d8, c0e4a3d8, c0e4a3d8, c0e4a3d8, } + +VRSQRTS/VRSQRTSQ FP special (NAN) and normal values output: +VRSQRTS/VRSQRTSQ:2:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTS/VRSQRTSQ:3:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VRSQRTS/VRSQRTSQ FP special (infinity, 0) and normal values output: +VRSQRTS/VRSQRTSQ:4:result_float32x2 [] = { ff800000, ff800000, } +VRSQRTS/VRSQRTSQ:5:result_float32x4 [] = { 3fc00000, 
3fc00000, 3fc00000, 3fc00000, } + +VRSQRTS/VRSQRTSQ FP special (infinity, 0) output: +VRSQRTS/VRSQRTSQ:6:result_float32x2 [] = { 3fc00000, 3fc00000, } +VRSQRTS/VRSQRTSQ:7:result_float32x4 [] = { 3fc00000, 3fc00000, 3fc00000, 3fc00000, } diff --git a/ref-rvct-neon.txt b/ref-rvct-neon.txt new file mode 100644 index 0000000..0dac13f --- /dev/null +++ b/ref-rvct-neon.txt @@ -0,0 +1,7900 @@ + +VLD1/VLD1Q output: +VLD1/VLD1Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD1/VLD1Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD1/VLD1Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD1/VLD1Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD1/VLD1Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD1/VLD1Q:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD1/VLD1Q:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD1/VLD1Q:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD1/VLD1Q:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD1/VLD1Q:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } 
+VLD1/VLD1Q:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD1/VLD1Q:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD1/VLD1Q:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VLD1/VLD1Q:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } + +VADD/VADDQ output: +VADD/VADDQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, } +VADD/VADDQ:1:result_int16x4 [] = { ffffffec, ffffffed, ffffffee, ffffffef, } +VADD/VADDQ:2:result_int32x2 [] = { fffffff3, fffffff4, } +VADD/VADDQ:3:result_int64x1 [] = { 54, } +VADD/VADDQ:4:result_uint8x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:5:result_uint16x4 [] = { e, f, 10, 11, } +VADD/VADDQ:6:result_uint32x2 [] = { 18, 19, } +VADD/VADDQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VADD/VADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADD/VADDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADD/VADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VADD/VADDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VADD/VADDQ:12:result_int8x16 [] = { ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VADD/VADDQ:13:result_int16x8 [] = { ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VADD/VADDQ:14:result_int32x4 [] = { ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VADD/VADDQ:15:result_int64x2 [] = { 8, 9, } +VADD/VADDQ:16:result_uint8x16 [] = { fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:17:result_uint16x8 [] = { fff3, fff4, fff5, fff6, fff7, fff8, fff9, fffa, } +VADD/VADDQ:18:result_uint32x4 [] = { 27, 28, 29, 2a, } +VADD/VADDQ:19:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff4, } 
+VADD/VADDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADD/VADDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADD/VADDQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADD/VADDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +float32: +VADD/VADDQ:24:result_float32x2 [] = { 40d9999a, 40d9999a, } +VADD/VADDQ:25:result_float32x4 [] = { 41100000, 41100000, 41100000, 41100000, } + +VLD1_LANE/VLD1_LANEQ output: +VLD1_LANE/VLD1_LANEQ:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, ffffffaa, } +VLD1_LANE/VLD1_LANEQ:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:2:result_int32x2 [] = { aaaaaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, f0, } +VLD1_LANE/VLD1_LANEQ:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, fff0, } +VLD1_LANE/VLD1_LANEQ:6:result_uint32x2 [] = { aaaaaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, f0, } +VLD1_LANE/VLD1_LANEQ:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, fff0, } +VLD1_LANE/VLD1_LANEQ:10:result_float32x2 [] = { aaaaaaaa, c1800000, } +VLD1_LANE/VLD1_LANEQ:11:result_float16x4 [] = { aaaa, aaaa, cc00, aaaa, } +VLD1_LANE/VLD1_LANEQ:12:result_int8x16 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:13:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, ffffaaaa, ffffaaaa, } +VLD1_LANE/VLD1_LANEQ:14:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:15:result_int64x2 [] = { aaaaaaaaaaaaaaaa, fffffffffffffff0, } 
+VLD1_LANE/VLD1_LANEQ:16:result_uint8x16 [] = { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, f0, aa, aa, aa, } +VLD1_LANE/VLD1_LANEQ:17:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, aaaa, } +VLD1_LANE/VLD1_LANEQ:18:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:19:result_uint64x2 [] = { fffffffffffffff0, aaaaaaaaaaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:20:result_poly8x16 [] = { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, f0, aa, aa, aa, } +VLD1_LANE/VLD1_LANEQ:21:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, aaaa, } +VLD1_LANE/VLD1_LANEQ:22:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, c1800000, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:23:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, cc00, aaaa, aaaa, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1800000, c1800000, } +VLD1_DUP/VLD1_DUPQ:11:result_float16x4 [] = { cc00, cc00, cc00, cc00, } +VLD1_DUP/VLD1_DUPQ:12:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:13:result_int16x8 [] = { fffffff0, 
fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:14:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:16:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:20:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:21:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:22:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } +VLD1_DUP/VLD1_DUPQ:23:result_float16x8 [] = { cc00, cc00, cc00, cc00, cc00, cc00, cc00, cc00, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1700000, c1700000, } +VLD1_DUP/VLD1_DUPQ:11:result_float16x4 [] = { cb80, cb80, cb80, cb80, } +VLD1_DUP/VLD1_DUPQ:12:result_int8x16 [] 
= { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:13:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:15:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:16:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:17:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:19:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:20:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:22:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } +VLD1_DUP/VLD1_DUPQ:23:result_float16x8 [] = { cb80, cb80, cb80, cb80, cb80, cb80, cb80, cb80, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } 
+VLD1_DUP/VLD1_DUPQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:10:result_float32x2 [] = { c1600000, c1600000, } +VLD1_DUP/VLD1_DUPQ:11:result_float16x4 [] = { cb00, cb00, cb00, cb00, } +VLD1_DUP/VLD1_DUPQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:13:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:14:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:15:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:16:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:17:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:18:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:19:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:20:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:21:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:22:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } +VLD1_DUP/VLD1_DUPQ:23:result_float16x8 [] = { cb00, cb00, cb00, cb00, cb00, cb00, cb00, cb00, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:6:result_uint32x2 [] = { 
fffffff0, fffffff0, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1800000, c1800000, } +VDUP/VDUPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VDUP/VDUPQ:12:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:13:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:14:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:16:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:20:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:21:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:22:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } +VDUP/VDUPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff1, 
fffffff1, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1700000, c1700000, } +VDUP/VDUPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VDUP/VDUPQ:12:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:13:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:15:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:16:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:17:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:19:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:20:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:22:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } +VDUP/VDUPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } 
+VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:10:result_float32x2 [] = { c1600000, c1600000, } +VDUP/VDUPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VDUP/VDUPQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:13:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:14:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:15:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:16:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:17:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:18:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:19:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:20:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:21:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:22:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } +VDUP/VDUPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } 
+VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1800000, c1800000, } +VMOV/VMOVQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMOV/VMOVQ:12:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:13:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:14:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:16:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:20:result_poly8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:21:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:22:result_float32x4 [] = { c1800000, c1800000, c1800000, c1800000, } +VMOV/VMOVQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } 
+VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1700000, c1700000, } +VMOV/VMOVQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMOV/VMOVQ:12:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:13:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:15:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:16:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:17:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:19:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:20:result_poly8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:22:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } +VMOV/VMOVQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } 
+VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:8:result_poly8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:9:result_poly16x4 [] = { fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:10:result_float32x2 [] = { c1600000, c1600000, } +VMOV/VMOVQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMOV/VMOVQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:13:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:14:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:15:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:16:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:17:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:18:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:19:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:20:result_poly8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:21:result_poly16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:22:result_float32x4 [] = { c1600000, c1600000, c1600000, c1600000, } +VMOV/VMOVQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VGET_HIGH output: +VGET_HIGH:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VGET_HIGH:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VGET_HIGH:2:result_int32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:3:result_int64x1 [] = { fffffffffffffff1, } +VGET_HIGH:4:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VGET_HIGH:5:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VGET_HIGH:6:result_uint32x2 [] = { fffffff2, fffffff3, } 
+VGET_HIGH:7:result_uint64x1 [] = { fffffffffffffff1, } +VGET_HIGH:8:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VGET_HIGH:9:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VGET_HIGH:10:result_float32x2 [] = { c1600000, c1500000, } +VGET_HIGH:11:result_float16x4 [] = { ca00, c980, c900, c880, } +VGET_HIGH:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VGET_LOW output: +VGET_LOW:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VGET_LOW:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VGET_LOW:2:result_int32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:3:result_int64x1 [] = { fffffffffffffff0, } +VGET_LOW:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VGET_LOW:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VGET_LOW:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:7:result_uint64x1 [] = { fffffffffffffff0, } +VGET_LOW:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VGET_LOW:9:result_poly16x4 [] = { fff0, 
fff1, fff2, fff3, } +VGET_LOW:10:result_float32x2 [] = { c1800000, c1700000, } +VGET_LOW:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VGET_LOW:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_LANE cumulative saturation output: +VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon cumulative saturation 0 + +VQDMLAL_LANE output: +VQDMLAL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VQDMLAL_LANE:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_LANE:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:16:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL_LANE:17:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL_LANE:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_LANE (mul with input=0) cumulative saturation output: +VQDMLAL_LANE:26:vqdmlal_lane_s16 Neon cumulative saturation 0 +VQDMLAL_LANE:27:vqdmlal_lane_s32 Neon cumulative saturation 0 + +VQDMLAL_LANE (mul with input=0) output: +VQDMLAL_LANE:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:31:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:37:result_poly16x4 [] = { 
3333, 3333, 3333, 3333, } +VQDMLAL_LANE:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_LANE:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:42:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLAL_LANE:43:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLAL_LANE:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_LANE:52:vqdmlal_lane_s16 Neon cumulative saturation 1 +VQDMLAL_LANE:53:vqdmlal_lane_s32 Neon cumulative saturation 1 + +VQDMLAL_LANE (check mul cumulative saturation) output: +VQDMLAL_LANE:54:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:55:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:56:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:57:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:58:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:59:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:60:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:61:result_uint64x1 [] = { 3333333333333333, } 
+VQDMLAL_LANE:62:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:63:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:64:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:65:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_LANE:66:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:67:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:68:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_LANE:69:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL_LANE:70:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:71:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:72:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:73:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:74:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:75:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:76:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:77:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_LANE cumulative saturation output: +VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon cumulative saturation 0 + +VQDMLSL_LANE output: +VQDMLSL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:9:result_uint64x1 [] = { 
3333333333333333, } +VQDMLSL_LANE:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL_LANE:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:16:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL_LANE:17:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL_LANE:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_LANE (mul with input=0) cumulative saturation output: +VQDMLSL_LANE:26:vqdmlsl_lane_s16 Neon cumulative saturation 0 +VQDMLSL_LANE:27:vqdmlsl_lane_s32 Neon cumulative saturation 0 + +VQDMLSL_LANE (mul with input=0) output: +VQDMLSL_LANE:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:31:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:34:result_uint32x2 [] = 
{ 33333333, 33333333, } +VQDMLSL_LANE:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL_LANE:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:42:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLSL_LANE:43:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLSL_LANE:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_LANE:52:vqdmlsl_lane_s16 Neon cumulative saturation 1 +VQDMLSL_LANE:53:vqdmlsl_lane_s32 Neon cumulative saturation 1 + +VQDMLSL_LANE (check mul cumulative saturation) output: +VQDMLSL_LANE:54:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:55:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:56:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:57:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:58:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMLSL_LANE:59:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:60:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:61:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:62:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:63:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:64:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:65:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL_LANE:66:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:67:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:68:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_LANE:69:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_LANE:70:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:71:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:72:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:73:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:74:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:75:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:76:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:77:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_N cumulative saturation output: +VQDMLAL_N:0:vqdmlal_n_s16 Neon cumulative saturation 0 +VQDMLAL_N:1:vqdmlal_n_s32 Neon cumulative saturation 0 + +VQDMLAL_N output: +VQDMLAL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMLAL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_N:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:16:result_int32x4 [] = { 1684, 1685, 1686, 1687, } +VQDMLAL_N:17:result_int64x2 [] = { 21ce, 21cf, } +VQDMLAL_N:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLAL_N:26:vqdmlal_n_s16 Neon cumulative saturation 1 +VQDMLAL_N:27:vqdmlal_n_s32 Neon cumulative saturation 1 + +VQDMLAL_N (check mul cumulative saturation) output: +VQDMLAL_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:31:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:33:result_uint16x4 [] = { 
3333, 3333, 3333, 3333, } +VQDMLAL_N:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL_N:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:42:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_N:43:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL_N:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_N cumulative saturation output: +VQDMLSL_N:0:vqdmlsl_n_s16 Neon cumulative saturation 0 +VQDMLSL_N:1:vqdmlsl_n_s32 Neon cumulative saturation 0 + +VQDMLSL_N output: +VQDMLSL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:8:result_uint32x2 [] = { 33333333, 
33333333, } +VQDMLSL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL_N:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:16:result_int32x4 [] = { ffffe95c, ffffe95d, ffffe95e, ffffe95f, } +VQDMLSL_N:17:result_int64x2 [] = { ffffffffffffde12, ffffffffffffde13, } +VQDMLSL_N:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_N:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL_N (check mul cumulative saturation) cumulative saturation output: +VQDMLSL_N:26:vqdmlsl_n_s16 Neon cumulative saturation 1 +VQDMLSL_N:27:vqdmlsl_n_s32 Neon cumulative saturation 1 + +VQDMLSL_N (check mul cumulative saturation) output: +VQDMLSL_N:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:31:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:34:result_uint32x2 [] = { 
33333333, 33333333, } +VQDMLSL_N:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL_N:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:42:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_N:43:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_N:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_N:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VEXT/VEXTQ output: +VEXT/VEXTQ:0:result_int8x8 [] = { fffffff7, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:1:result_int16x4 [] = { fffffff3, 22, 22, 22, } +VEXT/VEXTQ:2:result_int32x2 [] = { fffffff1, 33, } +VEXT/VEXTQ:3:result_int64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:4:result_uint8x8 [] = { f6, f7, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:5:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VEXT/VEXTQ:6:result_uint32x2 [] = { fffffff1, 77, } +VEXT/VEXTQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:8:result_poly8x8 [] = { f6, f7, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:9:result_poly16x4 [] = { fff2, fff3, 66, 66, } 
+VEXT/VEXTQ:10:result_float32x2 [] = { c1700000, 42066666, } +VEXT/VEXTQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VEXT/VEXTQ:12:result_int8x16 [] = { fffffffe, ffffffff, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:13:result_int16x8 [] = { fffffff7, 22, 22, 22, 22, 22, 22, 22, } +VEXT/VEXTQ:14:result_int32x4 [] = { fffffff3, 33, 33, 33, } +VEXT/VEXTQ:15:result_int64x2 [] = { fffffffffffffff1, 44, } +VEXT/VEXTQ:16:result_uint8x16 [] = { fc, fd, fe, ff, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:17:result_uint16x8 [] = { fff6, fff7, 66, 66, 66, 66, 66, 66, } +VEXT/VEXTQ:18:result_uint32x4 [] = { fffffff3, 77, 77, 77, } +VEXT/VEXTQ:19:result_uint64x2 [] = { fffffffffffffff1, 88, } +VEXT/VEXTQ:20:result_poly8x16 [] = { fc, fd, fe, ff, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:21:result_poly16x8 [] = { fff6, fff7, 66, 66, 66, 66, 66, 66, } +VEXT/VEXTQ:22:result_float32x4 [] = { c1500000, 4204cccd, 4204cccd, 4204cccd, } +VEXT/VEXTQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHR_N output: +VSHR_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHR_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHR_N:3:result_int64x1 [] = { ffffffffffffffff, } +VSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, } +VSHR_N:5:result_uint16x4 [] = { 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:6:result_uint32x2 [] = { 7ffffff, 7ffffff, } +VSHR_N:7:result_uint64x1 [] = { 7fffffff, } +VSHR_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHR_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHR_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHR_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHR_N:12:result_int8x16 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, 
ffffffff, ffffffff, } +VSHR_N:13:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:14:result_int32x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VSHR_N:15:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHR_N:16:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHR_N:17:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:18:result_uint32x4 [] = { 7ffffff, 7ffffff, 7ffffff, 7ffffff, } +VSHR_N:19:result_uint64x2 [] = { 7fffffff, 7fffffff, } +VSHR_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHR_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHR_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHR_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHRN_N output: +VSHRN_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHRN_N:1:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHRN_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VSHRN_N:4:result_uint8x8 [] = { fc, fc, fc, fc, fd, fd, fd, fd, } +VSHRN_N:5:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VSHRN_N:6:result_uint32x2 [] = { fffffffe, fffffffe, } +VSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHRN_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHRN_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHRN_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHRN_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:15:result_int64x2 [] = { 
3333333333333333, 3333333333333333, } +VSHRN_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSHRN_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHRN_N (with input = 0) output: +VRSHRN_N:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:2:result_int32x2 [] = { 0, 0, } +VRSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:6:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VRSHRN_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHRN_N output: +VRSHRN_N:24:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHRN_N:25:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VRSHRN_N:26:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHRN_N:27:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:28:result_uint8x8 [] = { fc, fc, fd, fd, fd, fd, fe, fe, } +VRSHRN_N:29:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VRSHRN_N:30:result_uint32x2 [] = { fffffffe, fffffffe, } +VRSHRN_N:31:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:34:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:35:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VRSHRN_N:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHRN_N (with large shift amount) output: +VRSHRN_N:48:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:49:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:50:result_int32x2 [] = { 0, 0, } +VRSHRN_N:51:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:52:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:53:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:54:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:55:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHRN_N:58:result_float32x2 [] = { 33333333, 33333333, } +VRSHRN_N:59:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:61:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:62:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:65:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:66:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRN_N cumulative saturation output: 
+VQRSHRN_N:0:vqrshrn_n_s16 Neon cumulative saturation 0 +VQRSHRN_N:1:vqrshrn_n_s32 Neon cumulative saturation 0 +VQRSHRN_N:2:vqrshrn_n_s64 Neon cumulative saturation 0 +VQRSHRN_N:3:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:4:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:5:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N output: +VQRSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VQRSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VQRSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQRSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:16:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRN_N:18:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:19:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:20:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:27:result_poly16x8 [] = { 3333, 3333, 3333, 
3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQRSHRN_N:30:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:31:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:32:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:33:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:34:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:35:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N (check saturation: shift by 3) output: +VQRSHRN_N:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:37:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:38:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:39:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:40:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:41:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:42:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:43:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:44:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:45:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:46:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:47:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRN_N:48:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:49:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:50:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:51:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:52:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:53:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:54:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VQRSHRN_N:55:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:56:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:58:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRN_N (check saturation: shift by max) cumulative saturation output: +VQRSHRN_N:60:vqrshrn_n_s16 Neon cumulative saturation 1 +VQRSHRN_N:61:vqrshrn_n_s32 Neon cumulative saturation 1 +VQRSHRN_N:62:vqrshrn_n_s64 Neon cumulative saturation 1 +VQRSHRN_N:63:vqrshrn_n_u16 Neon cumulative saturation 1 +VQRSHRN_N:64:vqrshrn_n_u32 Neon cumulative saturation 1 +VQRSHRN_N:65:vqrshrn_n_u64 Neon cumulative saturation 1 + +VQRSHRN_N (check saturation: shift by max) output: +VQRSHRN_N:66:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:67:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:68:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:69:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:70:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:71:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:72:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:73:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:74:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:75:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRN_N:76:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRN_N:77:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRN_N:78:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:79:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:80:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:81:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:82:result_uint8x16 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:83:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:84:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:85:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:86:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:87:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:88:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:89:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSET_LANE/VSET_LANEQ output: +VSET_LANE/VSET_LANEQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, 11, } +VSET_LANE/VSET_LANEQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 22, } +VSET_LANE/VSET_LANEQ:2:result_int32x2 [] = { fffffff0, 33, } +VSET_LANE/VSET_LANEQ:3:result_int64x1 [] = { 44, } +VSET_LANE/VSET_LANEQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, 55, f7, } +VSET_LANE/VSET_LANEQ:5:result_uint16x4 [] = { fff0, fff1, 66, fff3, } +VSET_LANE/VSET_LANEQ:6:result_uint32x2 [] = { fffffff0, 77, } +VSET_LANE/VSET_LANEQ:7:result_uint64x1 [] = { 88, } +VSET_LANE/VSET_LANEQ:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, 55, f7, } +VSET_LANE/VSET_LANEQ:9:result_poly16x4 [] = { fff0, fff1, 66, fff3, } +VSET_LANE/VSET_LANEQ:10:result_float32x2 [] = { c1800000, 4204cccd, } +VSET_LANE/VSET_LANEQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSET_LANE/VSET_LANEQ:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffff99, } +VSET_LANE/VSET_LANEQ:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, aa, fffffff6, fffffff7, } +VSET_LANE/VSET_LANEQ:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, bb, } +VSET_LANE/VSET_LANEQ:15:result_int64x2 [] 
= { fffffffffffffff0, cc, } +VSET_LANE/VSET_LANEQ:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, dd, ff, } +VSET_LANE/VSET_LANEQ:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, ee, fff7, } +VSET_LANE/VSET_LANEQ:18:result_uint32x4 [] = { fffffff0, fffffff1, ff, fffffff3, } +VSET_LANE/VSET_LANEQ:19:result_uint64x2 [] = { fffffffffffffff0, 11, } +VSET_LANE/VSET_LANEQ:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, dd, ff, } +VSET_LANE/VSET_LANEQ:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, ee, fff7, } +VSET_LANE/VSET_LANEQ:22:result_float32x4 [] = { c1800000, c1700000, c1600000, 41333333, } +VSET_LANE/VSET_LANEQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VGET_LANE/VGETQ_LANE output: +vget_lane_s8: fffffff7 +vget_lane_s16: fffffff3 +vget_lane_s32: fffffff1 +vget_lane_s64: fffffffffffffff0 +vget_lane_u8: f6 +vget_lane_u16: fff2 +vget_lane_u32: fffffff1 +vget_lane_u64: fffffffffffffff0 +vget_lane_p8: f6 +vget_lane_p16: fff2 +vget_lane_f32: c1700000 +vgetq_lane_s8: ffffffff +vgetq_lane_s16: fffffff5 +vgetq_lane_s32: fffffff3 +vgetq_lane_s64: fffffffffffffff1 +vgetq_lane_u8: fe +vgetq_lane_u16: fff6 +vgetq_lane_u32: fffffff2 +vgetq_lane_u64: fffffffffffffff1 +vgetq_lane_p8: fe +vgetq_lane_p16: fff6 +vgetq_lane_f32: c1500000 + + +VQSUB/VQSUBQ cumulative saturation output: +VQSUB/VQSUBQ:0:vqsub_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:1:vqsub_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:2:vqsub_s32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:3:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:4:vqsub_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:5:vqsub_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:6:vqsub_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:7:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:8:vqsubq_s8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:9:vqsubq_s16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:10:vqsubq_s32 Neon cumulative 
saturation 0 +VQSUB/VQSUBQ:11:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:12:vqsubq_u8 Neon cumulative saturation 0 +VQSUB/VQSUBQ:13:vqsubq_u16 Neon cumulative saturation 0 +VQSUB/VQSUBQ:14:vqsubq_u32 Neon cumulative saturation 0 +VQSUB/VQSUBQ:15:vqsubq_u64 Neon cumulative saturation 0 + +VQSUB/VQSUBQ output: +VQSUB/VQSUBQ:16:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VQSUB/VQSUBQ:17:result_int16x4 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, } +VQSUB/VQSUBQ:18:result_int32x2 [] = { ffffffbd, ffffffbe, } +VQSUB/VQSUBQ:19:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:20:result_uint8x8 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, } +VQSUB/VQSUBQ:21:result_uint16x4 [] = { ff8a, ff8b, ff8c, ff8d, } +VQSUB/VQSUBQ:22:result_uint32x2 [] = { ffffff79, ffffff7a, } +VQSUB/VQSUBQ:23:result_uint64x1 [] = { ffffffffffffff68, } +VQSUB/VQSUBQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSUB/VQSUBQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSUB/VQSUBQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQSUB/VQSUBQ:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQSUB/VQSUBQ:28:result_int8x16 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, } +VQSUB/VQSUBQ:29:result_int16x8 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VQSUB/VQSUBQ:30:result_int32x4 [] = { ffffffbd, ffffffbe, ffffffbf, ffffffc0, } +VQSUB/VQSUBQ:31:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:32:result_uint8x16 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, } +VQSUB/VQSUBQ:33:result_uint16x8 [] = { ff8a, ff8b, ff8c, ff8d, ff8e, ff8f, ff90, ff91, } +VQSUB/VQSUBQ:34:result_uint32x4 [] = { ffffff79, ffffff7a, ffffff7b, ffffff7c, } +VQSUB/VQSUBQ:35:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } 
+VQSUB/VQSUBQ:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSUB/VQSUBQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSUB/VQSUBQ:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSUB/VQSUBQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:40:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:41:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:42:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:43:vqsubq_u64 Neon cumulative saturation 0 + +64 bits saturation: +VQSUB/VQSUBQ:44:result_int64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:45:result_uint64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:46:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQSUB/VQSUBQ:47:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:48:vqsub_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:49:vqsub_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:50:vqsubq_s64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:51:vqsubq_u64 Neon cumulative saturation 0 +VQSUB/VQSUBQ:52:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:53:result_uint64x1 [] = { ffffffffffffff68, } +VQSUB/VQSUBQ:54:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:55:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } + +VQSUB/VQSUBQ 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:56:vqsub_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:57:vqsub_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:58:vqsubq_s64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:59:vqsubq_u64 Neon cumulative saturation 1 +VQSUB/VQSUBQ:60:result_int64x1 [] = { 8000000000000000, } +VQSUB/VQSUBQ:61:result_uint64x1 [] = { 0, } +VQSUB/VQSUBQ:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } 
+VQSUB/VQSUBQ:63:result_uint64x2 [] = { 0, 0, } + +less than 64 bits saturation: +VQSUB/VQSUBQ:64:vqsub_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:65:vqsub_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:66:vqsub_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:67:vqsubq_s8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:68:vqsubq_s16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:69:vqsubq_s32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:70:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:71:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:72:result_int32x2 [] = { 80000000, 80000000, } +VQSUB/VQSUBQ:73:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:74:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:75:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQSUB/VQSUBQ less than 64 bits saturation cumulative saturation output: +VQSUB/VQSUBQ:76:vqsub_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:77:vqsub_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:78:vqsub_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:79:vqsubq_u8 Neon cumulative saturation 1 +VQSUB/VQSUBQ:80:vqsubq_u16 Neon cumulative saturation 1 +VQSUB/VQSUBQ:81:vqsubq_u32 Neon cumulative saturation 1 +VQSUB/VQSUBQ:82:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:83:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSUB/VQSUBQ:84:result_uint32x2 [] = { 0, 0, } +VQSUB/VQSUBQ:85:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:86:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:87:result_uint32x4 [] = { 0, 0, 0, 0, } + +VQDMULH cumulative saturation output: +VQDMULH:0:vqdmulh_s16 Neon cumulative saturation 0 +VQDMULH:1:vqdmulh_s32 Neon 
cumulative saturation 0 +VQDMULH:2:vqdmulhq_s16 Neon cumulative saturation 0 +VQDMULH:3:vqdmulhq_s32 Neon cumulative saturation 0 + +VQDMULH output: +VQDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:14:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH:15:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH:16:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:17:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:18:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:19:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:20:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:21:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:22:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:23:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:24:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:26:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULH cumulative saturation output: +VQDMULH:28:vqdmulh_s16 Neon cumulative saturation 1 
+VQDMULH:29:vqdmulh_s32 Neon cumulative saturation 1 +VQDMULH:30:vqdmulhq_s16 Neon cumulative saturation 1 +VQDMULH:31:vqdmulhq_s32 Neon cumulative saturation 1 + +VQDMULH output: +VQDMULH:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH:34:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH:35:result_int64x1 [] = { 3333333333333333, } +VQDMULH:36:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:37:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:38:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:39:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:40:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:41:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:42:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH:43:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH:44:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:45:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH:46:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH:47:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:48:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:49:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:50:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:51:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:52:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:54:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULH_LANE cumulative saturation output: +VQDMULH_LANE:0:vqdmulh_lane_s16 Neon cumulative saturation 0 
+VQDMULH_LANE:1:vqdmulh_lane_s32 Neon cumulative saturation 0 +VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon cumulative saturation 0 +VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon cumulative saturation 0 + +VQDMULH_LANE output: +VQDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:14:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:15:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH_LANE:16:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:17:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:18:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:19:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:20:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:21:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:22:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:23:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:24:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:26:result_float32x4 [] = { 33333333, 33333333, 
33333333, 33333333, } +VQDMULH_LANE:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULH_LANE:28:vqdmulh_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:29:vqdmulh_lane_s32 Neon cumulative saturation 1 +VQDMULH_LANE:30:vqdmulhq_lane_s16 Neon cumulative saturation 1 +VQDMULH_LANE:31:vqdmulhq_lane_s32 Neon cumulative saturation 1 + +VQDMULH_LANE (check mul cumulative saturation) output: +VQDMULH_LANE:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:34:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH_LANE:35:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:36:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:37:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:38:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:39:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:40:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:41:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:42:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:43:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH_LANE:44:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:45:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:46:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_LANE:47:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:48:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:49:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:50:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:51:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:52:result_poly8x16 [] 
= { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:54:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULH_N cumulative saturation output: +VQDMULH_N:0:vqdmulh_n_s16 Neon cumulative saturation 0 +VQDMULH_N:1:vqdmulh_n_s32 Neon cumulative saturation 0 +VQDMULH_N:2:vqdmulhq_n_s16 Neon cumulative saturation 0 +VQDMULH_N:3:vqdmulhq_n_s32 Neon cumulative saturation 0 + +VQDMULH_N output: +VQDMULH_N:4:result_int16x4 [] = { 19, 19, 19, 19, } +VQDMULH_N:5:result_int32x2 [] = { 4, 4, } +VQDMULH_N:6:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQDMULH_N:7:result_int32x4 [] = { a, a, a, a, } + +VQDMULH_N (check mul cumulative saturation) cumulative saturation output: +VQDMULH_N:8:vqdmulh_n_s16 Neon cumulative saturation 1 +VQDMULH_N:9:vqdmulh_n_s32 Neon cumulative saturation 1 +VQDMULH_N:10:vqdmulhq_n_s16 Neon cumulative saturation 1 +VQDMULH_N:11:vqdmulhq_n_s32 Neon cumulative saturation 1 + +VQDMULH_N (check mul cumulative saturation) output: +VQDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_N:20:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:21:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_N:22:result_float32x2 [] = { 33333333, 33333333, } +VQDMULH_N:23:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULH_N:24:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, } +VQDMULH_N:25:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:26:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_N:27:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:28:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:29:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_N:30:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_N:31:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:32:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:33:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_N:34:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_N:35:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULL cumulative saturation output: +VQDMULL:0:vqdmull_s16 Neon cumulative saturation 0 +VQDMULL:1:vqdmull_s32 Neon cumulative saturation 0 + +VQDMULL output: +VQDMULL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:5:result_int64x1 [] = { 3333333333333333, } +VQDMULL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMULL:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULL:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQDMULL:16:result_int32x4 [] = { 200, 1c2, 188, 152, } +VQDMULL:17:result_int64x2 [] = { 200, 1c2, } +VQDMULL:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULL (check mul cumulative saturation) cumulative saturation output: +VQDMULL:26:vqdmull_s16 Neon cumulative saturation 1 +VQDMULL:27:vqdmull_s32 Neon cumulative saturation 1 + +VQDMULL (check mul cumulative saturation) output: +VQDMULL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:31:result_int64x1 [] = { 3333333333333333, } +VQDMULL:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:35:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMULL:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMULL:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:42:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL:43:result_int64x2 
[] = { 7fffffffffffffff, 7fffffffffffffff, } +VQDMULL:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL cumulative saturation output: +VQDMLAL:0:vqdmlal_s16 Neon cumulative saturation 0 +VQDMLAL:1:vqdmlal_s32 Neon cumulative saturation 0 + +VQDMLAL output: +VQDMLAL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:16:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL:17:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:19:result_uint16x8 
[] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:21:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLAL (check mul cumulative saturation) cumulative saturation output: +VQDMLAL:26:vqdmlal_s16 Neon cumulative saturation 1 +VQDMLAL:27:vqdmlal_s32 Neon cumulative saturation 1 + +VQDMLAL (check mul cumulative saturation) output: +VQDMLAL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:31:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLAL:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLAL:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:42:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL:43:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQDMLAL:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL:48:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL cumulative saturation output: +VQDMLSL:0:vqdmlsl_s16 Neon cumulative saturation 0 +VQDMLSL:1:vqdmlsl_s32 Neon cumulative saturation 0 + +VQDMLSL output: +VQDMLSL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL:10:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:11:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:12:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL:13:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL:14:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:15:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:16:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL:17:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL:18:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:19:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:20:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:21:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VQDMLSL:22:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:23:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:24:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:25:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMLSL (check mul cumulative saturation) cumulative saturation output: +VQDMLSL:26:vqdmlsl_s16 Neon cumulative saturation 1 +VQDMLSL:27:vqdmlsl_s32 Neon cumulative saturation 1 + +VQDMLSL (check mul cumulative saturation) output: +VQDMLSL:28:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:29:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:30:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:31:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:32:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:33:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:34:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:35:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL:36:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:37:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:38:result_float32x2 [] = { 33333333, 33333333, } +VQDMLSL:39:result_float16x4 [] = { 0, 0, 0, 0, } +VQDMLSL:40:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:41:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:42:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL:43:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL:44:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:45:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:46:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:47:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:48:result_poly8x16 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:49:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:50:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:51:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCEQ/VCEQQ output: +VCEQ/VCEQQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, 0, } +VCEQ/VCEQQ:1:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:3:result_uint8x8 [] = { 0, 0, 0, ff, 0, 0, 0, 0, } +VCEQ/VCEQQ:4:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, 0, 0, 0, } +VCEQ/VCEQQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:8:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCEQ/VCEQQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:11:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:13:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:15:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:16:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCEQ/VCEQQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCEQ/VCEQQ p8 output: +VCEQ/VCEQQ:0:result_uint8x8 [] = { 0, 0, 0, ff, 0, 0, 0, 0, } +VCEQ/VCEQQ:1:result_uint8x16 [] = { 0, 0, 0, 0, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } + +VCGE/VCGEQ 
output: +VCGE/VCGEQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, ff, } +VCGE/VCGEQ:1:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:3:result_uint8x8 [] = { 0, 0, 0, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:4:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, ff, } +VCGE/VCGEQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:8:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:11:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:13:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:14:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:15:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:16:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCGE/VCGEQ FP special (-inf):21:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (inf):22:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCLE/VCLEQ output: +VCLE/VCLEQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, 0, } +VCLE/VCLEQ:1:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:3:result_uint8x8 [] = { ff, ff, ff, ff, 0, 0, 0, 0, } +VCLE/VCLEQ:4:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:5:result_uint32x2 [] = { ffffffff, ffffffff, } 
+VCLE/VCLEQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, } +VCLE/VCLEQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 0, } +VCLE/VCLEQ:8:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:9:result_uint8x16 [] = { ff, ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLE/VCLEQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 0, } +VCLE/VCLEQ:11:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:13:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:15:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:16:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (inf):20:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCLE/VCLEQ FP special (-0.0):23:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCGT/VCGTQ output: +VCGT/VCGTQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, ff, } +VCGT/VCGTQ:1:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:2:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:3:result_uint8x8 [] = { 0, 0, 0, 0, ff, ff, ff, ff, } +VCGT/VCGTQ:4:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:5:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, } +VCGT/VCGTQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:8:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:9:result_uint8x16 [] = { 0, 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGT/VCGTQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 
0, 0, ffff, } +VCGT/VCGTQ:11:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:12:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:13:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:14:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:15:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:16:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (inf):20:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ FP special (-inf):21:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGT/VCGTQ FP special (inf):22:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGT/VCGTQ FP special (-0.0):23:result_uint32x2 [] = { 0, 0, } + +VCLT/VCLTQ output: +VCLT/VCLTQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, 0, 0, } +VCLT/VCLTQ:1:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:2:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:3:result_uint8x8 [] = { ff, ff, ff, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:4:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:5:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, 0, } +VCLT/VCLTQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } +VCLT/VCLTQ:8:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:9:result_uint8x16 [] = { ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } +VCLT/VCLTQ:11:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:12:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:13:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:14:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:15:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:16:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (NaN):17:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP 
special (-NaN):18:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (NaN):19:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (inf):20:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLT/VCLTQ FP special (-inf):21:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (inf):22:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ FP special (-0.0):23:result_uint32x2 [] = { 0, 0, } + +VBSL/VBSLQ output: +VBSL/VBSLQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:3:result_int64x1 [] = { fffffffffffffffd, } +VBSL/VBSLQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:5:result_uint16x4 [] = { fff0, fff0, fff2, fff2, } +VBSL/VBSLQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:7:result_uint64x1 [] = { fffffff1, } +VBSL/VBSLQ:8:result_poly8x8 [] = { f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:9:result_poly16x4 [] = { fff0, fff0, fff2, fff2, } +VBSL/VBSLQ:10:result_float32x2 [] = { c1800004, c1700004, } +VBSL/VBSLQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VBSL/VBSLQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:13:result_int16x8 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VBSL/VBSLQ:14:result_int32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:15:result_int64x2 [] = { fffffffffffffffd, fffffffffffffffd, } +VBSL/VBSLQ:16:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:17:result_uint16x8 [] = { fff0, fff0, fff2, fff2, fff4, fff4, fff6, fff6, } +VBSL/VBSLQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:19:result_uint64x2 [] = { fffffff1, 
fffffff1, } +VBSL/VBSLQ:20:result_poly8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:21:result_poly16x8 [] = { fff0, fff0, fff2, fff2, fff4, fff4, fff6, fff6, } +VBSL/VBSLQ:22:result_float32x4 [] = { c1800001, c1700001, c1600001, c1500001, } +VBSL/VBSLQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHL/VSHLQ output: +VSHL/VSHLQ:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL/VSHLQ:1:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VSHL/VSHLQ:2:result_int32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:3:result_int64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:4:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VSHL/VSHLQ:5:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VSHL/VSHLQ:6:result_uint32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:7:result_uint64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:12:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL/VSHLQ:13:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:14:result_int32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:15:result_int64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:16:result_uint8x16 [] = { 0, 20, 40, 60, 80, a0, c0, e0, 0, 20, 40, 60, 80, a0, c0, e0, } +VSHL/VSHLQ:17:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:18:result_uint32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:19:result_uint64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:21:result_poly16x8 
[] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHL/VSHLQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHL/VSHLQ (large shift amount) output: +VSHL/VSHLQ:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:25:result_int16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:26:result_int32x2 [] = { 0, 0, } +VSHL/VSHLQ:27:result_int64x1 [] = { 0, } +VSHL/VSHLQ:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:30:result_uint32x2 [] = { 0, 0, } +VSHL/VSHLQ:31:result_uint64x1 [] = { 0, } +VSHL/VSHLQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:34:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:36:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:38:result_int32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:39:result_int64x2 [] = { 0, 0, } +VSHL/VSHLQ:40:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:42:result_uint32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:43:result_uint64x2 [] = { 0, 0, } +VSHL/VSHLQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHL/VSHLQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHL/VSHLQ (negative shift amount) output: +VSHL/VSHLQ:48:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHL/VSHLQ:49:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } 
+VSHL/VSHLQ:50:result_int32x2 [] = { fffffffc, fffffffc, } +VSHL/VSHLQ:51:result_int64x1 [] = { ffffffffffffffff, } +VSHL/VSHLQ:52:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VSHL/VSHLQ:53:result_uint16x4 [] = { 7ff8, 7ff8, 7ff9, 7ff9, } +VSHL/VSHLQ:54:result_uint32x2 [] = { 3ffffffc, 3ffffffc, } +VSHL/VSHLQ:55:result_uint64x1 [] = { fffffffffffffff, } +VSHL/VSHLQ:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:58:result_float32x2 [] = { 33333333, 33333333, } +VSHL/VSHLQ:59:result_float16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:60:result_int8x16 [] = { fffffffc, fffffffc, fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, fffffffe, fffffffe, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHL/VSHLQ:61:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHL/VSHLQ:62:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } +VSHL/VSHLQ:63:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHL/VSHLQ:64:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHL/VSHLQ:65:result_uint16x8 [] = { 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, } +VSHL/VSHLQ:66:result_uint32x4 [] = { 1ffffffe, 1ffffffe, 1ffffffe, 1ffffffe, } +VSHL/VSHLQ:67:result_uint64x2 [] = { 7ffffffffffffff, 7ffffffffffffff, } +VSHL/VSHLQ:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL/VSHLQ:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL/VSHLQ:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHL/VSHLQ:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHL_N output: +VSHL_N:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:1:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHL_N:2:result_int32x2 [] = 
{ ffffff80, ffffff88, } +VSHL_N:3:result_int64x1 [] = { ffffffffffffffc0, } +VSHL_N:4:result_uint8x8 [] = { c0, c4, c8, cc, d0, d4, d8, dc, } +VSHL_N:5:result_uint16x4 [] = { ff00, ff10, ff20, ff30, } +VSHL_N:6:result_uint32x2 [] = { ffffff80, ffffff88, } +VSHL_N:7:result_uint64x1 [] = { ffffffffffffffe0, } +VSHL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHL_N:12:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL_N:13:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:14:result_int32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:15:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VSHL_N:16:result_uint8x16 [] = { c0, c4, c8, cc, d0, d4, d8, dc, e0, e4, e8, ec, f0, f4, f8, fc, } +VSHL_N:17:result_uint16x8 [] = { ff80, ff88, ff90, ff98, ffa0, ffa8, ffb0, ffb8, } +VSHL_N:18:result_uint32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:19:result_uint64x2 [] = { ffffffffffffffe0, ffffffffffffffe2, } +VSHL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHL_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (with input = 0) cumulative saturation output: +VQSHL/VQSHLQ:0:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:1:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:2:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:3:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:4:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:5:vqshl_u16 Neon cumulative saturation 0 
+VQSHL/VQSHLQ:6:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:7:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:8:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:9:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:10:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:11:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:12:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:13:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:14:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:15:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (with input = 0) output: +VQSHL/VQSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:18:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:19:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:23:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:28:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:29:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:30:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:31:result_int64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:32:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:33:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:34:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:35:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQSHL/VQSHLQ:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:40:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:41:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:42:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:43:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:44:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:45:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:46:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:47:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:48:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:49:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:50:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:51:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:52:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:53:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:54:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:55:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (input 0 and negative shift amount) output: +VQSHL/VQSHLQ:56:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:57:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:58:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:59:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:60:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:61:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:62:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:63:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:64:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:65:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:66:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:67:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:68:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:69:result_int16x8 [] = { 0, 
0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:70:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:71:result_int64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:72:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:73:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:74:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:75:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:76:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:78:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ cumulative saturation output: +VQSHL/VQSHLQ:80:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:81:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:82:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:83:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:84:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:85:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:86:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:87:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:88:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:89:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:90:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:91:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:92:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:93:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:94:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:95:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ output: +VQSHL/VQSHLQ:96:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL/VQSHLQ:97:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQSHL/VQSHLQ:98:result_int32x2 [] = { fffff000, fffff100, } +VQSHL/VQSHLQ:99:result_int64x1 [] = { 
fffffffffffffffe, } +VQSHL/VQSHLQ:100:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:101:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:102:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:103:result_uint64x1 [] = { 1ffffffffffffffe, } +VQSHL/VQSHLQ:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:106:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:107:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:108:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:109:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:110:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:111:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:112:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:113:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:114:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:115:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (negative shift amount) cumulative saturation output: +VQSHL/VQSHLQ:120:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:121:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:122:vqshl_s32 Neon cumulative saturation 0 
+VQSHL/VQSHLQ:123:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:124:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:125:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:126:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:127:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:128:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:129:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:130:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:131:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:132:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:133:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:134:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:135:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (negative shift amount) output: +VQSHL/VQSHLQ:136:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHL/VQSHLQ:137:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VQSHL/VQSHLQ:138:result_int32x2 [] = { fffffffe, fffffffe, } +VQSHL/VQSHLQ:139:result_int64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:140:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VQSHL/VQSHLQ:141:result_uint16x4 [] = { 3ffc, 3ffc, 3ffc, 3ffc, } +VQSHL/VQSHLQ:142:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQSHL/VQSHLQ:143:result_uint64x1 [] = { fffffffffffffff, } +VQSHL/VQSHLQ:144:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:145:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:146:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:147:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:148:result_int8x16 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:149:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } 
+VQSHL/VQSHLQ:150:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:151:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:152:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VQSHL/VQSHLQ:153:result_uint16x8 [] = { 1f, 1f, 1f, 1f, 1f, 1f, 1f, 1f, } +VQSHL/VQSHLQ:154:result_uint32x4 [] = { 7ffff, 7ffff, 7ffff, 7ffff, } +VQSHL/VQSHLQ:155:result_uint64x2 [] = { fffffffffff, fffffffffff, } +VQSHL/VQSHLQ:156:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:157:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:158:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:159:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (large shift amount, negative input) cumulative saturation output: +VQSHL/VQSHLQ:160:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:161:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:162:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:163:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:164:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:165:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:166:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:167:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:168:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:169:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:170:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:171:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:172:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:173:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:174:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:175:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ (large shift amount, negative input) output: +VQSHL/VQSHLQ:176:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } 
+VQSHL/VQSHLQ:177:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:178:result_int32x2 [] = { 80000000, 80000000, } +VQSHL/VQSHLQ:179:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:180:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:181:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:182:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:183:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:184:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:185:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:186:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:187:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:188:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:189:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:190:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:191:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:192:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:193:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:194:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:195:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:196:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:197:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:198:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:199:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (check cumulative saturation) cumulative saturation output: 
+VQSHL/VQSHLQ:200:vqshl_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:201:vqshl_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:202:vqshl_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:203:vqshl_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:204:vqshl_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:205:vqshl_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:206:vqshl_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:207:vqshl_u64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:208:vqshlq_s8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:209:vqshlq_s16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:210:vqshlq_s32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:211:vqshlq_s64 Neon cumulative saturation 0 +VQSHL/VQSHLQ:212:vqshlq_u8 Neon cumulative saturation 0 +VQSHL/VQSHLQ:213:vqshlq_u16 Neon cumulative saturation 0 +VQSHL/VQSHLQ:214:vqshlq_u32 Neon cumulative saturation 0 +VQSHL/VQSHLQ:215:vqshlq_u64 Neon cumulative saturation 0 + +VQSHL/VQSHLQ (check cumulative saturation) output: +VQSHL/VQSHLQ:216:result_int8x8 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:217:result_int16x4 [] = { 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:218:result_int32x2 [] = { 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:219:result_int64x1 [] = { 3fffffffffffffff, } +VQSHL/VQSHLQ:220:result_uint8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:221:result_uint16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:222:result_uint32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:223:result_uint64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:224:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:225:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:226:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:227:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:228:result_int8x16 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:229:result_int16x8 [] = { 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:230:result_int32x4 [] = { 
3fffffff, 3fffffff, 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:231:result_int64x2 [] = { 3fffffffffffffff, 3fffffffffffffff, } +VQSHL/VQSHLQ:232:result_uint8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:233:result_uint16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:234:result_uint32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:235:result_uint64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:236:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:237:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:238:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:239:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (large shift amount, positive input) cumulative saturation output: +VQSHL/VQSHLQ:240:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:241:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:242:vqshl_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:243:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:244:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:245:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:246:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:247:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:248:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:249:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:250:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:251:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:252:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:253:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:254:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:255:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ (large shift amount, positive input) output: +VQSHL/VQSHLQ:256:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:257:result_int16x4 [] 
= { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:258:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:259:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:260:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:261:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:262:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:263:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:264:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:265:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:266:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:267:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:268:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:269:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:270:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:271:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:272:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:273:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:274:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:275:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:276:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:277:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:278:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:279:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL/VQSHLQ (check saturation on 64 bits) cumulative saturation output: +VQSHL/VQSHLQ:280:vqshl_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:281:vqshl_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:282:vqshl_s32 Neon cumulative saturation 1 
+VQSHL/VQSHLQ:283:vqshl_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:284:vqshl_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:285:vqshl_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:286:vqshl_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:287:vqshl_u64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:288:vqshlq_s8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:289:vqshlq_s16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:290:vqshlq_s32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:291:vqshlq_s64 Neon cumulative saturation 1 +VQSHL/VQSHLQ:292:vqshlq_u8 Neon cumulative saturation 1 +VQSHL/VQSHLQ:293:vqshlq_u16 Neon cumulative saturation 1 +VQSHL/VQSHLQ:294:vqshlq_u32 Neon cumulative saturation 1 +VQSHL/VQSHLQ:295:vqshlq_u64 Neon cumulative saturation 1 + +VQSHL/VQSHLQ (check saturation on 64 bits) output: +VQSHL/VQSHLQ:296:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:297:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:298:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:299:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:300:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:301:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:302:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:303:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:304:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:305:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:306:result_float32x2 [] = { 33333333, 33333333, } +VQSHL/VQSHLQ:307:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:308:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:309:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:310:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:311:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:312:result_uint8x16 [] = { ff, ff, ff, ff, 
ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:313:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:314:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:315:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:316:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL/VQSHLQ:317:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL/VQSHLQ:318:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL/VQSHLQ:319:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL_N/VQSHLQ_N cumulative saturation output: +VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon cumulative saturation 0 +VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon cumulative saturation 1 + +VQSHL_N/VQSHLQ_N output: +VQSHL_N/VQSHLQ_N:16:result_int8x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } +VQSHL_N/VQSHLQ_N:17:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:18:result_int32x2 [] = { ffffffe0, ffffffe2, } 
+VQSHL_N/VQSHLQ_N:19:result_int64x1 [] = { ffffffffffffffc0, } +VQSHL_N/VQSHLQ_N:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:23:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:26:result_float32x2 [] = { 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL_N/VQSHLQ_N:28:result_int8x16 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, ffffffe0, ffffffe4, ffffffe8, ffffffec, fffffff0, fffffff4, fffffff8, fffffffc, } +VQSHL_N/VQSHLQ_N:29:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL_N/VQSHLQ_N:30:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:31:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VQSHL_N/VQSHLQ_N:32:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:33:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:34:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:35:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) cumulative saturation output: +VQSHL_N/VQSHLQ_N:40:vqshl_n_s8 Neon cumulative saturation 1 
+VQSHL_N/VQSHLQ_N:41:vqshl_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:42:vqshl_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:43:vqshl_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:44:vqshl_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:45:vqshl_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:46:vqshl_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:47:vqshl_n_u64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:48:vqshlq_n_s8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:49:vqshlq_n_s16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:50:vqshlq_n_s32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:51:vqshlq_n_s64 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:52:vqshlq_n_u8 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:53:vqshlq_n_u16 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:54:vqshlq_n_u32 Neon cumulative saturation 1 +VQSHL_N/VQSHLQ_N:55:vqshlq_n_u64 Neon cumulative saturation 1 + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: +VQSHL_N/VQSHLQ_N:56:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:57:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:58:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:59:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:60:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:61:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:62:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:63:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:64:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:65:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:66:result_float32x2 [] = { 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:67:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHL_N/VQSHLQ_N:68:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:69:result_int16x8 [] = { 7fff, 7fff, 
7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:70:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:71:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:72:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:73:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:74:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:75:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:76:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHL_N/VQSHLQ_N:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHL_N/VQSHLQ_N:78:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHL_N/VQSHLQ_N:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (with input = 0) output: +VRSHL/VRSHLQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:2:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:3:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:6:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:7:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:12:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:13:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:14:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:15:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:16:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } 
+VRSHL/VRSHLQ:17:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:18:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:19:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (input 0 and negative shift amount) output: +VRSHL/VRSHLQ:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:25:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:26:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:27:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:30:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:31:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:34:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:36:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:38:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:39:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:40:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:42:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:43:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:46:result_float32x4 [] = { 
33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ output: +VRSHL/VRSHLQ:48:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VRSHL/VRSHLQ:49:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VRSHL/VRSHLQ:50:result_int32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:51:result_int64x1 [] = { fffffffffffffffe, } +VRSHL/VRSHLQ:52:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VRSHL/VRSHLQ:53:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VRSHL/VRSHLQ:54:result_uint32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:55:result_uint64x1 [] = { 1ffffffffffffffe, } +VRSHL/VRSHLQ:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:58:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:59:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:60:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:61:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:62:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:63:result_int64x2 [] = { 0, 8000000000000000, } +VRSHL/VRSHLQ:64:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:65:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:66:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:67:result_uint64x2 [] = { 0, 8000000000000000, } +VRSHL/VRSHLQ:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (negative shift amount) output: +VRSHL/VRSHLQ:72:result_int8x8 [] = { fffffff8, 
fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHL/VRSHLQ:73:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHL/VRSHLQ:74:result_int32x2 [] = { fffffffe, fffffffe, } +VRSHL/VRSHLQ:75:result_int64x1 [] = { ffffffffffffffff, } +VRSHL/VRSHLQ:76:result_uint8x8 [] = { 78, 79, 79, 7a, 7a, 7b, 7b, 7c, } +VRSHL/VRSHLQ:77:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VRSHL/VRSHLQ:78:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VRSHL/VRSHLQ:79:result_uint64x1 [] = { fffffffffffffff, } +VRSHL/VRSHLQ:80:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:81:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:82:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:83:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:84:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:85:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:86:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:87:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:88:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VRSHL/VRSHLQ:89:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:90:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VRSHL/VRSHLQ:91:result_uint64x2 [] = { 100000000000, 100000000000, } +VRSHL/VRSHLQ:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (checking round_const overflow: shift by -1) output: +VRSHL/VRSHLQ:96:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:97:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:98:result_int32x2 [] = { 40000000, 40000000, } +VRSHL/VRSHLQ:99:result_int64x1 [] = { 
4000000000000000, } +VRSHL/VRSHLQ:100:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:101:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:102:result_uint32x2 [] = { 80000000, 80000000, } +VRSHL/VRSHLQ:103:result_uint64x1 [] = { 8000000000000000, } +VRSHL/VRSHLQ:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:106:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:107:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:108:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:109:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:110:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHL/VRSHLQ:111:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHL/VRSHLQ:112:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:113:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:114:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHL/VRSHLQ:115:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHL/VRSHLQ:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (checking round_const overflow: shift by -3) output: +VRSHL/VRSHLQ:120:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:121:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:122:result_int32x2 [] = { 10000000, 10000000, } +VRSHL/VRSHLQ:123:result_int64x1 [] = { 1000000000000000, } +VRSHL/VRSHLQ:124:result_uint8x8 [] = { 20, 20, 20, 20, 
20, 20, 20, 20, } +VRSHL/VRSHLQ:125:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:126:result_uint32x2 [] = { 20000000, 20000000, } +VRSHL/VRSHLQ:127:result_uint64x1 [] = { 2000000000000000, } +VRSHL/VRSHLQ:128:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:129:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:130:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:131:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:132:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:133:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:134:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHL/VRSHLQ:135:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHL/VRSHLQ:136:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:137:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:138:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHL/VRSHLQ:139:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHL/VRSHLQ:140:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:141:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:142:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:143:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (checking negative shift amount as large as input vector width) output: +VRSHL/VRSHLQ:144:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:145:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:146:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:147:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:148:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:149:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSHL/VRSHLQ:150:result_uint32x2 [] = { 
1, 1, } +VRSHL/VRSHLQ:151:result_uint64x1 [] = { 1, } +VRSHL/VRSHLQ:152:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:153:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:154:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:155:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:156:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:157:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:158:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:159:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:160:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:161:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:162:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHL/VRSHLQ:163:result_uint64x2 [] = { 1, 1, } +VRSHL/VRSHLQ:164:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:165:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:166:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:167:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (large shift amount) output: +VRSHL/VRSHLQ:168:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:169:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:170:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:171:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:172:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:173:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:174:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:175:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:176:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:177:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:178:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:179:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:180:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, } +VRSHL/VRSHLQ:181:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:182:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:183:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:184:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:185:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:186:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:187:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:188:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:189:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:190:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:191:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHL/VRSHLQ (large negative shift amount) output: +VRSHL/VRSHLQ:192:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:193:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:194:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:195:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:196:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:197:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:198:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:199:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:200:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:201:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:202:result_float32x2 [] = { 33333333, 33333333, } +VRSHL/VRSHLQ:203:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:204:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:205:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:206:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:207:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:208:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:209:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHL/VRSHLQ:210:result_uint32x4 [] = { 1, 1, 1, 
1, } +VRSHL/VRSHLQ:211:result_uint64x2 [] = { 1, 1, } +VRSHL/VRSHLQ:212:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHL/VRSHLQ:213:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHL/VRSHLQ:214:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHL/VRSHLQ:215:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD2/VLD2Q chunk 0 output: +VLD2/VLD2Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD2/VLD2Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD2/VLD2Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD2/VLD2Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD2/VLD2Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD2/VLD2Q:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD2/VLD2Q:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } 
+VLD2/VLD2Q:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VLD2/VLD2Q:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } + +VLD2/VLD2Q chunk 1 output: +VLD2/VLD2Q:24:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:25:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:27:result_int64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:28:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:29:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:32:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:33:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:34:result_float32x2 [] = { c1600000, c1500000, } +VLD2/VLD2Q:35:result_float16x4 [] = { ca00, c980, c900, c880, } +VLD2/VLD2Q:36:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:37:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:38:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:40:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:41:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD2/VLD2Q:42:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } 
+VLD2/VLD2Q:44:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:45:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD2/VLD2Q:46:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } +VLD2/VLD2Q:47:result_float16x8 [] = { c800, c700, c600, c500, c400, c200, c000, bc00, } + +VLD3/VLD3Q chunk 0 output: +VLD3/VLD3Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD3/VLD3Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD3/VLD3Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD3/VLD3Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3/VLD3Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD3/VLD3Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD3/VLD3Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3/VLD3Q:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD3/VLD3Q:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:19:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VLD3/VLD3Q:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VLD3/VLD3Q:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } + +VLD3/VLD3Q chunk 1 output: +VLD3/VLD3Q:24:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:25:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:27:result_int64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:28:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:29:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:32:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:33:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:34:result_float32x2 [] = { c1600000, c1500000, } +VLD3/VLD3Q:35:result_float16x4 [] = { ca00, c980, c900, c880, } +VLD3/VLD3Q:36:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:37:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:38:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:40:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:41:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD3/VLD3Q:42:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:44:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
a, b, c, d, e, f, } +VLD3/VLD3Q:45:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD3/VLD3Q:46:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } +VLD3/VLD3Q:47:result_float16x8 [] = { c800, c700, c600, c500, c400, c200, c000, bc00, } + +VLD3/VLD3Q chunk 2 output: +VLD3/VLD3Q:48:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:49:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:50:result_int32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:51:result_int64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:52:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:53:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD3/VLD3Q:54:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:55:result_uint64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:56:result_poly8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:57:result_poly16x4 [] = { fff8, fff9, fffa, fffb, } +VLD3/VLD3Q:58:result_float32x2 [] = { c1400000, c1300000, } +VLD3/VLD3Q:59:result_float16x4 [] = { c800, c700, c600, c500, } +VLD3/VLD3Q:60:result_int8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:61:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:62:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:64:result_uint8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:65:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:66:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:68:result_poly8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD3/VLD3Q:69:result_poly16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:70:result_float32x4 [] = { c1000000, c0e00000, c0c00000, c0a00000, } +VLD3/VLD3Q:71:result_float16x8 [] = { 0, 
3c00, 4000, 4200, 4400, 4500, 4600, 4700, } + +VLD4/VLD4Q chunk 0 output: +VLD4/VLD4Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD4/VLD4Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4/VLD4Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD4/VLD4Q:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4/VLD4Q:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4/VLD4Q:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4/VLD4Q:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } 
+VLD4/VLD4Q:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } + +VLD4/VLD4Q chunk 1 output: +VLD4/VLD4Q:24:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:25:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:27:result_int64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:28:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:29:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:32:result_poly8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:33:result_poly16x4 [] = { fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:34:result_float32x2 [] = { c1600000, c1500000, } +VLD4/VLD4Q:35:result_float16x4 [] = { ca00, c980, c900, c880, } +VLD4/VLD4Q:36:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:37:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:38:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:40:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:41:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:42:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:44:result_poly8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:45:result_poly16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:46:result_float32x4 [] = { c1400000, c1300000, c1200000, c1100000, } +VLD4/VLD4Q:47:result_float16x8 [] = { c800, c700, c600, c500, c400, c200, c000, bc00, } + +VLD4/VLD4Q 
chunk 2 output: +VLD4/VLD4Q:48:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:49:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:50:result_int32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:51:result_int64x1 [] = { fffffffffffffff2, } +VLD4/VLD4Q:52:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:53:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD4/VLD4Q:54:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:55:result_uint64x1 [] = { fffffffffffffff2, } +VLD4/VLD4Q:56:result_poly8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:57:result_poly16x4 [] = { fff8, fff9, fffa, fffb, } +VLD4/VLD4Q:58:result_float32x2 [] = { c1400000, c1300000, } +VLD4/VLD4Q:59:result_float16x4 [] = { c800, c700, c600, c500, } +VLD4/VLD4Q:60:result_int8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:61:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:62:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:64:result_uint8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:65:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:66:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:68:result_poly8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, } +VLD4/VLD4Q:69:result_poly16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:70:result_float32x4 [] = { c1000000, c0e00000, c0c00000, c0a00000, } +VLD4/VLD4Q:71:result_float16x8 [] = { 0, 3c00, 4000, 4200, 4400, 4500, 4600, 4700, } + +VLD4/VLD4Q chunk 3 output: +VLD4/VLD4Q:72:result_int8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:73:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:74:result_int32x2 [] = { fffffff6, fffffff7, } 
+VLD4/VLD4Q:75:result_int64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:76:result_uint8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:77:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:78:result_uint32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:79:result_uint64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:80:result_poly8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:81:result_poly16x4 [] = { fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:82:result_float32x2 [] = { c1200000, c1100000, } +VLD4/VLD4Q:83:result_float16x4 [] = { c400, c200, c000, bc00, } +VLD4/VLD4Q:84:result_int8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:85:result_int16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:86:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:88:result_uint8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:89:result_uint16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:90:result_uint32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:91:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:92:result_poly8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, } +VLD4/VLD4Q:93:result_poly16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:94:result_float32x4 [] = { c0800000, c0400000, c0000000, bf800000, } +VLD4/VLD4Q:95:result_float16x8 [] = { 4800, 4880, 4900, 4980, 4a00, 4a80, 4b00, 4b80, } + +VDUP_LANE/VDUP_LANEQ output: +VDUP_LANE/VDUP_LANEQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:4:result_uint8x8 [] = { f7, f7, f7, f7, f7, f7, f7, f7, } 
+VDUP_LANE/VDUP_LANEQ:5:result_uint16x4 [] = { fff3, fff3, fff3, fff3, } +VDUP_LANE/VDUP_LANEQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:8:result_poly8x8 [] = { f7, f7, f7, f7, f7, f7, f7, f7, } +VDUP_LANE/VDUP_LANEQ:9:result_poly16x4 [] = { fff3, fff3, fff3, fff3, } +VDUP_LANE/VDUP_LANEQ:10:result_float32x2 [] = { c1700000, c1700000, } +VDUP_LANE/VDUP_LANEQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VDUP_LANE/VDUP_LANEQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:13:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VDUP_LANE/VDUP_LANEQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:16:result_uint8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, } +VDUP_LANE/VDUP_LANEQ:17:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP_LANE/VDUP_LANEQ:18:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP_LANE/VDUP_LANEQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:20:result_poly8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, } +VDUP_LANE/VDUP_LANEQ:21:result_poly16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP_LANE/VDUP_LANEQ:22:result_float32x4 [] = { c1700000, c1700000, c1700000, c1700000, } +VDUP_LANE/VDUP_LANEQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQDMULL_LANE cumulative saturation output: +VQDMULL_LANE:0:vqdmull_lane_s16 Neon cumulative saturation 0 +VQDMULL_LANE:1:vqdmull_lane_s32 Neon cumulative saturation 0 + +VQDMULL_LANE output: +VQDMULL_LANE:2:result_int32x4 
[] = { 8000, 8000, 8000, 8000, } +VQDMULL_LANE:3:result_int64x2 [] = { 4000, 4000, } + +VQDMULL_LANE (check mul cumulative saturation) cumulative saturation output: +VQDMULL_LANE:4:vqdmull_lane_s16 Neon cumulative saturation 1 +VQDMULL_LANE:5:vqdmull_lane_s32 Neon cumulative saturation 1 + +VQDMULL_LANE (check mul cumulative saturation) output: +VQDMULL_LANE:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_LANE:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VQDMULL_N cumulative saturation output: +VQDMULL_N:0:vqdmull_n_s16 Neon cumulative saturation 0 +VQDMULL_N:1:vqdmull_n_s32 Neon cumulative saturation 0 + +VQDMULL_N output: +VQDMULL_N:2:result_int32x4 [] = { 44000, 44000, 44000, 44000, } +VQDMULL_N:3:result_int64x2 [] = { aa000, aa000, } + +VQDMULL_N (check mul cumulative saturation) cumulative saturation output: +VQDMULL_N:4:vqdmull_n_s16 Neon cumulative saturation 1 +VQDMULL_N:5:vqdmull_n_s32 Neon cumulative saturation 1 + +VQDMULL_N (check mul cumulative saturation) output: +VQDMULL_N:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_N:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VST1_LANE/VST1_LANEQ output: +VST1_LANE/VST1_LANEQ:0:result_int8x8 [] = { fffffff7, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:1:result_int16x4 [] = { fffffff3, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:2:result_int32x2 [] = { fffffff1, 33333333, } +VST1_LANE/VST1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:4:result_uint8x8 [] = { f6, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:5:result_uint16x4 [] = { fff2, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:6:result_uint32x2 [] = { fffffff0, 33333333, } +VST1_LANE/VST1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:8:result_poly8x8 [] = { f6, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:9:result_poly16x4 [] = { fff2, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:10:result_float32x2 
[] = { c1700000, 33333333, } +VST1_LANE/VST1_LANEQ:11:result_float16x4 [] = { cb00, 0, 0, 0, } +VST1_LANE/VST1_LANEQ:12:result_int8x16 [] = { ffffffff, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:13:result_int16x8 [] = { fffffff5, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:14:result_int32x4 [] = { fffffff1, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:15:result_int64x2 [] = { fffffffffffffff1, 3333333333333333, } +VST1_LANE/VST1_LANEQ:16:result_uint8x16 [] = { fa, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:17:result_uint16x8 [] = { fff4, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:18:result_uint32x4 [] = { fffffff3, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:19:result_uint64x2 [] = { fffffffffffffff0, 3333333333333333, } +VST1_LANE/VST1_LANEQ:20:result_poly8x16 [] = { fa, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:21:result_poly16x8 [] = { fff4, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:22:result_float32x4 [] = { c1700000, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:23:result_float16x8 [] = { c980, 0, 0, 0, 0, 0, 0, 0, } + +VSUB/VSUBQ output: +VSUB/VSUBQ:0:result_int8x8 [] = { ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VSUB/VSUBQ:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VSUB/VSUBQ:2:result_int32x2 [] = { ffffffed, ffffffee, } +VSUB/VSUBQ:3:result_int64x1 [] = { ffffffffffffff8c, } +VSUB/VSUBQ:4:result_uint8x8 [] = { dc, dd, de, df, e0, e1, e2, e3, } +VSUB/VSUBQ:5:result_uint16x4 [] = { ffd2, ffd3, ffd4, ffd5, } +VSUB/VSUBQ:6:result_uint32x2 [] = { ffffffc8, ffffffc9, } +VSUB/VSUBQ:7:result_uint64x1 [] = { ffffffffffffffee, } +VSUB/VSUBQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUB/VSUBQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } 
+VSUB/VSUBQ:10:result_float32x2 [] = { 33333333, 33333333, } +VSUB/VSUBQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSUB/VSUBQ:12:result_int8x16 [] = { fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, } +VSUB/VSUBQ:13:result_int16x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VSUB/VSUBQ:14:result_int32x4 [] = { e, f, 10, 11, } +VSUB/VSUBQ:15:result_int64x2 [] = { ffffffffffffffd8, ffffffffffffffd9, } +VSUB/VSUBQ:16:result_uint8x16 [] = { e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, } +VSUB/VSUBQ:17:result_uint16x8 [] = { ffed, ffee, ffef, fff0, fff1, fff2, fff3, fff4, } +VSUB/VSUBQ:18:result_uint32x4 [] = { ffffffb9, ffffffba, ffffffbb, ffffffbc, } +VSUB/VSUBQ:19:result_uint64x2 [] = { ffffffffffffffed, ffffffffffffffee, } +VSUB/VSUBQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUB/VSUBQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUB/VSUBQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUB/VSUBQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +float32: +VSUB/VSUBQ:24:result_float32x2 [] = { c00ccccd, c00ccccd, } +VSUB/VSUBQ:25:result_float32x4 [] = { c00ccccc, c00ccccc, c00ccccc, c00ccccc, } + +VQADD/VQADDQ cumulative saturation output: +VQADD/VQADDQ:0:vqadd_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:1:vqadd_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:2:vqadd_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:3:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:4:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:5:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:6:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:7:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:8:vqaddq_s8 Neon cumulative saturation 0 +VQADD/VQADDQ:9:vqaddq_s16 Neon cumulative saturation 0 +VQADD/VQADDQ:10:vqaddq_s32 Neon cumulative saturation 0 +VQADD/VQADDQ:11:vqaddq_s64 Neon cumulative saturation 0 
+VQADD/VQADDQ:12:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:13:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:14:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:15:vqaddq_u64 Neon cumulative saturation 1 + +VQADD/VQADDQ output: +VQADD/VQADDQ:16:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VQADD/VQADDQ:17:result_int16x4 [] = { 12, 13, 14, 15, } +VQADD/VQADDQ:18:result_int32x2 [] = { 23, 24, } +VQADD/VQADDQ:19:result_int64x1 [] = { 34, } +VQADD/VQADDQ:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:23:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQADD/VQADDQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQADD/VQADDQ:26:result_float32x2 [] = { 33333333, 33333333, } +VQADD/VQADDQ:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQADD/VQADDQ:28:result_int8x16 [] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, } +VQADD/VQADDQ:29:result_int16x8 [] = { 12, 13, 14, 15, 16, 17, 18, 19, } +VQADD/VQADDQ:30:result_int32x4 [] = { 23, 24, 25, 26, } +VQADD/VQADDQ:31:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:32:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:33:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:34:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQADD/VQADDQ:35:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQADD/VQADDQ:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQADD/VQADDQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQADD/VQADDQ:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQADD/VQADDQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQADD/VQADDQ 64 bits saturation 
cumulative saturation output: +VQADD/VQADDQ:40:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:41:vqadd_u64 Neon cumulative saturation 0 +VQADD/VQADDQ:42:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:43:vqaddq_u64 Neon cumulative saturation 0 + +64 bits saturation: +VQADD/VQADDQ:44:result_int64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:45:result_uint64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:46:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQADD/VQADDQ:47:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:48:vqadd_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:49:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:50:vqaddq_s64 Neon cumulative saturation 0 +VQADD/VQADDQ:51:vqaddq_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:52:result_int64x1 [] = { 34, } +VQADD/VQADDQ:53:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:54:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:55:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +VQADD/VQADDQ 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:56:vqadd_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:57:vqadd_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:58:vqaddq_s64 Neon cumulative saturation 1 +VQADD/VQADDQ:59:vqaddq_u64 Neon cumulative saturation 1 +VQADD/VQADDQ:60:result_int64x1 [] = { 8000000000000000, } +VQADD/VQADDQ:61:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQADD/VQADDQ:63:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +less than 64 bits saturation: +VQADD/VQADDQ:64:vqadd_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:65:vqadd_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:66:vqadd_s32 Neon cumulative saturation 1 +VQADD/VQADDQ:67:vqaddq_s8 Neon cumulative saturation 1 +VQADD/VQADDQ:68:vqaddq_s16 Neon cumulative saturation 1 +VQADD/VQADDQ:69:vqaddq_s32 Neon cumulative 
saturation 1 +VQADD/VQADDQ:70:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:71:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:72:result_int32x2 [] = { 80000000, 80000000, } +VQADD/VQADDQ:73:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:74:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:75:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQADD/VQADDQ less than 64 bits saturation cumulative saturation output: +VQADD/VQADDQ:76:vqadd_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:77:vqadd_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:78:vqadd_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:79:vqaddq_u8 Neon cumulative saturation 1 +VQADD/VQADDQ:80:vqaddq_u16 Neon cumulative saturation 1 +VQADD/VQADDQ:81:vqaddq_u32 Neon cumulative saturation 1 +VQADD/VQADDQ:82:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:83:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:84:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:85:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:86:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:87:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VABS/VABSQ output: +VABS/VABSQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:1:result_int16x4 [] = { 10, f, e, d, } +VABS/VABSQ:2:result_int32x2 [] = { 10, f, } +VABS/VABSQ:3:result_int64x1 [] = { 3333333333333333, } +VABS/VABSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABS/VABSQ:6:result_uint32x2 [] = { 33333333, 33333333, } 
+VABS/VABSQ:7:result_uint64x1 [] = { 3333333333333333, } +VABS/VABSQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABS/VABSQ:10:result_float32x2 [] = { 33333333, 33333333, } +VABS/VABSQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABS/VABSQ:12:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VABS/VABSQ:13:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:14:result_int32x4 [] = { 10, f, e, d, } +VABS/VABSQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABS/VABSQ:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABS/VABSQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABS/VABSQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABS/VABSQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +float32: +VABS/VABSQ:24:result_float32x2 [] = { 40133333, 40133333, } +VABS/VABSQ:25:result_float32x4 [] = { 4059999a, 4059999a, 4059999a, 4059999a, } + +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 0 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 0 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 0 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 0 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:7:result_int16x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:8:result_int32x2 [] = { 10, f, } 
+VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQABS/VQABSQ:18:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQABS/VQABSQ:19:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:20:result_int32x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQABS/VQABSQ cumulative saturation output: +VQABS/VQABSQ:0:vqabs_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:1:vqabs_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:2:vqabs_s32 Neon cumulative saturation 1 +VQABS/VQABSQ:3:vqabsq_s8 Neon cumulative saturation 1 +VQABS/VQABSQ:4:vqabsq_s16 Neon cumulative saturation 1 +VQABS/VQABSQ:5:vqabsq_s32 Neon cumulative saturation 1 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] 
= { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQABS/VQABSQ:18:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:19:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:20:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQABS/VQABSQ:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCOMBINE output: +VCOMBINE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:2:result_int32x2 [] = { 33333333, 33333333, } 
+VCOMBINE:3:result_int64x1 [] = { 3333333333333333, } +VCOMBINE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:6:result_uint32x2 [] = { 33333333, 33333333, } +VCOMBINE:7:result_uint64x1 [] = { 3333333333333333, } +VCOMBINE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:10:result_float32x2 [] = { 33333333, 33333333, } +VCOMBINE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VCOMBINE:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, 11, 11, 11, 11, 11, 11, 11, } +VCOMBINE:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 22, 22, 22, 22, } +VCOMBINE:14:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VCOMBINE:15:result_int64x2 [] = { fffffffffffffff0, 44, } +VCOMBINE:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, 55, 55, 55, 55, 55, 55, 55, 55, } +VCOMBINE:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 66, 66, 66, 66, } +VCOMBINE:18:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VCOMBINE:19:result_uint64x2 [] = { fffffffffffffff0, 88, } +VCOMBINE:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, 55, 55, 55, 55, 55, 55, 55, 55, } +VCOMBINE:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, 66, 66, 66, 66, } +VCOMBINE:22:result_float32x4 [] = { c1800000, c1700000, 40533333, 40533333, } +VCOMBINE:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, 4b80, 4b80, 4b80, 4b80, } + +VMAX/VMAXQ output: +VMAX/VMAXQ:0:result_int8x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff3, } +VMAX/VMAXQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:3:result_int64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f4, f5, f6, f7, } +VMAX/VMAXQ:5:result_uint16x4 [] = { fff1, 
fff1, fff2, fff3, } +VMAX/VMAXQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:7:result_uint64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMAX/VMAXQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMAX/VMAXQ:10:result_float32x2 [] = { c1780000, c1700000, } +VMAX/VMAXQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMAX/VMAXQ:12:result_int8x16 [] = { fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VMAX/VMAXQ:13:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:16:result_uint8x16 [] = { f9, f9, f9, f9, f9, f9, f9, f9, f9, f9, fa, fb, fc, fd, fe, ff, } +VMAX/VMAXQ:17:result_uint16x8 [] = { fff2, fff2, fff2, fff3, fff4, fff5, fff6, fff7, } +VMAX/VMAXQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMAX/VMAXQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMAX/VMAXQ:22:result_float32x4 [] = { c1680000, c1680000, c1600000, c1500000, } +VMAX/VMAXQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VMAX/VMAXQ FP special (NaN):24:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMAX/VMAXQ FP special (-NaN):25:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMAX/VMAXQ FP special (inf):26:result_float32x4 [] = { 7f800000, 7f800000, 7f800000, 7f800000, } +VMAX/VMAXQ FP special (-inf):27:result_float32x4 [] = { 3f800000, 3f800000, 3f800000, 3f800000, } +VMAX/VMAXQ FP special (-0.0):28:result_float32x4 [] = { 0, 0, 
0, 0, } +VMAX/VMAXQ FP special (-0.0):29:result_float32x4 [] = { 0, 0, 0, 0, } + +VMIN/VMINQ output: +VMIN/VMINQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff2, } +VMIN/VMINQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:3:result_int64x1 [] = { 3333333333333333, } +VMIN/VMINQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f3, f3, f3, f3, } +VMIN/VMINQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff1, } +VMIN/VMINQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:7:result_uint64x1 [] = { 3333333333333333, } +VMIN/VMINQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMIN/VMINQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMIN/VMINQ:10:result_float32x2 [] = { c1800000, c1780000, } +VMIN/VMINQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMIN/VMINQ:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, } +VMIN/VMINQ:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f9, f9, f9, f9, f9, f9, } +VMIN/VMINQ:17:result_uint16x8 [] = { fff0, fff1, fff2, fff2, fff2, fff2, fff2, fff2, } +VMIN/VMINQ:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMIN/VMINQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMIN/VMINQ:22:result_float32x4 [] = { c1800000, c1700000, c1680000, c1680000, } 
+VMIN/VMINQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VMIN/VMINQ FP special (NaN):24:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMIN/VMINQ FP special (-NaN):25:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } +VMIN/VMINQ FP special (inf):26:result_float32x4 [] = { 3f800000, 3f800000, 3f800000, 3f800000, } +VMIN/VMINQ FP special (-inf):27:result_float32x4 [] = { ff800000, ff800000, ff800000, ff800000, } +VMIN/VMINQ FP special (-0.0):28:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VMIN/VMINQ FP special (-0.0):29:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VNEG/VNEGQ output: +VNEG/VNEGQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:1:result_int16x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:2:result_int32x2 [] = { 10, f, } +VNEG/VNEGQ:3:result_int64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VNEG/VNEGQ:7:result_uint64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:10:result_float32x2 [] = { 33333333, 33333333, } +VNEG/VNEGQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VNEG/VNEGQ:12:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VNEG/VNEGQ:13:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:14:result_int32x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VNEG/VNEGQ:19:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VNEG/VNEGQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VNEG/VNEGQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +float32: +VNEG/VNEGQ:24:result_float32x2 [] = { c0133333, c0133333, } +VNEG/VNEGQ:25:result_float32x4 [] = { c059999a, c059999a, c059999a, c059999a, } + +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 0 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 0 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 0 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 0 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 10, f, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQNEG/VQNEGQ:18:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQNEG/VQNEGQ:19:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:20:result_int32x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQNEG/VQNEGQ cumulative saturation output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:1:vqneg_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:2:vqneg_s32 Neon cumulative saturation 1 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon cumulative saturation 1 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon cumulative saturation 1 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon cumulative saturation 1 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:16:result_float32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQNEG/VQNEGQ:18:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:19:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 
7fff, } +VQNEG/VQNEGQ:20:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLAL output: +VMLAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL:3:result_int64x1 [] = { 3333333333333333, } +VMLAL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLAL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:13:result_int16x8 [] = { ffffe907, ffffe908, ffffe909, ffffe90a, ffffe90b, ffffe90c, ffffe90d, ffffe90e, } +VMLAL:14:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:15:result_int64x2 [] = { 3e07, 3e08, } +VMLAL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMLAL:17:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLAL:18:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:19:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLAL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLSL output: +VMLSL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL:3:result_int64x1 [] = { 3333333333333333, } +VMLSL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:10:result_float32x2 [] = { 33333333, 33333333, } +VMLSL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLSL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:13:result_int16x8 [] = { 16d9, 16da, 16db, 16dc, 16dd, 16de, 16df, 16e0, } +VMLSL:14:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:15:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:17:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLSL:18:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:19:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:21:result_poly16x8 [] = { 3333, 
3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLSL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLAL_LANE output: +VMLAL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLAL_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:14:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:15:result_int64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:18:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:19:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLAL_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLSL_LANE output: +VMLSL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMLSL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLSL_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:14:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:15:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:18:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:19:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLSL_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLAL_N output: +VMLAL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_N:4:result_uint8x8 [] = { 33, 33, 33, 
33, 33, 33, 33, 33, } +VMLAL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VMLAL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLAL_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:14:result_int32x4 [] = { 595, 596, 597, 598, } +VMLAL_N:15:result_int64x2 [] = { b3a, b3b, } +VMLAL_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:18:result_uint32x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLAL_N:19:result_uint64x2 [] = { 10df, 10e0, } +VMLAL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLAL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLSL_N output: +VMLSL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:10:result_float32x2 [] = { 33333333, 
33333333, } +VMLSL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLSL_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:14:result_int32x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLSL_N:15:result_int64x2 [] = { fffffffffffff4a6, fffffffffffff4a7, } +VMLSL_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:18:result_uint32x4 [] = { ffffef01, ffffef02, ffffef03, ffffef04, } +VMLSL_N:19:result_uint64x2 [] = { ffffffffffffef01, ffffffffffffef02, } +VMLSL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMLSL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOVL output: +VMOVL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:2:result_int32x2 [] = { 33333333, 33333333, } +VMOVL:3:result_int64x1 [] = { 3333333333333333, } +VMOVL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMOVL:7:result_uint64x1 [] = { 3333333333333333, } +VMOVL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:10:result_float32x2 [] = { 33333333, 33333333, } +VMOVL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMOVL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVL:14:result_int32x4 [] = { 
fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVL:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VMOVL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:17:result_uint16x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVL:18:result_uint32x4 [] = { fff0, fff1, fff2, fff3, } +VMOVL:19:result_uint64x2 [] = { fffffff0, fffffff1, } +VMOVL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMOVN output: +VMOVN:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVN:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVN:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMOVN:3:result_int64x1 [] = { 3333333333333333, } +VMOVN:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVN:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VMOVN:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMOVN:7:result_uint64x1 [] = { 3333333333333333, } +VMOVN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVN:10:result_float32x2 [] = { 33333333, 33333333, } +VMOVN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMOVN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VMOVN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMULL output: +VMULL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL:3:result_int64x1 [] = { 3333333333333333, } +VMULL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL:7:result_uint64x1 [] = { 3333333333333333, } +VMULL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:10:result_float32x2 [] = { 33333333, 33333333, } +VMULL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMULL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:13:result_int16x8 [] = { 100, e1, c4, a9, 90, 79, 64, 51, } +VMULL:14:result_int32x4 [] = { 100, e1, c4, a9, } +VMULL:15:result_int64x2 [] = { 100, e1, } +VMULL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:17:result_uint16x8 [] = { e100, e2e1, e4c4, e6a9, e890, ea79, ec64, ee51, } +VMULL:18:result_uint32x4 [] = { ffe00100, ffe200e1, ffe400c4, ffe600a9, } +VMULL:19:result_uint64x2 [] = { ffffffe000000100, ffffffe2000000e1, } +VMULL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:21:result_poly16x8 [] = { 5500, 5501, 5504, 5505, 5510, 5511, 5514, 5515, } +VMULL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 
33333333, } +VMULL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMULL_LANE output: +VMULL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMULL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMULL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:10:result_float32x2 [] = { 33333333, 33333333, } +VMULL_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMULL_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:14:result_int32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:15:result_int64x2 [] = { 2000, 2000, } +VMULL_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:18:result_uint32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:19:result_uint64x2 [] = { 2000, 2000, } +VMULL_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMULL_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VREV16 output: +VREV16:0:result_int8x8 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV16:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:2:result_int32x2 
[] = { 33333333, 33333333, } +VREV16:3:result_int64x1 [] = { 3333333333333333, } +VREV16:4:result_uint8x8 [] = { f1, f0, f3, f2, f5, f4, f7, f6, } +VREV16:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:6:result_uint32x2 [] = { 33333333, 33333333, } +VREV16:7:result_uint64x1 [] = { 3333333333333333, } +VREV16:8:result_poly8x8 [] = { f1, f0, f3, f2, f5, f4, f7, f6, } +VREV16:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:10:result_float32x2 [] = { 33333333, 33333333, } +VREV16:11:result_float16x4 [] = { 0, 0, 0, 0, } +VREV16:12:result_int8x16 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, fffffff9, fffffff8, fffffffb, fffffffa, fffffffd, fffffffc, ffffffff, fffffffe, } +VREV16:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:16:result_uint8x16 [] = { f1, f0, f3, f2, f5, f4, f7, f6, f9, f8, fb, fa, fd, fc, ff, fe, } +VREV16:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:20:result_poly8x16 [] = { f1, f0, f3, f2, f5, f4, f7, f6, f9, f8, fb, fa, fd, fc, ff, fe, } +VREV16:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VREV32 output: +VREV32:24:result_int8x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV32:25:result_int16x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV32:26:result_int32x2 [] = { 33333333, 33333333, } +VREV32:27:result_int64x1 [] = { 3333333333333333, } +VREV32:28:result_uint8x8 [] = { f3, f2, f1, f0, f7, f6, f5, 
f4, } +VREV32:29:result_uint16x4 [] = { fff1, fff0, fff3, fff2, } +VREV32:30:result_uint32x2 [] = { 33333333, 33333333, } +VREV32:31:result_uint64x1 [] = { 3333333333333333, } +VREV32:32:result_poly8x8 [] = { f3, f2, f1, f0, f7, f6, f5, f4, } +VREV32:33:result_poly16x4 [] = { fff1, fff0, fff3, fff2, } +VREV32:34:result_float32x2 [] = { 33333333, 33333333, } +VREV32:35:result_float16x4 [] = { 0, 0, 0, 0, } +VREV32:36:result_int8x16 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, fffffffb, fffffffa, fffffff9, fffffff8, ffffffff, fffffffe, fffffffd, fffffffc, } +VREV32:37:result_int16x8 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV32:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:40:result_uint8x16 [] = { f3, f2, f1, f0, f7, f6, f5, f4, fb, fa, f9, f8, ff, fe, fd, fc, } +VREV32:41:result_uint16x8 [] = { fff1, fff0, fff3, fff2, fff5, fff4, fff7, fff6, } +VREV32:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:44:result_poly8x16 [] = { f3, f2, f1, f0, f7, f6, f5, f4, fb, fa, f9, f8, ff, fe, fd, fc, } +VREV32:45:result_poly16x8 [] = { fff1, fff0, fff3, fff2, fff5, fff4, fff7, fff6, } +VREV32:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VREV64 output: +VREV64:48:result_int8x8 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:49:result_int16x4 [] = { fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:50:result_int32x2 [] = { fffffff1, fffffff0, } +VREV64:51:result_int64x1 [] = { 3333333333333333, } +VREV64:52:result_uint8x8 [] = { f7, f6, f5, f4, f3, f2, f1, f0, } +VREV64:53:result_uint16x4 [] = { fff3, fff2, fff1, fff0, } +VREV64:54:result_uint32x2 [] = { 
fffffff1, fffffff0, } +VREV64:55:result_uint64x1 [] = { 3333333333333333, } +VREV64:56:result_poly8x8 [] = { f7, f6, f5, f4, f3, f2, f1, f0, } +VREV64:57:result_poly16x4 [] = { fff3, fff2, fff1, fff0, } +VREV64:58:result_float32x2 [] = { c1700000, c1800000, } +VREV64:59:result_float16x4 [] = { 0, 0, 0, 0, } +VREV64:60:result_int8x16 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, ffffffff, fffffffe, fffffffd, fffffffc, fffffffb, fffffffa, fffffff9, fffffff8, } +VREV64:61:result_int16x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV64:62:result_int32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:64:result_uint8x16 [] = { f7, f6, f5, f4, f3, f2, f1, f0, ff, fe, fd, fc, fb, fa, f9, f8, } +VREV64:65:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VREV64:66:result_uint32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:68:result_poly8x16 [] = { f7, f6, f5, f4, f3, f2, f1, f0, ff, fe, fd, fc, fb, fa, f9, f8, } +VREV64:69:result_poly16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VREV64:70:result_float32x4 [] = { c1700000, c1800000, c1500000, c1600000, } +VREV64:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSRA_N output: +VSRA_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRA_N:2:result_int32x2 [] = { fffffffc, fffffffd, } +VSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VSRA_N:5:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VSRA_N:6:result_uint32x2 [] = { fffffff3, fffffff4, } +VSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRA_N:8:result_poly8x8 [] = { 33, 33, 33, 
33, 33, 33, 33, 33, } +VSRA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSRA_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSRA_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSRA_N:12:result_int8x16 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, } +VSRA_N:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRA_N:14:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:16:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VSRA_N:17:result_uint16x8 [] = { fffc, fffd, fffe, ffff, 0, 1, 2, 3, } +VSRA_N:18:result_uint32x4 [] = { fffffff3, fffffff4, fffffff5, fffffff6, } +VSRA_N:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSRA_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSRA_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSRA_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTRN/VTRNQ chunk 0 output: +VTRN/VTRNQ:0:result_int8x8 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, } +VTRN/VTRNQ:1:result_int16x4 [] = { fffffff0, fffffff1, 22, 22, } +VTRN/VTRNQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:3:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:4:result_uint8x8 [] = { f0, f1, 55, 55, f2, f3, 55, 55, } +VTRN/VTRNQ:5:result_uint16x4 [] = { fff0, fff1, 66, 66, } +VTRN/VTRNQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:7:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:8:result_poly8x8 [] = { f0, f1, 55, 55, f2, f3, 55, 55, } +VTRN/VTRNQ:9:result_poly16x4 [] = { fff0, fff1, 66, 66, } +VTRN/VTRNQ:10:result_float32x2 [] = { c1800000, c1700000, } +VTRN/VTRNQ:11:result_float16x4 [] = { 0, 
0, 0, 0, } +VTRN/VTRNQ:12:result_int8x16 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } +VTRN/VTRNQ:13:result_int16x8 [] = { fffffff0, fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:14:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VTRN/VTRNQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:16:result_uint8x16 [] = { f0, f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:17:result_uint16x8 [] = { fff0, fff1, 66, 66, fff2, fff3, 66, 66, } +VTRN/VTRNQ:18:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VTRN/VTRNQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:20:result_poly8x16 [] = { f0, f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:21:result_poly16x8 [] = { fff0, fff1, 66, 66, fff2, fff3, 66, 66, } +VTRN/VTRNQ:22:result_float32x4 [] = { c1800000, c1700000, 42073333, 42073333, } +VTRN/VTRNQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTRN/VTRNQ chunk 1 output: +VTRN/VTRNQ:24:result_int8x8 [] = { fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } +VTRN/VTRNQ:25:result_int16x4 [] = { fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:26:result_int32x2 [] = { 33, 33, } +VTRN/VTRNQ:27:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:28:result_uint8x8 [] = { f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:29:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VTRN/VTRNQ:30:result_uint32x2 [] = { 77, 77, } +VTRN/VTRNQ:31:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:32:result_poly8x8 [] = { f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:33:result_poly16x4 [] = { fff2, fff3, 66, 66, } +VTRN/VTRNQ:34:result_float32x2 [] = { 42066666, 42066666, } +VTRN/VTRNQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VTRN/VTRNQ:36:result_int8x16 [] = { fffffff8, fffffff9, 11, 11, fffffffa, fffffffb, 11, 11, fffffffc, fffffffd, 11, 11, fffffffe, ffffffff, 11, 11, } 
+VTRN/VTRNQ:37:result_int16x8 [] = { fffffff4, fffffff5, 22, 22, fffffff6, fffffff7, 22, 22, } +VTRN/VTRNQ:38:result_int32x4 [] = { fffffff2, fffffff3, 33, 33, } +VTRN/VTRNQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:40:result_uint8x16 [] = { f8, f9, 55, 55, fa, fb, 55, 55, fc, fd, 55, 55, fe, ff, 55, 55, } +VTRN/VTRNQ:41:result_uint16x8 [] = { fff4, fff5, 66, 66, fff6, fff7, 66, 66, } +VTRN/VTRNQ:42:result_uint32x4 [] = { fffffff2, fffffff3, 77, 77, } +VTRN/VTRNQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:44:result_poly8x16 [] = { f8, f9, 55, 55, fa, fb, 55, 55, fc, fd, 55, 55, fe, ff, 55, 55, } +VTRN/VTRNQ:45:result_poly16x8 [] = { fff4, fff5, 66, 66, fff6, fff7, 66, 66, } +VTRN/VTRNQ:46:result_float32x4 [] = { c1600000, c1500000, 42073333, 42073333, } +VTRN/VTRNQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VUZP/VUZPQ chunk 0 output: +VUZP/VUZPQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:3:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VUZP/VUZPQ:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VUZP/VUZPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:7:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VUZP/VUZPQ:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VUZP/VUZPQ:10:result_float32x2 [] = { c1800000, c1700000, } +VUZP/VUZPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VUZP/VUZPQ:12:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VUZP/VUZPQ:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, 
fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VUZP/VUZPQ:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VUZP/VUZPQ:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VUZP/VUZPQ:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VUZP/VUZPQ:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VUZP/VUZPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VUZP/VUZPQ chunk 1 output: +VUZP/VUZPQ:24:result_int8x8 [] = { 11, 11, 11, 11, 11, 11, 11, 11, } +VUZP/VUZPQ:25:result_int16x4 [] = { 22, 22, 22, 22, } +VUZP/VUZPQ:26:result_int32x2 [] = { 33, 33, } +VUZP/VUZPQ:27:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:28:result_uint8x8 [] = { 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:29:result_uint16x4 [] = { 66, 66, 66, 66, } +VUZP/VUZPQ:30:result_uint32x2 [] = { 77, 77, } +VUZP/VUZPQ:31:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:32:result_poly8x8 [] = { 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:33:result_poly16x4 [] = { 66, 66, 66, 66, } +VUZP/VUZPQ:34:result_float32x2 [] = { 42066666, 42066666, } +VUZP/VUZPQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VUZP/VUZPQ:36:result_int8x16 [] = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VUZP/VUZPQ:37:result_int16x8 [] = { 22, 22, 22, 22, 22, 22, 22, 22, } +VUZP/VUZPQ:38:result_int32x4 [] = { 33, 33, 33, 33, } +VUZP/VUZPQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:40:result_uint8x16 [] = { 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:41:result_uint16x8 [] = { 66, 66, 66, 66, 66, 66, 66, 66, } +VUZP/VUZPQ:42:result_uint32x4 [] = { 77, 77, 77, 77, } +VUZP/VUZPQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:44:result_poly8x16 [] = { 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VUZP/VUZPQ:45:result_poly16x8 [] = { 66, 66, 66, 66, 66, 66, 66, 66, } +VUZP/VUZPQ:46:result_float32x4 [] = { 42073333, 42073333, 42073333, 42073333, } +VUZP/VUZPQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VZIP/VZIPQ chunk 0 output: +VZIP/VZIPQ:0:result_int8x8 [] = { fffffff0, fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, } +VZIP/VZIPQ:1:result_int16x4 [] = { fffffff0, fffffff2, 22, 22, } +VZIP/VZIPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:3:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:4:result_uint8x8 [] = { f0, f4, 55, 55, f1, f5, 55, 55, } +VZIP/VZIPQ:5:result_uint16x4 [] = { fff0, fff2, 66, 66, } +VZIP/VZIPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:7:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:8:result_poly8x8 [] = { f0, f4, 55, 55, f1, f5, 55, 55, } +VZIP/VZIPQ:9:result_poly16x4 [] = { fff0, fff2, 66, 66, } +VZIP/VZIPQ:10:result_float32x2 [] = { c1800000, c1700000, } +VZIP/VZIPQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VZIP/VZIPQ:12:result_int8x16 [] = { fffffff0, fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, } +VZIP/VZIPQ:13:result_int16x8 [] = { fffffff0, fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, } +VZIP/VZIPQ:14:result_int32x4 [] = { fffffff0, fffffff2, 33, 33, } +VZIP/VZIPQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:16:result_uint8x16 [] = { f0, f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, } +VZIP/VZIPQ:17:result_uint16x8 [] = { fff0, fff4, 66, 66, fff1, fff5, 66, 66, } +VZIP/VZIPQ:18:result_uint32x4 [] = { fffffff0, fffffff2, 77, 77, } 
+VZIP/VZIPQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:20:result_poly8x16 [] = { f0, f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, } +VZIP/VZIPQ:21:result_poly16x8 [] = { fff0, fff4, 66, 66, fff1, fff5, 66, 66, } +VZIP/VZIPQ:22:result_float32x4 [] = { c1800000, c1600000, 42073333, 42073333, } +VZIP/VZIPQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VZIP/VZIPQ chunk 1 output: +VZIP/VZIPQ:24:result_int8x8 [] = { fffffff2, fffffff6, 11, 11, fffffff3, fffffff7, 11, 11, } +VZIP/VZIPQ:25:result_int16x4 [] = { fffffff1, fffffff3, 22, 22, } +VZIP/VZIPQ:26:result_int32x2 [] = { 33, 33, } +VZIP/VZIPQ:27:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:28:result_uint8x8 [] = { f2, f6, 55, 55, f3, f7, 55, 55, } +VZIP/VZIPQ:29:result_uint16x4 [] = { fff1, fff3, 66, 66, } +VZIP/VZIPQ:30:result_uint32x2 [] = { 77, 77, } +VZIP/VZIPQ:31:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:32:result_poly8x8 [] = { f2, f6, 55, 55, f3, f7, 55, 55, } +VZIP/VZIPQ:33:result_poly16x4 [] = { fff1, fff3, 66, 66, } +VZIP/VZIPQ:34:result_float32x2 [] = { 42066666, 42066666, } +VZIP/VZIPQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VZIP/VZIPQ:36:result_int8x16 [] = { fffffff4, fffffffc, 11, 11, fffffff5, fffffffd, 11, 11, fffffff6, fffffffe, 11, 11, fffffff7, ffffffff, 11, 11, } +VZIP/VZIPQ:37:result_int16x8 [] = { fffffff2, fffffff6, 22, 22, fffffff3, fffffff7, 22, 22, } +VZIP/VZIPQ:38:result_int32x4 [] = { fffffff1, fffffff3, 33, 33, } +VZIP/VZIPQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:40:result_uint8x16 [] = { f4, fc, 55, 55, f5, fd, 55, 55, f6, fe, 55, 55, f7, ff, 55, 55, } +VZIP/VZIPQ:41:result_uint16x8 [] = { fff2, fff6, 66, 66, fff3, fff7, 66, 66, } +VZIP/VZIPQ:42:result_uint32x4 [] = { fffffff1, fffffff3, 77, 77, } +VZIP/VZIPQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:44:result_poly8x16 [] = { f4, fc, 55, 55, f5, fd, 55, 55, f6, fe, 55, 55, f7, ff, 55, 
55, } +VZIP/VZIPQ:45:result_poly16x8 [] = { fff2, fff6, 66, 66, fff3, fff7, 66, 66, } +VZIP/VZIPQ:46:result_float32x4 [] = { c1700000, c1500000, 42073333, 42073333, } +VZIP/VZIPQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VREINTERPRET/VREINTERPRETQ output: +VREINTERPRET/VREINTERPRETQ:0:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:1:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:2:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:3:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:4:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:5:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:6:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:7:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:8:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:9:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:10:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:11:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:12:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:13:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:14:result_int16x4 [] = { 
fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:15:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:16:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:17:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:18:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:19:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:20:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:21:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:22:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:23:result_int32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:24:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:25:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:26:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:27:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:28:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:29:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:30:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:31:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:32:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:33:result_int64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:34:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:35:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:36:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:37:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:38:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:39:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, 
ff, ff, } +VREINTERPRET/VREINTERPRETQ:40:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:41:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:42:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:43:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:44:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:45:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:46:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:47:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:48:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:49:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:50:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:51:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:52:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:53:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:54:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:55:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:56:result_uint32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:57:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:58:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:59:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:60:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:61:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:62:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:63:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:64:result_uint64x1 [] = { fff3fff2fff1fff0, } 
+VREINTERPRET/VREINTERPRETQ:65:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:66:result_uint64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:67:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:68:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:69:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:70:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:71:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:72:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:73:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:74:result_poly8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:75:result_poly8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:76:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:77:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:78:result_poly8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:79:result_poly8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:80:result_poly8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:81:result_poly16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:82:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:83:result_poly16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:84:result_poly16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:85:result_poly16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:86:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:87:result_poly16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:88:result_poly16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:89:result_poly16x4 [] = { 
f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:90:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:91:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:92:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:93:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VREINTERPRET/VREINTERPRETQ:94:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:95:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:96:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:97:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VREINTERPRET/VREINTERPRETQ:98:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } 
+VREINTERPRET/VREINTERPRETQ:99:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:100:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:101:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:102:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:103:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:104:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:105:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:106:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:107:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:108:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:109:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:110:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:111:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:112:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:113:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:114:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:115:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } 
+VREINTERPRET/VREINTERPRETQ:116:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:117:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:118:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:119:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:120:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:121:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:122:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:123:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:124:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:125:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:126:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:127:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:128:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:129:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:130:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:131:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:132:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:133:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:134:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:135:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } 
+VREINTERPRET/VREINTERPRETQ:136:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:137:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:138:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:139:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:140:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:141:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:142:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:143:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:144:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:145:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:146:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:147:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:148:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:149:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:150:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:151:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:152:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:153:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VREINTERPRET/VREINTERPRETQ:154:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:155:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } 
+VREINTERPRET/VREINTERPRETQ:156:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:157:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:158:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:159:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:160:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VREINTERPRET/VREINTERPRETQ:161:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:162:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:163:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:164:result_float32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:165:result_float32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:166:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:167:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:168:result_float32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:169:result_float32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:170:result_float32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:171:result_float32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:172:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:173:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:174:result_float32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:175:result_float32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:176:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } 
+VREINTERPRET/VREINTERPRETQ:177:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:178:result_float32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:179:result_float32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:180:result_float32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:181:result_float32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:182:result_int8x8 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, } +VREINTERPRET/VREINTERPRETQ:183:result_int16x4 [] = { 0, ffffc180, 0, ffffc170, } +VREINTERPRET/VREINTERPRETQ:184:result_int32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:185:result_int64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:186:result_uint8x8 [] = { 0, 0, 80, c1, 0, 0, 70, c1, } +VREINTERPRET/VREINTERPRETQ:187:result_uint16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:188:result_uint32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:189:result_uint64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:190:result_poly8x8 [] = { 0, 0, 80, c1, 0, 0, 70, c1, } +VREINTERPRET/VREINTERPRETQ:191:result_poly16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:192:result_int8x16 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, 0, 0, 60, ffffffc1, 0, 0, 50, ffffffc1, } +VREINTERPRET/VREINTERPRETQ:193:result_int16x8 [] = { 0, ffffc180, 0, ffffc170, 0, ffffc160, 0, ffffc150, } +VREINTERPRET/VREINTERPRETQ:194:result_int32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:195:result_int64x2 [] = { c1700000c1800000, c1500000c1600000, } +VREINTERPRET/VREINTERPRETQ:196:result_uint8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } +VREINTERPRET/VREINTERPRETQ:197:result_uint16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:198:result_uint32x4 [] = { 
c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:199:result_uint64x2 [] = { c1700000c1800000, c1500000c1600000, } +VREINTERPRET/VREINTERPRETQ:200:result_poly8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } +VREINTERPRET/VREINTERPRETQ:201:result_poly16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:202:result_float16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:203:result_float16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:204:result_float16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:205:result_float16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:206:result_float16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:207:result_float16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:208:result_float16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:209:result_float16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:210:result_float16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:211:result_float16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:212:result_float16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:213:result_float16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:214:result_float16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:215:result_float16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:216:result_float16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:217:result_float16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:218:result_float16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:219:result_float16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } 
+VREINTERPRET/VREINTERPRETQ:220:result_float16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:221:result_float16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:222:result_float16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:223:result_float16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:224:result_int8x8 [] = { 0, ffffffcc, ffffff80, ffffffcb, 0, ffffffcb, ffffff80, ffffffca, } +VREINTERPRET/VREINTERPRETQ:225:result_int16x4 [] = { ffffcc00, ffffcb80, ffffcb00, ffffca80, } +VREINTERPRET/VREINTERPRETQ:226:result_int32x2 [] = { cb80cc00, ca80cb00, } +VREINTERPRET/VREINTERPRETQ:227:result_int64x1 [] = { ca80cb00cb80cc00, } +VREINTERPRET/VREINTERPRETQ:228:result_uint8x8 [] = { 0, cc, 80, cb, 0, cb, 80, ca, } +VREINTERPRET/VREINTERPRETQ:229:result_uint16x4 [] = { cc00, cb80, cb00, ca80, } +VREINTERPRET/VREINTERPRETQ:230:result_uint32x2 [] = { cb80cc00, ca80cb00, } +VREINTERPRET/VREINTERPRETQ:231:result_uint64x1 [] = { ca80cb00cb80cc00, } +VREINTERPRET/VREINTERPRETQ:232:result_poly8x8 [] = { 0, cc, 80, cb, 0, cb, 80, ca, } +VREINTERPRET/VREINTERPRETQ:233:result_poly16x4 [] = { cc00, cb80, cb00, ca80, } +VREINTERPRET/VREINTERPRETQ:234:result_float32x2 [] = { cb80cc00, ca80cb00, } +VREINTERPRET/VREINTERPRETQ:235:result_int8x16 [] = { 0, ffffffcc, ffffff80, ffffffcb, 0, ffffffcb, ffffff80, ffffffca, 0, ffffffca, ffffff80, ffffffc9, 0, ffffffc9, ffffff80, ffffffc8, } +VREINTERPRET/VREINTERPRETQ:236:result_int16x8 [] = { ffffcc00, ffffcb80, ffffcb00, ffffca80, ffffca00, ffffc980, ffffc900, ffffc880, } +VREINTERPRET/VREINTERPRETQ:237:result_int32x4 [] = { cb80cc00, ca80cb00, c980ca00, c880c900, } +VREINTERPRET/VREINTERPRETQ:238:result_int64x2 [] = { ca80cb00cb80cc00, c880c900c980ca00, } +VREINTERPRET/VREINTERPRETQ:239:result_uint8x16 [] = { 0, cc, 80, cb, 0, cb, 80, ca, 0, ca, 80, c9, 0, c9, 80, c8, } 
+VREINTERPRET/VREINTERPRETQ:240:result_uint16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } +VREINTERPRET/VREINTERPRETQ:241:result_uint32x4 [] = { cb80cc00, ca80cb00, c980ca00, c880c900, } +VREINTERPRET/VREINTERPRETQ:242:result_uint64x2 [] = { ca80cb00cb80cc00, c880c900c980ca00, } +VREINTERPRET/VREINTERPRETQ:243:result_poly8x16 [] = { 0, cc, 80, cb, 0, cb, 80, ca, 0, ca, 80, c9, 0, c9, 80, c8, } +VREINTERPRET/VREINTERPRETQ:244:result_poly16x8 [] = { cc00, cb80, cb00, ca80, ca00, c980, c900, c880, } +VREINTERPRET/VREINTERPRETQ:245:result_float32x4 [] = { cb80cc00, ca80cb00, c980ca00, c880c900, } + +VQRDMULH cumulative saturation output: +VQRDMULH:0:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:1:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:2:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:3:vqrdmulhq_s32 Neon cumulative saturation 0 + +VQRDMULH output: +VQRDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:5:result_int16x4 [] = { fffffff5, fffffff6, fffffff7, fffffff7, } +VQRDMULH:6:result_int32x2 [] = { 0, 0, } +VQRDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:14:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:15:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH:16:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:17:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRDMULH:18:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH:19:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:20:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, } +VQRDMULH:21:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:22:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:23:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:24:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:26:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH (check mul cumulative saturation) cumulative saturation output: +VQRDMULH:28:vqrdmulh_s16 Neon cumulative saturation 1 +VQRDMULH:29:vqrdmulh_s32 Neon cumulative saturation 1 +VQRDMULH:30:vqrdmulhq_s16 Neon cumulative saturation 1 +VQRDMULH:31:vqrdmulhq_s32 Neon cumulative saturation 1 + +VQRDMULH (check mul cumulative saturation) output: +VQRDMULH:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:34:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:35:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:36:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:37:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:38:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:39:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:40:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:41:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:42:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:43:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH:44:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:45:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:46:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:47:result_int64x2 [] = { 3333333333333333, 3333333333333333, } 
+VQRDMULH:48:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:49:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:50:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:51:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:52:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:54:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH:56:vqrdmulh_s16 Neon cumulative saturation 0 +VQRDMULH:57:vqrdmulh_s32 Neon cumulative saturation 0 +VQRDMULH:58:vqrdmulhq_s16 Neon cumulative saturation 0 +VQRDMULH:59:vqrdmulhq_s32 Neon cumulative saturation 0 + +VQRDMULH (check rounding cumulative saturation) output: +VQRDMULH:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:61:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:62:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:63:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:64:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:65:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:66:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:67:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:68:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:69:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:70:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH:71:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH:72:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:73:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:74:result_int32x4 [] = { 7fffffff, 7fffffff, 
7fffffff, 7fffffff, } +VQRDMULH:75:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:76:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:77:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:78:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:79:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:80:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:81:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:82:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:83:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_LANE cumulative saturation output: +VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon cumulative saturation 0 + +VQRDMULH_LANE output: +VQRDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:5:result_int16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:6:result_int32x2 [] = { 0, 0, } +VQRDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:12:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:13:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:14:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:15:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:16:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:17:result_int16x8 [] = { 0, 
0, 0, 0, 0, 0, 0, 0, } +VQRDMULH_LANE:18:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:19:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:20:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:21:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:22:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:23:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:24:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:25:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:26:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:27:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_LANE (check mul cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:28:vqrdmulh_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:29:vqrdmulh_lane_s32 Neon cumulative saturation 1 +VQRDMULH_LANE:30:vqrdmulhq_lane_s16 Neon cumulative saturation 1 +VQRDMULH_LANE:31:vqrdmulhq_lane_s32 Neon cumulative saturation 1 + +VQRDMULH_LANE (check mul cumulative saturation) output: +VQRDMULH_LANE:32:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:33:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:34:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:35:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:36:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:37:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:38:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:39:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:40:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:41:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:42:result_float32x2 [] = { 33333333, 33333333, } 
+VQRDMULH_LANE:43:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:44:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:45:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:46:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:47:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:48:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:49:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:50:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:51:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:52:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:53:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:54:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:55:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_LANE (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_LANE:56:vqrdmulh_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:57:vqrdmulh_lane_s32 Neon cumulative saturation 0 +VQRDMULH_LANE:58:vqrdmulhq_lane_s16 Neon cumulative saturation 0 +VQRDMULH_LANE:59:vqrdmulhq_lane_s32 Neon cumulative saturation 0 + +VQRDMULH_LANE (check rounding cumulative saturation) output: +VQRDMULH_LANE:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:61:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:62:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:63:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:64:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:65:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:66:result_uint32x2 [] = { 33333333, 33333333, } 
+VQRDMULH_LANE:67:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:68:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:69:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:70:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:71:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:72:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:73:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:74:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:75:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:76:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:77:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:78:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:79:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:80:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:81:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:82:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:83:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_N cumulative saturation output: +VQRDMULH_N:0:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:1:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:2:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:3:vqrdmulhq_n_s32 Neon cumulative saturation 0 + +VQRDMULH_N output: +VQRDMULH_N:4:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffd, } +VQRDMULH_N:5:result_int32x2 [] = { fffffffe, fffffffe, } +VQRDMULH_N:6:result_int16x8 [] = { 6, 6, 6, 5, 5, 4, 4, 4, } +VQRDMULH_N:7:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } + +VQRDMULH_N (check mul cumulative 
saturation) cumulative saturation output: +VQRDMULH_N:8:vqrdmulh_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:9:vqrdmulh_n_s32 Neon cumulative saturation 1 +VQRDMULH_N:10:vqrdmulhq_n_s16 Neon cumulative saturation 1 +VQRDMULH_N:11:vqrdmulhq_n_s32 Neon cumulative saturation 1 + +VQRDMULH_N (check mul cumulative saturation) output: +VQRDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:20:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:21:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:22:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:23:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_N:24:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:25:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:26:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:27:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:28:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:29:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:30:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:31:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:32:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:33:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQRDMULH_N:34:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:35:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRDMULH_N (check rounding cumulative saturation) cumulative saturation output: +VQRDMULH_N:36:vqrdmulh_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:37:vqrdmulh_n_s32 Neon cumulative saturation 0 +VQRDMULH_N:38:vqrdmulhq_n_s16 Neon cumulative saturation 0 +VQRDMULH_N:39:vqrdmulhq_n_s32 Neon cumulative saturation 0 + +VQRDMULH_N (check rounding cumulative saturation) output: +VQRDMULH_N:40:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:41:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:42:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:43:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:44:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:45:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:46:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:47:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:48:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:49:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:50:result_float32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:51:result_float16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_N:52:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:53:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:54:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:55:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:56:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:57:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:58:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:59:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } 
+VQRDMULH_N:60:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:61:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:62:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:63:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (with input = 0) cumulative saturation output: +VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (with input = 0) output: +VQRSHL/VQRSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:18:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:19:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:23:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:24:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:25:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:26:result_float32x2 
[] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:27:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:28:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:29:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:30:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:31:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:32:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:33:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:34:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:35:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:36:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:37:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:38:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:39:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:40:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:41:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:42:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:43:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:44:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:45:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:46:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:47:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:48:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:49:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:50:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:51:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:52:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:53:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:54:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:55:vqrshlq_u64 Neon cumulative saturation 0 
+ +VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: +VQRSHL/VQRSHLQ:56:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:57:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:58:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:59:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:60:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:61:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:62:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:63:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:64:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:65:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:66:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:67:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:68:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:69:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:70:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:71:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:72:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:73:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:74:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:75:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:76:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:77:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:78:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:79:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ cumulative saturation output: +VQRSHL/VQRSHLQ:80:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:81:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:82:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:83:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:84:vqrshl_u8 Neon cumulative saturation 1 
+VQRSHL/VQRSHLQ:85:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:86:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:87:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:88:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:89:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:90:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:91:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:92:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:93:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:94:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:95:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ output: +VQRSHL/VQRSHLQ:96:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQRSHL/VQRSHLQ:97:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQRSHL/VQRSHLQ:98:result_int32x2 [] = { fffff000, fffff100, } +VQRSHL/VQRSHLQ:99:result_int64x1 [] = { ffffffffffffff80, } +VQRSHL/VQRSHLQ:100:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:101:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:102:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:103:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:106:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:107:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:108:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:109:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:110:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:111:result_int64x2 
[] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:112:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:113:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:114:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:115:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:120:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:121:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:122:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:123:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:124:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:125:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:126:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:127:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:128:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:129:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:130:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:131:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:132:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:133:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:134:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:135:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (negative shift amount) output: +VQRSHL/VQRSHLQ:136:result_int8x8 [] = { fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, } 
+VQRSHL/VQRSHLQ:137:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VQRSHL/VQRSHLQ:138:result_int32x2 [] = { fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:139:result_int64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:140:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, } +VQRSHL/VQRSHLQ:141:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VQRSHL/VQRSHLQ:142:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQRSHL/VQRSHLQ:143:result_uint64x1 [] = { fffffffffffffff, } +VQRSHL/VQRSHLQ:144:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:145:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:146:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:147:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:148:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:149:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:150:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:151:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:152:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VQRSHL/VQRSHLQ:153:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:154:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VQRSHL/VQRSHLQ:155:result_uint64x2 [] = { 100000000000, 100000000000, } +VQRSHL/VQRSHLQ:156:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:157:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:158:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:159:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) cumulative saturation output: +VQRSHL/VQRSHLQ:160:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:161:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:162:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:163:vqrshl_s64 Neon 
cumulative saturation 0 +VQRSHL/VQRSHLQ:164:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:165:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:166:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:167:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:168:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:169:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:170:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:171:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:172:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:173:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:174:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:175:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -1) output: +VQRSHL/VQRSHLQ:176:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:177:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:178:result_int32x2 [] = { 40000000, 40000000, } +VQRSHL/VQRSHLQ:179:result_int64x1 [] = { 4000000000000000, } +VQRSHL/VQRSHLQ:180:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:181:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:182:result_uint32x2 [] = { 80000000, 80000000, } +VQRSHL/VQRSHLQ:183:result_uint64x1 [] = { 8000000000000000, } +VQRSHL/VQRSHLQ:184:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:185:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:186:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:187:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:188:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:189:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:190:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VQRSHL/VQRSHLQ:191:result_int64x2 [] = { 4000000000000000, 4000000000000000, } 
+VQRSHL/VQRSHLQ:192:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:193:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:194:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:195:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:196:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:197:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:198:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:199:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) cumulative saturation output: +VQRSHL/VQRSHLQ:200:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:201:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:202:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:203:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:204:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:205:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:206:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:207:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:208:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:209:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:210:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:211:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:212:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:213:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:214:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:215:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: shift by -3) output: +VQRSHL/VQRSHLQ:216:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:217:result_int16x4 [] = { 1000, 1000, 1000, 1000, } 
+VQRSHL/VQRSHLQ:218:result_int32x2 [] = { 10000000, 10000000, } +VQRSHL/VQRSHLQ:219:result_int64x1 [] = { 1000000000000000, } +VQRSHL/VQRSHLQ:220:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:221:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:222:result_uint32x2 [] = { 20000000, 20000000, } +VQRSHL/VQRSHLQ:223:result_uint64x1 [] = { 2000000000000000, } +VQRSHL/VQRSHLQ:224:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:225:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:226:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:227:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:228:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:229:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:230:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VQRSHL/VQRSHLQ:231:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VQRSHL/VQRSHLQ:232:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:233:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:234:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VQRSHL/VQRSHLQ:235:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VQRSHL/VQRSHLQ:236:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:237:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:238:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:239:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:240:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:241:vqrshl_s16 Neon cumulative saturation 1 
+VQRSHL/VQRSHLQ:242:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:243:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:244:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:245:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:246:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:247:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:248:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:249:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:250:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:251:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:252:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:253:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:254:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:255:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount) output: +VQRSHL/VQRSHLQ:256:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHL/VQRSHLQ:257:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHL/VQRSHLQ:258:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHL/VQRSHLQ:259:result_int64x1 [] = { 7fffffffffffffff, } +VQRSHL/VQRSHLQ:260:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:261:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:262:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:263:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:264:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:265:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:266:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:267:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:268:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHL/VQRSHLQ:269:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRSHL/VQRSHLQ:270:result_int32x4 [] = { 7fffffff, 7fffffff, 
7fffffff, 7fffffff, } +VQRSHL/VQRSHLQ:271:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQRSHL/VQRSHLQ:272:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:273:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:274:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:275:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:276:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:277:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:278:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:279:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative input) cumulative saturation output: +VQRSHL/VQRSHLQ:280:vqrshl_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:281:vqrshl_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:282:vqrshl_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:283:vqrshl_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:284:vqrshl_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:285:vqrshl_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:286:vqrshl_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:287:vqrshl_u64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:288:vqrshlq_s8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:289:vqrshlq_s16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:290:vqrshlq_s32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:291:vqrshlq_s64 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:292:vqrshlq_u8 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:293:vqrshlq_u16 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:294:vqrshlq_u32 Neon cumulative saturation 1 +VQRSHL/VQRSHLQ:295:vqrshlq_u64 Neon cumulative saturation 1 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with negative 
input) output: +VQRSHL/VQRSHLQ:296:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:297:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:298:result_int32x2 [] = { 80000000, 80000000, } +VQRSHL/VQRSHLQ:299:result_int64x1 [] = { 8000000000000000, } +VQRSHL/VQRSHLQ:300:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:301:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:302:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:303:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:304:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:305:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:306:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:307:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:308:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:309:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:310:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:311:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:312:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:313:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:314:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:315:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:316:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:317:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:318:result_float32x4 [] = { 
33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:319:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) cumulative saturation output: +VQRSHL/VQRSHLQ:320:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:321:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:322:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:323:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:324:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:325:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:326:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:327:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:328:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:329:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:330:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:331:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:332:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:333:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:334:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:335:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking cumulative saturation: large negative shift amount) output: +VQRSHL/VQRSHLQ:336:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:337:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:338:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:339:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:340:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:341:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:342:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:343:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:344:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:345:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:346:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:347:result_float16x4 [] = { 0, 0, 0, 0, } 
+VQRSHL/VQRSHLQ:348:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:349:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:350:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:351:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:352:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:353:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:354:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:355:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:356:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:357:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:358:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:359:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHL/VQRSHLQ (checking cumulative saturation: large shift amount with 0 input) cumulative saturation output: +VQRSHL/VQRSHLQ:360:vqrshl_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:361:vqrshl_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:362:vqrshl_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:363:vqrshl_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:364:vqrshl_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:365:vqrshl_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:366:vqrshl_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:367:vqrshl_u64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:368:vqrshlq_s8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:369:vqrshlq_s16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:370:vqrshlq_s32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:371:vqrshlq_s64 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:372:vqrshlq_u8 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:373:vqrshlq_u16 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:374:vqrshlq_u32 Neon cumulative saturation 0 +VQRSHL/VQRSHLQ:375:vqrshlq_u64 Neon cumulative saturation 0 + +VQRSHL/VQRSHLQ (checking 
cumulative saturation: large shift amount with 0 input) output: +VQRSHL/VQRSHLQ:376:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:377:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:378:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:379:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:380:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:381:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:382:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:383:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:384:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:385:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:386:result_float32x2 [] = { 33333333, 33333333, } +VQRSHL/VQRSHLQ:387:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:388:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:389:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:390:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:391:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:392:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:393:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:394:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:395:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:396:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHL/VQRSHLQ:397:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHL/VQRSHLQ:398:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHL/VQRSHLQ:399:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VABA/VABAQ output: +VABA/VABAQ:0:result_int8x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABA/VABAQ:1:result_int16x4 [] = { 16, 17, 18, 19, } +VABA/VABAQ:2:result_int32x2 [] = { 20, 21, } +VABA/VABAQ:3:result_int64x1 [] = { 3333333333333333, } +VABA/VABAQ:4:result_uint8x8 [] = 
{ 53, 54, 55, 56, 57, 58, 59, 5a, } +VABA/VABAQ:5:result_uint16x4 [] = { 907, 908, 909, 90a, } +VABA/VABAQ:6:result_uint32x2 [] = { ffffffe7, ffffffe8, } +VABA/VABAQ:7:result_uint64x1 [] = { 3333333333333333, } +VABA/VABAQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABA/VABAQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABA/VABAQ:10:result_float32x2 [] = { 33333333, 33333333, } +VABA/VABAQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABA/VABAQ:12:result_int8x16 [] = { 5e, 5f, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 6a, 6b, 6c, 6d, } +VABA/VABAQ:13:result_int16x8 [] = { b9c, b9d, b9e, b9f, ba0, ba1, ba2, ba3, } +VABA/VABAQ:14:result_int32x4 [] = { 26e0, 26e1, 26e2, 26e3, } +VABA/VABAQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:16:result_uint8x16 [] = { f8, f9, fa, fb, fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, } +VABA/VABAQ:17:result_uint16x8 [] = { fff9, fffa, fffb, fffc, fffd, fffe, ffff, 0, } +VABA/VABAQ:18:result_uint32x4 [] = { c, d, e, f, } +VABA/VABAQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABA/VABAQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABA/VABAQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABA/VABAQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VABAL output: +VABAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:2:result_int32x2 [] = { 33333333, 33333333, } +VABAL:3:result_int64x1 [] = { 3333333333333333, } +VABAL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABAL:7:result_uint64x1 [] = { 3333333333333333, } +VABAL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:9:result_poly16x4 [] = { 3333, 
3333, 3333, 3333, } +VABAL:10:result_float32x2 [] = { 33333333, 33333333, } +VABAL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABAL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:13:result_int16x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABAL:14:result_int32x4 [] = { 16, 17, 18, 19, } +VABAL:15:result_int64x2 [] = { 20, 21, } +VABAL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:17:result_uint16x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } +VABAL:18:result_uint32x4 [] = { 907, 908, 909, 90a, } +VABAL:19:result_uint64x2 [] = { ffffffe7, ffffffe8, } +VABAL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABAL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABAL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VABAL test intermediate overflow output: +VABAL:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:26:result_int32x2 [] = { 33333333, 33333333, } +VABAL:27:result_int64x1 [] = { 3333333333333333, } +VABAL:28:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:29:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:30:result_uint32x2 [] = { 33333333, 33333333, } +VABAL:31:result_uint64x1 [] = { 3333333333333333, } +VABAL:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:34:result_float32x2 [] = { 33333333, 33333333, } +VABAL:35:result_float16x4 [] = { 0, 0, 0, 0, } +VABAL:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:37:result_int16x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABAL:38:result_int32x4 [] = { ffef, fff0, fff1, fff2, } 
+VABAL:39:result_int64x2 [] = { ffffffef, fffffff0, } +VABAL:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:41:result_uint16x8 [] = { ee, ef, f0, f1, f2, f3, f4, f5, } +VABAL:42:result_uint32x4 [] = { ffe2, ffe3, ffe4, ffe5, } +VABAL:43:result_uint64x2 [] = { ffffffe7, ffffffe8, } +VABAL:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABAL:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABAL:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VABD/VABDQ output: +VABD/VABDQ:0:result_int8x8 [] = { 11, 10, f, e, d, c, b, a, } +VABD/VABDQ:1:result_int16x4 [] = { 3, 2, 1, 0, } +VABD/VABDQ:2:result_int32x2 [] = { 18, 17, } +VABD/VABDQ:3:result_int64x1 [] = { 3333333333333333, } +VABD/VABDQ:4:result_uint8x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABD/VABDQ:5:result_uint16x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABD/VABDQ:6:result_uint32x2 [] = { ffffffe8, ffffffe9, } +VABD/VABDQ:7:result_uint64x1 [] = { 3333333333333333, } +VABD/VABDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABD/VABDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABD/VABDQ:10:result_float32x2 [] = { 41c26666, 41ba6666, } +VABD/VABDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABD/VABDQ:12:result_int8x16 [] = { 1a, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, f, e, d, c, b, } +VABD/VABDQ:13:result_int16x8 [] = { 4, 3, 2, 1, 0, 1, 2, 3, } +VABD/VABDQ:14:result_int32x4 [] = { 30, 2f, 2e, 2d, } +VABD/VABDQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABD/VABDQ:16:result_uint8x16 [] = { e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, f4, f5, } +VABD/VABDQ:17:result_uint16x8 [] = { ffe4, ffe5, ffe6, ffe7, ffe8, ffe9, ffea, ffeb, } +VABD/VABDQ:18:result_uint32x4 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, } +VABD/VABDQ:19:result_uint64x2 [] = { 
3333333333333333, 3333333333333333, } +VABD/VABDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABD/VABDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABD/VABDQ:22:result_float32x4 [] = { 42407ae1, 423c7ae1, 42387ae1, 42347ae1, } +VABD/VABDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VABD/VABDQ FP special (-0.0):24:result_float32x4 [] = { 0, 0, 0, 0, } +VABD/VABDQ FP special (-0.0):25:result_float32x4 [] = { 0, 0, 0, 0, } + +VABDL output: +VABDL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:2:result_int32x2 [] = { 33333333, 33333333, } +VABDL:3:result_int64x1 [] = { 3333333333333333, } +VABDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABDL:7:result_uint64x1 [] = { 3333333333333333, } +VABDL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:10:result_float32x2 [] = { 33333333, 33333333, } +VABDL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VABDL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:13:result_int16x8 [] = { 11, 10, f, e, d, c, b, a, } +VABDL:14:result_int32x4 [] = { 3, 2, 1, 0, } +VABDL:15:result_int64x2 [] = { 18, 17, } +VABDL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:17:result_uint16x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABDL:18:result_uint32x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABDL:19:result_uint64x2 [] = { ffffffe8, ffffffe9, } +VABDL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABDL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VABDL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VAND/VANDQ output: +VAND/VANDQ:0:result_int8x8 [] = { 0, 0, 2, 2, 0, 0, 2, 2, } +VAND/VANDQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VAND/VANDQ:2:result_int32x2 [] = { 0, 1, } +VAND/VANDQ:3:result_int64x1 [] = { 60, } +VAND/VANDQ:4:result_uint8x8 [] = { 10, 10, 10, 10, 14, 14, 14, 14, } +VAND/VANDQ:5:result_uint16x4 [] = { 10, 10, 12, 12, } +VAND/VANDQ:6:result_uint32x2 [] = { 20, 20, } +VAND/VANDQ:7:result_uint64x1 [] = { 0, } +VAND/VANDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VAND/VANDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VAND/VANDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VAND/VANDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VAND/VANDQ:12:result_int8x16 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VAND/VANDQ:13:result_int16x8 [] = { ffffffe0, ffffffe0, ffffffe0, ffffffe0, ffffffe4, ffffffe4, ffffffe4, ffffffe4, } +VAND/VANDQ:14:result_int32x4 [] = { ffffffe0, ffffffe0, ffffffe2, ffffffe2, } +VAND/VANDQ:15:result_int64x2 [] = { 10, 10, } +VAND/VANDQ:16:result_uint8x16 [] = { 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, c, c, c, c, } +VAND/VANDQ:17:result_uint16x8 [] = { 0, 1, 2, 3, 0, 1, 2, 3, } +VAND/VANDQ:18:result_uint32x4 [] = { 30, 31, 32, 33, } +VAND/VANDQ:19:result_uint64x2 [] = { 0, 1, } +VAND/VANDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VAND/VANDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VAND/VANDQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VAND/VANDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VORR/VORRQ output: +VORR/VORRQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff2, fffffff3, fffffff6, fffffff7, fffffff6, fffffff7, } +VORR/VORRQ:1:result_int16x4 [] = { fffffffc, fffffffd, 
fffffffe, ffffffff, } +VORR/VORRQ:2:result_int32x2 [] = { fffffff3, fffffff3, } +VORR/VORRQ:3:result_int64x1 [] = { fffffffffffffff4, } +VORR/VORRQ:4:result_uint8x8 [] = { f4, f5, f6, f7, f4, f5, f6, f7, } +VORR/VORRQ:5:result_uint16x4 [] = { fffe, ffff, fffe, ffff, } +VORR/VORRQ:6:result_uint32x2 [] = { fffffff8, fffffff9, } +VORR/VORRQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VORR/VORRQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VORR/VORRQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VORR/VORRQ:10:result_float32x2 [] = { 33333333, 33333333, } +VORR/VORRQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VORR/VORRQ:12:result_int8x16 [] = { fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, } +VORR/VORRQ:13:result_int16x8 [] = { fffffffc, fffffffd, fffffffe, ffffffff, fffffffc, fffffffd, fffffffe, ffffffff, } +VORR/VORRQ:14:result_int32x4 [] = { fffffff2, fffffff3, fffffff2, fffffff3, } +VORR/VORRQ:15:result_int64x2 [] = { fffffffffffffff8, fffffffffffffff9, } +VORR/VORRQ:16:result_uint8x16 [] = { fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, } +VORR/VORRQ:17:result_uint16x8 [] = { fff3, fff3, fff3, fff3, fff7, fff7, fff7, fff7, } +VORR/VORRQ:18:result_uint32x4 [] = { fffffff7, fffffff7, fffffff7, fffffff7, } +VORR/VORRQ:19:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff3, } +VORR/VORRQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VORR/VORRQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VORR/VORRQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VORR/VORRQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VORN/VORNQ output: +VORN/VORNQ:0:result_int8x8 [] = { fffffffd, fffffffd, ffffffff, ffffffff, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:1:result_int16x4 [] = { fffffff3, fffffff3, fffffff3, 
fffffff3, } +VORN/VORNQ:2:result_int32x2 [] = { fffffffc, fffffffd, } +VORN/VORNQ:3:result_int64x1 [] = { fffffffffffffffb, } +VORN/VORNQ:4:result_uint8x8 [] = { fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:5:result_uint16x4 [] = { fff1, fff1, fff3, fff3, } +VORN/VORNQ:6:result_uint32x2 [] = { fffffff7, fffffff7, } +VORN/VORNQ:7:result_uint64x1 [] = { fffffffffffffffd, } +VORN/VORNQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VORN/VORNQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VORN/VORNQ:10:result_float32x2 [] = { 33333333, 33333333, } +VORN/VORNQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VORN/VORNQ:12:result_int8x16 [] = { fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:13:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff7, fffffff7, fffffff7, fffffff7, } +VORN/VORNQ:14:result_int32x4 [] = { fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:15:result_int64x2 [] = { fffffffffffffff7, fffffffffffffff7, } +VORN/VORNQ:16:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:17:result_uint16x8 [] = { fffc, fffd, fffe, ffff, fffc, fffd, fffe, ffff, } +VORN/VORNQ:18:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VORN/VORNQ:19:result_uint64x2 [] = { fffffffffffffffc, fffffffffffffffd, } +VORN/VORNQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VORN/VORNQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VORN/VORNQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VORN/VORNQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VEOR/VEORQ output: +VEOR/VEORQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff0, fffffff1, fffffff6, fffffff7, fffffff4, fffffff5, } +VEOR/VEORQ:1:result_int16x4 [] = { c, d, e, f, } +VEOR/VEORQ:2:result_int32x2 
[] = { fffffff3, fffffff2, } +VEOR/VEORQ:3:result_int64x1 [] = { ffffffffffffff94, } +VEOR/VEORQ:4:result_uint8x8 [] = { e4, e5, e6, e7, e0, e1, e2, e3, } +VEOR/VEORQ:5:result_uint16x4 [] = { ffee, ffef, ffec, ffed, } +VEOR/VEORQ:6:result_uint32x2 [] = { ffffffd8, ffffffd9, } +VEOR/VEORQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VEOR/VEORQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VEOR/VEORQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VEOR/VEORQ:10:result_float32x2 [] = { 33333333, 33333333, } +VEOR/VEORQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VEOR/VEORQ:12:result_int8x16 [] = { 6, 7, 4, 5, 2, 3, 0, 1, e, f, c, d, a, b, 8, 9, } +VEOR/VEORQ:13:result_int16x8 [] = { 1c, 1d, 1e, 1f, 18, 19, 1a, 1b, } +VEOR/VEORQ:14:result_int32x4 [] = { 12, 13, 10, 11, } +VEOR/VEORQ:15:result_int64x2 [] = { ffffffffffffffe8, ffffffffffffffe9, } +VEOR/VEORQ:16:result_uint8x16 [] = { fc, fd, fe, ff, f8, f9, fa, fb, f4, f5, f6, f7, f0, f1, f2, f3, } +VEOR/VEORQ:17:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VEOR/VEORQ:18:result_uint32x4 [] = { ffffffc7, ffffffc6, ffffffc5, ffffffc4, } +VEOR/VEORQ:19:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff2, } +VEOR/VEORQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VEOR/VEORQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VEOR/VEORQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VEOR/VEORQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VBIC/VBICQ output: +VBIC/VBICQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff4, fffffff5, fffffff4, fffffff5, } +VBIC/VBICQ:1:result_int16x4 [] = { 0, 1, 2, 3, } +VBIC/VBICQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBIC/VBICQ:3:result_int64x1 [] = { ffffffffffffff90, } +VBIC/VBICQ:4:result_uint8x8 [] = { e0, e1, e2, e3, e0, e1, e2, e3, } +VBIC/VBICQ:5:result_uint16x4 [] = { ffe0, ffe1, ffe0, ffe1, } 
+VBIC/VBICQ:6:result_uint32x2 [] = { ffffffd0, ffffffd1, } +VBIC/VBICQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VBIC/VBICQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VBIC/VBICQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VBIC/VBICQ:10:result_float32x2 [] = { 33333333, 33333333, } +VBIC/VBICQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VBIC/VBICQ:12:result_int8x16 [] = { 0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9, } +VBIC/VBICQ:13:result_int16x8 [] = { 10, 11, 12, 13, 10, 11, 12, 13, } +VBIC/VBICQ:14:result_int32x4 [] = { 10, 11, 10, 11, } +VBIC/VBICQ:15:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VBIC/VBICQ:16:result_uint8x16 [] = { f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, } +VBIC/VBICQ:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff4, fff4, fff4, fff4, } +VBIC/VBICQ:18:result_uint32x4 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VBIC/VBICQ:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VBIC/VBICQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VBIC/VBICQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VBIC/VBICQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VBIC/VBICQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCREATE output: +VCREATE:0:result_int8x8 [] = { fffffff0, ffffffde, ffffffbc, ffffff9a, 78, 56, 34, 12, } +VCREATE:1:result_int16x4 [] = { ffffdef0, ffff9abc, 5678, 1234, } +VCREATE:2:result_int32x2 [] = { 9abcdef0, 12345678, } +VCREATE:3:result_int64x1 [] = { 123456789abcdef0, } +VCREATE:4:result_uint8x8 [] = { f0, de, bc, 9a, 78, 56, 34, 12, } +VCREATE:5:result_uint16x4 [] = { def0, 9abc, 5678, 1234, } +VCREATE:6:result_uint32x2 [] = { 9abcdef0, 12345678, } +VCREATE:7:result_uint64x1 [] = { 123456789abcdef0, } +VCREATE:8:result_poly8x8 [] = { f0, de, bc, 9a, 78, 56, 34, 12, } +VCREATE:9:result_poly16x4 [] = { def0, 9abc, 5678, 1234, } 
+VCREATE:10:result_float32x2 [] = { 9abcdef0, 12345678, } +VCREATE:11:result_float16x4 [] = { def0, 9abc, 5678, 1234, } +VCREATE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD2_LANE/VLD2Q_LANE chunk 0 output: +VLD2_LANE/VLD2Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD2_LANE/VLD2Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:10:result_float32x2 [] = { 
c1800000, c1700000, } +VLD2_LANE/VLD2Q_LANE:11:result_float16x4 [] = { cc00, cb80, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:13:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:14:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:17:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:21:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:22:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:23:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, cc00, cb80, aaaa, aaaa, } + +VLD2_LANE/VLD2Q_LANE chunk 1 output: +VLD2_LANE/VLD2Q_LANE:24:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:25:result_int16x4 [] = { fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:26:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:28:result_uint8x8 [] = { f0, f1, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:29:result_uint16x4 [] = { aaaa, aaaa, fff0, fff1, } +VLD2_LANE/VLD2Q_LANE:30:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } 
+VLD2_LANE/VLD2Q_LANE:32:result_poly8x8 [] = { f0, f1, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:33:result_poly16x4 [] = { aaaa, aaaa, fff0, fff1, } +VLD2_LANE/VLD2Q_LANE:34:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:35:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:37:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:38:result_int32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:41:result_uint16x8 [] = { aaaa, aaaa, fff0, fff1, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:42:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:45:result_poly16x8 [] = { aaaa, aaaa, fff0, fff1, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:46:result_float32x4 [] = { c1800000, c1700000, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:47:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD3_LANE/VLD3Q_LANE chunk 0 output: +VLD3_LANE/VLD3Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } 
+VLD3_LANE/VLD3Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3_LANE/VLD3Q_LANE:11:result_float16x4 [] = { cc00, cb80, cb00, aaaa, } +VLD3_LANE/VLD3Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:13:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:14:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:17:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:21:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:22:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:23:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, cc00, cb80, } + +VLD3_LANE/VLD3Q_LANE chunk 1 output: +VLD3_LANE/VLD3Q_LANE:24:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:25:result_int16x4 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, } 
+VLD3_LANE/VLD3Q_LANE:26:result_int32x2 [] = { fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:28:result_uint8x8 [] = { aa, aa, aa, aa, f0, f1, f2, aa, } +VLD3_LANE/VLD3Q_LANE:29:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:30:result_uint32x2 [] = { aaaaaaaa, fffffff0, } +VLD3_LANE/VLD3Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:32:result_poly8x8 [] = { aa, aa, aa, aa, f0, f1, f2, aa, } +VLD3_LANE/VLD3Q_LANE:33:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:34:result_float32x2 [] = { c1600000, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:35:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:37:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:38:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:41:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, } +VLD3_LANE/VLD3Q_LANE:42:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:45:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, } +VLD3_LANE/VLD3Q_LANE:46:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, c1800000, c1700000, } +VLD3_LANE/VLD3Q_LANE:47:result_float16x8 [] = { cb00, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD3_LANE/VLD3Q_LANE chunk 2 output: 
+VLD3_LANE/VLD3Q_LANE:48:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:49:result_int16x4 [] = { fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:50:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:51:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:52:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:53:result_uint16x4 [] = { aaaa, fff0, fff1, fff2, } +VLD3_LANE/VLD3Q_LANE:54:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:56:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:57:result_poly16x4 [] = { aaaa, fff0, fff1, fff2, } +VLD3_LANE/VLD3Q_LANE:58:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:59:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:61:result_int16x8 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:62:result_int32x4 [] = { fffffff2, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:65:result_uint16x8 [] = { fff1, fff2, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:66:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:69:result_poly16x8 [] = { fff1, fff2, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } 
+VLD3_LANE/VLD3Q_LANE:70:result_float32x4 [] = { c1600000, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:71:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 0 output: +VLD4_LANE/VLD4Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:8:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:9:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4_LANE/VLD4Q_LANE:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_LANE/VLD4Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:13:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:14:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:17:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } 
+VLD4_LANE/VLD4Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:21:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:22:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:23:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 1 output: +VLD4_LANE/VLD4Q_LANE:24:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:25:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:28:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:29:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:30:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:32:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:33:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:34:result_float32x2 [] = { c1600000, c1500000, } +VLD4_LANE/VLD4Q_LANE:35:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:37:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:38:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:41:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, 
aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:42:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:45:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:46:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:47:result_float16x8 [] = { cc00, cb80, cb00, ca80, aaaa, aaaa, aaaa, aaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 2 output: +VLD4_LANE/VLD4Q_LANE:48:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:49:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:50:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:51:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:52:result_uint8x8 [] = { f0, f1, f2, f3, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:53:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:54:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:56:result_poly8x8 [] = { f0, f1, f2, f3, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:57:result_poly16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:58:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:59:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:61:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:62:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, 
} +VLD4_LANE/VLD4Q_LANE:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:65:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:66:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:69:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:70:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VLD4_LANE/VLD4Q_LANE:71:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD4_LANE/VLD4Q_LANE chunk 3 output: +VLD4_LANE/VLD4Q_LANE:72:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:73:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:74:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:75:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:76:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:77:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:78:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:79:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:80:result_poly8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:81:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:82:result_float32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:83:result_float16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:84:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:85:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, ffffaaaa, ffffaaaa, 
ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:86:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:88:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:89:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:90:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:91:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:93:result_poly16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:94:result_float32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:95:result_float16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } + +VLD2_DUP/VLD2Q_DUP chunk 0 output: +VLD2_DUP/VLD2Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:4:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:8:result_poly8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD2_DUP/VLD2Q_DUP:11:result_float16x4 [] = { cc00, cb80, cc00, cb80, } +VLD2_DUP/VLD2Q_DUP:12:result_int8x16 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD2_DUP/VLD2Q_DUP chunk 1 output: +VLD2_DUP/VLD2Q_DUP:24:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:25:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:26:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:27:result_int64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:28:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:29:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:30:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:32:result_poly8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:33:result_poly16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:34:result_float32x2 [] = { c1800000, c1700000, } +VLD2_DUP/VLD2Q_DUP:35:result_float16x4 [] = { 
cc00, cb80, cc00, cb80, } +VLD2_DUP/VLD2Q_DUP:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD3_DUP/VLD3Q_DUP chunk 0 output: +VLD3_DUP/VLD3Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f0, f1, f2, f0, f1, } +VLD3_DUP/VLD3Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff0, } +VLD3_DUP/VLD3Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:8:result_poly8x8 [] = { f0, f1, f2, f0, f1, f2, f0, f1, } +VLD3_DUP/VLD3Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff2, fff0, } 
+VLD3_DUP/VLD3Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD3_DUP/VLD3Q_DUP:11:result_float16x4 [] = { cc00, cb80, cb00, cc00, } +VLD3_DUP/VLD3Q_DUP:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD3_DUP/VLD3Q_DUP chunk 1 output: +VLD3_DUP/VLD3Q_DUP:24:result_int8x8 [] = { fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:25:result_int16x4 [] = { fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:26:result_int32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:27:result_int64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:28:result_uint8x8 [] = { f2, f0, f1, f2, f0, f1, f2, f0, } +VLD3_DUP/VLD3Q_DUP:29:result_uint16x4 [] = { fff1, fff2, fff0, fff1, } +VLD3_DUP/VLD3Q_DUP:30:result_uint32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:32:result_poly8x8 [] = { f2, f0, f1, f2, 
f0, f1, f2, f0, } +VLD3_DUP/VLD3Q_DUP:33:result_poly16x4 [] = { fff1, fff2, fff0, fff1, } +VLD3_DUP/VLD3Q_DUP:34:result_float32x2 [] = { c1600000, c1800000, } +VLD3_DUP/VLD3Q_DUP:35:result_float16x4 [] = { cb80, cb00, cc00, cb80, } +VLD3_DUP/VLD3Q_DUP:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD3_DUP/VLD3Q_DUP chunk 2 output: +VLD3_DUP/VLD3Q_DUP:48:result_int8x8 [] = { fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:49:result_int16x4 [] = { fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:50:result_int32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:51:result_int64x1 [] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:52:result_uint8x8 [] = { f1, f2, f0, f1, f2, f0, f1, f2, } +VLD3_DUP/VLD3Q_DUP:53:result_uint16x4 [] = { fff2, fff0, fff1, fff2, } +VLD3_DUP/VLD3Q_DUP:54:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:55:result_uint64x1 
[] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:56:result_poly8x8 [] = { f1, f2, f0, f1, f2, f0, f1, f2, } +VLD3_DUP/VLD3Q_DUP:57:result_poly16x4 [] = { fff2, fff0, fff1, fff2, } +VLD3_DUP/VLD3Q_DUP:58:result_float32x2 [] = { c1700000, c1600000, } +VLD3_DUP/VLD3Q_DUP:59:result_float16x4 [] = { cb00, cc00, cb80, cb00, } +VLD3_DUP/VLD3Q_DUP:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:61:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:62:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:65:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:66:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD4_DUP/VLD4Q_DUP chunk 0 output: +VLD4_DUP/VLD4Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } 
+VLD4_DUP/VLD4Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:8:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:10:result_float32x2 [] = { c1800000, c1700000, } +VLD4_DUP/VLD4Q_DUP:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_DUP/VLD4Q_DUP:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD4_DUP/VLD4Q_DUP chunk 1 output: +VLD4_DUP/VLD4Q_DUP:24:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:25:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:26:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:27:result_int64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:28:result_uint8x8 [] = { f0, f1, f2, f3, f0, 
f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:29:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:31:result_uint64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:32:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:33:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:34:result_float32x2 [] = { c1600000, c1500000, } +VLD4_DUP/VLD4Q_DUP:35:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_DUP/VLD4Q_DUP:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD4_DUP/VLD4Q_DUP chunk 2 output: +VLD4_DUP/VLD4Q_DUP:48:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:49:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:50:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:51:result_int64x1 [] = 
{ fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:52:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:53:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:54:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:55:result_uint64x1 [] = { fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:56:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:57:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:58:result_float32x2 [] = { c1800000, c1700000, } +VLD4_DUP/VLD4Q_DUP:59:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_DUP/VLD4Q_DUP:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:61:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:62:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:65:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:66:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VLD4_DUP/VLD4Q_DUP chunk 3 output: +VLD4_DUP/VLD4Q_DUP:72:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:73:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } 
+VLD4_DUP/VLD4Q_DUP:74:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:75:result_int64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:76:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:77:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:78:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:79:result_uint64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:80:result_poly8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:81:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:82:result_float32x2 [] = { c1600000, c1500000, } +VLD4_DUP/VLD4Q_DUP:83:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VLD4_DUP/VLD4Q_DUP:84:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:85:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:86:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:88:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:89:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:90:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:91:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLA output: +VMLA:0:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VMLA:1:result_int16x4 [] = { 1f8c, 
1f8d, 1f8e, 1f8f, } +VMLA:2:result_int32x2 [] = { 2bf7, 2bf8, } +VMLA:3:result_int64x1 [] = { 3333333333333333, } +VMLA:4:result_uint8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VMLA:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA:6:result_uint32x2 [] = { 43ac, 43ad, } +VMLA:7:result_uint64x1 [] = { 3333333333333333, } +VMLA:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA:10:result_float32x2 [] = { 43a14e76, 43a1ce76, } +VMLA:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLA:12:result_int8x16 [] = { f, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, } +VMLA:13:result_int16x8 [] = { 4830, 4831, 4832, 4833, 4834, 4835, 4836, 4837, } +VMLA:14:result_int32x4 [] = { 470f, 4710, 4711, 4712, } +VMLA:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:16:result_uint8x16 [] = { ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, } +VMLA:17:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA:18:result_uint32x4 [] = { 3620, 3621, 3622, 3623, } +VMLA:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA:22:result_float32x4 [] = { 45f0ae15, 45f0b615, 45f0be15, 45f0c615, } +VMLA:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLS output: +VMLS:0:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VMLS:1:result_int16x4 [] = { ffffe054, ffffe055, ffffe056, ffffe057, } +VMLS:2:result_int32x2 [] = { ffffd3e9, ffffd3ea, } +VMLS:3:result_int64x1 [] = { 3333333333333333, } +VMLS:4:result_uint8x8 [] = { c0, c1, c2, c3, c4, c5, c6, c7, } +VMLS:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS:6:result_uint32x2 [] = { ffffbc34, ffffbc35, } +VMLS:7:result_uint64x1 [] = { 3333333333333333, } +VMLS:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 
33, 33, 33, } +VMLS:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS:10:result_float32x2 [] = { c3b14e76, c3b0ce76, } +VMLS:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLS:12:result_int8x16 [] = { ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, } +VMLS:13:result_int16x8 [] = { ffffb7b0, ffffb7b1, ffffb7b2, ffffb7b3, ffffb7b4, ffffb7b5, ffffb7b6, ffffb7b7, } +VMLS:14:result_int32x4 [] = { ffffb8d1, ffffb8d2, ffffb8d3, ffffb8d4, } +VMLS:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:16:result_uint8x16 [] = { 34, 35, 36, 37, 38, 39, 3a, 3b, 3c, 3d, 3e, 3f, 40, 41, 42, 43, } +VMLS:17:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS:18:result_uint32x4 [] = { ffffc9c0, ffffc9c1, ffffc9c2, ffffc9c3, } +VMLS:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS:22:result_float32x4 [] = { c5f1ae15, c5f1a615, c5f19e15, c5f19615, } +VMLS:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMUL output: +VMUL:0:result_int8x8 [] = { fffffff0, 1, 12, 23, 34, 45, 56, 67, } +VMUL:1:result_int16x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL:2:result_int32x2 [] = { fffffcd0, fffffd03, } +VMUL:3:result_int64x1 [] = { 3333333333333333, } +VMUL:4:result_uint8x8 [] = { c0, 4, 48, 8c, d0, 14, 58, 9c, } +VMUL:5:result_uint16x4 [] = { fab0, fb05, fb5a, fbaf, } +VMUL:6:result_uint32x2 [] = { fffff9a0, fffffa06, } +VMUL:7:result_uint64x1 [] = { 3333333333333333, } +VMUL:8:result_poly8x8 [] = { c0, 84, 48, c, d0, 94, 58, 1c, } +VMUL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL:10:result_float32x2 [] = { c4053333, c3f9c000, } +VMUL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMUL:12:result_int8x16 [] = { 
ffffff90, 7, 7e, fffffff5, 6c, ffffffe3, 5a, ffffffd1, 48, ffffffbf, 36, ffffffad, 24, ffffff9b, 12, ffffff89, } +VMUL:13:result_int16x8 [] = { fffff780, fffff808, fffff890, fffff918, fffff9a0, fffffa28, fffffab0, fffffb38, } +VMUL:14:result_int32x4 [] = { fffff670, fffff709, fffff7a2, fffff83b, } +VMUL:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL:16:result_uint8x16 [] = { 60, a, b4, 5e, 8, b2, 5c, 6, b0, 5a, 4, ae, 58, 2, ac, 56, } +VMUL:17:result_uint16x8 [] = { f450, f50b, f5c6, f681, f73c, f7f7, f8b2, f96d, } +VMUL:18:result_uint32x4 [] = { fffff340, fffff40c, fffff4d8, fffff5a4, } +VMUL:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL:20:result_poly8x16 [] = { 60, ca, 34, 9e, c8, 62, 9c, 36, 30, 9a, 64, ce, 98, 32, cc, 66, } +VMUL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL:22:result_float32x4 [] = { c4c73333, c4bac000, c4ae4ccd, c4a1d999, } +VMUL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMUL_LANE output: +VMUL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:1:result_int16x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VMUL_LANE:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMUL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:5:result_uint16x4 [] = { bbc0, c004, c448, c88c, } +VMUL_LANE:6:result_uint32x2 [] = { fffface0, ffffb212, } +VMUL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL_LANE:10:result_float32x2 [] = { c3b66666, c3ab0000, } +VMUL_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMUL_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:13:result_int16x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } 
+VMUL_LANE:14:result_int32x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:17:result_uint16x8 [] = { bbc0, c004, c448, c88c, ccd0, d114, d558, d99c, } +VMUL_LANE:18:result_uint32x4 [] = { fffface0, ffffb212, ffffb744, ffffbc76, } +VMUL_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL_LANE:22:result_float32x4 [] = { c3b66666, c3ab0000, c39f9999, c3943333, } +VMUL_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMUL_N output: +VMUL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:1:result_int16x4 [] = { fffffef0, ffffff01, ffffff12, ffffff23, } +VMUL_N:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_N:3:result_int64x1 [] = { 3333333333333333, } +VMUL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:5:result_uint16x4 [] = { fcd0, fd03, fd36, fd69, } +VMUL_N:6:result_uint32x2 [] = { fffffbc0, fffffc04, } +VMUL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMUL_N:10:result_float32x2 [] = { c3b26666, c3a74000, } +VMUL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMUL_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:13:result_int16x8 [] = { fffffab0, fffffb05, fffffb5a, fffffbaf, fffffc04, fffffc59, fffffcae, fffffd03, } +VMUL_N:14:result_int32x4 [] = { fffff9a0, fffffa06, fffffa6c, fffffad2, } +VMUL_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, } +VMUL_N:17:result_uint16x8 [] = { f890, f907, f97e, f9f5, fa6c, fae3, fb5a, fbd1, } +VMUL_N:18:result_uint32x4 [] = { fffff780, fffff808, fffff890, fffff918, } +VMUL_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMUL_N:22:result_float32x4 [] = { c4b1cccd, c4a6b000, c49b9333, c4907667, } +VMUL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMULL_N output: +VMULL_N:0:result_int32x4 [] = { 11000, 11000, 11000, 11000, } +VMULL_N:1:result_int64x2 [] = { 22000, 22000, } +VMULL_N:2:result_uint32x4 [] = { 33000, 33000, 33000, 33000, } +VMULL_N:3:result_uint64x2 [] = { 44000, 44000, } + +VMLA_LANE output: +VMLA_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:1:result_int16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:2:result_int32x2 [] = { 3e07, 3e08, } +VMLA_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLA_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:6:result_uint32x2 [] = { 3e07, 3e08, } +VMLA_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA_LANE:10:result_float32x2 [] = { 4418c687, 44190687, } +VMLA_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLA_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:13:result_int16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:14:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VMLA_LANE:17:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:18:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA_LANE:22:result_float32x4 [] = { 441a3168, 441a7168, 441ab168, 441af168, } +VMLA_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLS_LANE output: +VMLS_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:1:result_int16x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:2:result_int32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLS_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS_LANE:6:result_uint32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_LANE:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS_LANE:10:result_float32x2 [] = { c420c687, c4208687, } +VMLS_LANE:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLS_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:13:result_int16x8 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, ffffc1dd, ffffc1de, ffffc1df, ffffc1e0, } +VMLS_LANE:14:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:17:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS_LANE:18:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:19:result_uint64x2 [] 
= { 3333333333333333, 3333333333333333, } +VMLS_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS_LANE:22:result_float32x4 [] = { c4223168, c421f168, c421b168, c4217168, } +VMLS_LANE:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMLA_N output: +VMLA_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:1:result_int16x4 [] = { 595, 596, 597, 598, } +VMLA_N:2:result_int32x2 [] = { b3a, b3b, } +VMLA_N:3:result_int64x1 [] = { 3333333333333333, } +VMLA_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:5:result_uint16x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLA_N:6:result_uint32x2 [] = { 1684, 1685, } +VMLA_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLA_N:10:result_float32x2 [] = { 4497deb8, 4497feb8, } +VMLA_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLA_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:13:result_int16x8 [] = { 1c29, 1c2a, 1c2b, 1c2c, 1c2d, 1c2e, 1c2f, 1c30, } +VMLA_N:14:result_int32x4 [] = { 21ce, 21cf, 21d0, 21d1, } +VMLA_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:17:result_uint16x8 [] = { 2773, 2774, 2775, 2776, 2777, 2778, 2779, 277a, } +VMLA_N:18:result_uint32x4 [] = { 2d18, 2d19, 2d1a, 2d1b, } +VMLA_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLA_N:22:result_float32x4 [] = { 4568087b, 4568187b, 4568287b, 4568387b, } +VMLA_N:23:result_float16x8 [] = { 0, 0, 0, 
0, 0, 0, 0, 0, } + +VMLS_N output: +VMLS_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:1:result_int16x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLS_N:2:result_int32x2 [] = { fffff4a6, fffff4a7, } +VMLS_N:3:result_int64x1 [] = { 3333333333333333, } +VMLS_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:5:result_uint16x4 [] = { ef01, ef02, ef03, ef04, } +VMLS_N:6:result_uint32x2 [] = { ffffe95c, ffffe95d, } +VMLS_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMLS_N:10:result_float32x2 [] = { c49bdeb8, c49bbeb8, } +VMLS_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMLS_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:13:result_int16x8 [] = { ffffe3b7, ffffe3b8, ffffe3b9, ffffe3ba, ffffe3bb, ffffe3bc, ffffe3bd, ffffe3be, } +VMLS_N:14:result_int32x4 [] = { ffffde12, ffffde13, ffffde14, ffffde15, } +VMLS_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:17:result_uint16x8 [] = { d86d, d86e, d86f, d870, d871, d872, d873, d874, } +VMLS_N:18:result_uint32x4 [] = { ffffd2c8, ffffd2c9, ffffd2ca, ffffd2cb, } +VMLS_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLS_N:22:result_float32x4 [] = { c56a087b, c569f87b, c569e87b, c569d87b, } +VMLS_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSLI_N output: +VSLI_N:0:result_int8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VSLI_N:1:result_int16x4 [] = { ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VSLI_N:2:result_int32x2 [] = { 6, 7, } +VSLI_N:3:result_int64x1 [] = { 64fffffff0, 
} +VSLI_N:4:result_uint8x8 [] = { 50, 51, 52, 53, 50, 51, 52, 53, } +VSLI_N:5:result_uint16x4 [] = { 7bf0, 7bf1, 7bf2, 7bf3, } +VSLI_N:6:result_uint32x2 [] = { 3ffffff0, 3ffffff1, } +VSLI_N:7:result_uint64x1 [] = { 10, } +VSLI_N:8:result_poly8x8 [] = { 50, 51, 52, 53, 50, 51, 52, 53, } +VSLI_N:9:result_poly16x4 [] = { 7bf0, 7bf1, 7bf2, 7bf3, } +VSLI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSLI_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSLI_N:12:result_int8x16 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, } +VSLI_N:13:result_int16x8 [] = { ffffff60, ffffff61, ffffff62, ffffff63, ffffff64, ffffff65, ffffff66, ffffff67, } +VSLI_N:14:result_int32x4 [] = { fe2ffff0, fe2ffff1, fe2ffff2, fe2ffff3, } +VSLI_N:15:result_int64x2 [] = { 18fff0, 18fff1, } +VSLI_N:16:result_uint8x16 [] = { 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, } +VSLI_N:17:result_uint16x8 [] = { 3ff0, 3ff1, 3ff2, 3ff3, 3ff4, 3ff5, 3ff6, 3ff7, } +VSLI_N:18:result_uint32x4 [] = { 1bfffff0, 1bfffff1, 1bfffff2, 1bfffff3, } +VSLI_N:19:result_uint64x2 [] = { 7ffffffffffff0, 7ffffffffffff1, } +VSLI_N:20:result_poly8x16 [] = { 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, } +VSLI_N:21:result_poly16x8 [] = { 3ff0, 3ff1, 3ff2, 3ff3, 3ff4, 3ff5, 3ff6, 3ff7, } +VSLI_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSLI_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSLI_Nmax shift amount output: +VSLI_N:0:result_int8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:1:result_int16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:2:result_int32x2 [] = { fffffff0, fffffff1, } +VSLI_N:3:result_int64x1 [] = { 7ffffffffffffff0, } +VSLI_N:4:result_uint8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:5:result_uint16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:6:result_uint32x2 [] = { 7ffffff0, 7ffffff1, } 
+VSLI_N:7:result_uint64x1 [] = { 7ffffffffffffff0, } +VSLI_N:8:result_poly8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:9:result_poly16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSLI_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSLI_N:12:result_int8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:13:result_int16x8 [] = { 7ff0, 7ff1, 7ff2, 7ff3, 7ff4, 7ff5, 7ff6, 7ff7, } +VSLI_N:14:result_int32x4 [] = { 7ffffff0, 7ffffff1, 7ffffff2, 7ffffff3, } +VSLI_N:15:result_int64x2 [] = { 7ffffffffffffff0, 7ffffffffffffff1, } +VSLI_N:16:result_uint8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSLI_N:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSLI_N:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSLI_N:20:result_poly8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSLI_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSLI_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSRI_N output: +VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VSRI_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:2:result_int32x2 [] = { 80000001, 80000001, } +VSRI_N:3:result_int64x1 [] = { ffffffff00000000, } +VSRI_N:4:result_uint8x8 [] = { c5, c5, c5, c5, c5, c5, c5, c5, } +VSRI_N:5:result_uint16x4 [] = { ffc0, ffc0, ffc0, ffc0, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VSRI_N:7:result_uint64x1 [] = { e000000000000000, } +VSRI_N:8:result_poly8x8 [] = { c5, c5, c5, c5, c5, c5, c5, c5, } +VSRI_N:9:result_poly16x4 [] = { ffc0, ffc0, ffc0, ffc0, } +VSRI_N:10:result_float32x2 [] = { 33333333, 33333333, } 
+VSRI_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSRI_N:12:result_int8x16 [] = { fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:13:result_int16x8 [] = { fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, } +VSRI_N:14:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:15:result_int64x2 [] = { ffff000000000000, ffff000000000000, } +VSRI_N:16:result_uint8x16 [] = { e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, } +VSRI_N:17:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VSRI_N:18:result_uint32x4 [] = { fffffe00, fffffe00, fffffe00, fffffe00, } +VSRI_N:19:result_uint64x2 [] = { fffffffffffff800, fffffffffffff800, } +VSRI_N:20:result_poly8x16 [] = { e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, } +VSRI_N:21:result_poly16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VSRI_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSRI_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSRI_N max shift amount output: +VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRI_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:2:result_int32x2 [] = { fffffff0, fffffff1, } +VSRI_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRI_N:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VSRI_N:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VSRI_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRI_N:8:result_poly8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VSRI_N:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VSRI_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSRI_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSRI_N:12:result_int8x16 [] = { fffffff0, 
fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRI_N:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRI_N:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRI_N:16:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VSRI_N:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSRI_N:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRI_N:20:result_poly8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VSRI_N:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSRI_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSRI_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTST/VTSTQ (signed input) output: +VTST/VTSTQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:3:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:4:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:5:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:6:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:7:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:10:result_float32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTST/VTSTQ:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 
3333, 3333, 3333, 3333, } +VTST/VTSTQ:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:16:result_uint8x16 [] = { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:17:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:18:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTST/VTSTQ (unsigned input) output: +VTST/VTSTQ:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:26:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:27:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:28:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:29:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:30:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:31:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:34:result_float32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VTST/VTSTQ:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:37:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:38:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:40:result_uint8x16 [] 
= { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:41:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:42:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VADDHN output: +VADDHN:0:result_int8x8 [] = { 32, 32, 32, 32, 32, 32, 32, 32, } +VADDHN:1:result_int16x4 [] = { 32, 32, 32, 32, } +VADDHN:2:result_int32x2 [] = { 18, 18, } +VADDHN:3:result_int64x1 [] = { 3333333333333333, } +VADDHN:4:result_uint8x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VADDHN:5:result_uint16x4 [] = { 37, 37, 37, 37, } +VADDHN:6:result_uint32x2 [] = { 3, 3, } +VADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VADDHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDHN:10:result_float32x2 [] = { 33333333, 33333333, } +VADDHN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VADDHN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:20:result_poly8x16 [] = { 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRADDHN output: +VRADDHN:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:1:result_int16x4 [] = { 33, 33, 33, 33, } +VRADDHN:2:result_int32x2 [] = { 19, 19, } +VRADDHN:3:result_int64x1 [] = { 3333333333333333, } +VRADDHN:4:result_uint8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VRADDHN:5:result_uint16x4 [] = { 38, 38, 38, 38, } +VRADDHN:6:result_uint32x2 [] = { 4, 4, } +VRADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VRADDHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRADDHN:10:result_float32x2 [] = { 33333333, 33333333, } +VRADDHN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRADDHN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VADDL output: +VADDL:0:result_int8x8 [] = { 33, 33, 
33, 33, 33, 33, 33, 33, } +VADDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:2:result_int32x2 [] = { 33333333, 33333333, } +VADDL:3:result_int64x1 [] = { 3333333333333333, } +VADDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDL:7:result_uint64x1 [] = { 3333333333333333, } +VADDL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:10:result_float32x2 [] = { 33333333, 33333333, } +VADDL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VADDL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:13:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDL:14:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDL:15:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:17:result_uint16x8 [] = { 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1ea, } +VADDL:18:result_uint32x4 [] = { 1ffe1, 1ffe2, 1ffe3, 1ffe4, } +VADDL:19:result_uint64x2 [] = { 1ffffffe0, 1ffffffe1, } +VADDL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VADDW output: +VADDW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:2:result_int32x2 [] = { 33333333, 33333333, } +VADDW:3:result_int64x1 [] = { 3333333333333333, } +VADDW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } 
+VADDW:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDW:7:result_uint64x1 [] = { 3333333333333333, } +VADDW:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:10:result_float32x2 [] = { 33333333, 33333333, } +VADDW:11:result_float16x4 [] = { 0, 0, 0, 0, } +VADDW:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:13:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDW:14:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDW:15:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDW:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:17:result_uint16x8 [] = { e3, e4, e5, e6, e7, e8, e9, ea, } +VADDW:18:result_uint32x4 [] = { ffe1, ffe2, ffe3, ffe4, } +VADDW:19:result_uint64x2 [] = { ffffffe0, ffffffe1, } +VADDW:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDW:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDW:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VHADD/VHADDQ output: +VHADD/VHADDQ:0:result_int8x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VHADD/VHADDQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:4:result_uint8x8 [] = { f1, f2, f2, f3, f3, f4, f4, f5, } +VHADD/VHADDQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff2, } +VHADD/VHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VHADD/VHADDQ:9:result_poly16x4 [] = { 
3333, 3333, 3333, 3333, } +VHADD/VHADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VHADD/VHADDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VHADD/VHADDQ:12:result_int8x16 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, } +VHADD/VHADDQ:13:result_int16x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:16:result_uint8x16 [] = { f4, f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, } +VHADD/VHADDQ:17:result_uint16x8 [] = { fff1, fff1, fff2, fff2, fff3, fff3, fff4, fff4, } +VHADD/VHADDQ:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VHADD/VHADDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VHADD/VHADDQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VHADD/VHADDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRHADD/VRHADDQ output: +VRHADD/VRHADDQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:1:result_int16x4 [] = { fffffff1, fffffff2, fffffff2, fffffff3, } +VRHADD/VRHADDQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:4:result_uint8x8 [] = { f2, f2, f3, f3, f4, f4, f5, f5, } +VRHADD/VRHADDQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff2, } +VRHADD/VRHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, } +VRHADD/VRHADDQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRHADD/VRHADDQ:10:result_float32x2 [] = { 33333333, 33333333, } +VRHADD/VRHADDQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRHADD/VRHADDQ:12:result_int8x16 [] = { fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, } +VRHADD/VRHADDQ:13:result_int16x8 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:14:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:16:result_uint8x16 [] = { f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, fc, } +VRHADD/VRHADDQ:17:result_uint16x8 [] = { fff1, fff2, fff2, fff3, fff3, fff4, fff4, fff5, } +VRHADD/VRHADDQ:18:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRHADD/VRHADDQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRHADD/VRHADDQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRHADD/VRHADDQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VHSUB/VHSUBQ output: +VHSUB/VHSUBQ:0:result_int8x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:1:result_int16x4 [] = { ffffffff, ffffffff, 0, 0, } +VHSUB/VHSUBQ:2:result_int32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:3:result_int64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:4:result_uint8x8 [] = { fe, ff, ff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:5:result_uint16x4 [] = { ffff, 0, 0, 1, } +VHSUB/VHSUBQ:6:result_uint32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:7:result_uint64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VHSUB/VHSUBQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VHSUB/VHSUBQ:10:result_float32x2 [] = { 33333333, 33333333, } +VHSUB/VHSUBQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VHSUB/VHSUBQ:12:result_int8x16 [] = { fffffffe, fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, } +VHSUB/VHSUBQ:13:result_int16x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:14:result_int32x4 [] = { ffffffff, 0, 0, 1, } +VHSUB/VHSUBQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:16:result_uint8x16 [] = { fb, fc, fc, fd, fd, fe, fe, ff, ff, 0, 0, 1, 1, 2, 2, 3, } +VHSUB/VHSUBQ:17:result_uint16x8 [] = { ffff, ffff, 0, 0, 1, 1, 2, 2, } +VHSUB/VHSUBQ:18:result_uint32x4 [] = { ffffffff, 0, 0, 1, } +VHSUB/VHSUBQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VHSUB/VHSUBQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VHSUB/VHSUBQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VHSUB/VHSUBQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSUBL output: +VSUBL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBL:3:result_int64x1 [] = { 3333333333333333, } +VSUBL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:6:result_uint32x2 [] = { 33333333, 33333333, } +VSUBL:7:result_uint64x1 [] = { 3333333333333333, } +VSUBL:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBL:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSUBL:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:13:result_int16x8 
[] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBL:14:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBL:15:result_int64x2 [] = { 0, 1, } +VSUBL:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:17:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, } +VSUBL:18:result_uint32x4 [] = { ffffffff, 0, 1, 2, } +VSUBL:19:result_uint64x2 [] = { 0, 1, } +VSUBL:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBL:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBL:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSUBW output: +VSUBW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBW:3:result_int64x1 [] = { 3333333333333333, } +VSUBW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:6:result_uint32x2 [] = { 33333333, 33333333, } +VSUBW:7:result_uint64x1 [] = { 3333333333333333, } +VSUBW:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBW:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSUBW:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:13:result_int16x8 [] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBW:14:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBW:15:result_int64x2 [] = { 0, 1, } +VSUBW:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:17:result_uint16x8 [] = { fefd, fefe, feff, ff00, ff01, ff02, ff03, ff04, } +VSUBW:18:result_uint32x4 [] = { fffeffff, ffff0000, ffff0001, ffff0002, } +VSUBW:19:result_uint64x2 [] = { 
ffffffff00000000, ffffffff00000001, } +VSUBW:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBW:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBW:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSUBHN output: +VSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VSUBHN:2:result_int32x2 [] = { 17, 17, } +VSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VSUBHN:6:result_uint32x2 [] = { 2, 2, } +VSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VSUBHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBHN:10:result_float32x2 [] = { 33333333, 33333333, } +VSUBHN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSUBHN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+ +VRSUBHN output: +VRSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VRSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VRSUBHN:2:result_int32x2 [] = { 17, 17, } +VRSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VRSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VRSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VRSUBHN:6:result_uint32x2 [] = { 2, 2, } +VRSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VRSUBHN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSUBHN:10:result_float32x2 [] = { 33333333, 33333333, } +VRSUBHN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSUBHN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VMVN/VMVNQ output: +VMVN/VMVNQ:0:result_int8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:1:result_int16x4 [] = { f, e, d, c, } +VMVN/VMVNQ:2:result_int32x2 [] = { f, e, } +VMVN/VMVNQ:3:result_int64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:4:result_uint8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:5:result_uint16x4 [] = { 
f, e, d, c, } +VMVN/VMVNQ:6:result_uint32x2 [] = { f, e, } +VMVN/VMVNQ:7:result_uint64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:8:result_poly8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VMVN/VMVNQ:10:result_float32x2 [] = { 33333333, 33333333, } +VMVN/VMVNQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VMVN/VMVNQ:12:result_int8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:13:result_int16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:14:result_int32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:16:result_uint8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:17:result_uint16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:18:result_uint32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:20:result_poly8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMVN/VMVNQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMVN/VMVNQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQMOVN cumulative saturation output: +VQMOVN:0:vqmovn_s16 Neon cumulative saturation 0 +VQMOVN:1:vqmovn_s32 Neon cumulative saturation 0 +VQMOVN:2:vqmovn_s64 Neon cumulative saturation 0 +VQMOVN:3:vqmovn_u16 Neon cumulative saturation 0 +VQMOVN:4:vqmovn_u32 Neon cumulative saturation 0 +VQMOVN:5:vqmovn_u64 Neon cumulative saturation 0 + +VQMOVN output: +VQMOVN:6:result_int8x8 [] = { 12, 12, 12, 12, 12, 12, 12, 12, } +VQMOVN:7:result_int16x4 [] = { 1278, 1278, 1278, 1278, } +VQMOVN:8:result_int32x2 [] = { 12345678, 12345678, } +VQMOVN:9:result_int64x1 [] = { 3333333333333333, } +VQMOVN:10:result_uint8x8 [] = { 82, 82, 82, 82, 82, 82, 82, 82, } +VQMOVN:11:result_uint16x4 [] = { 8765, 8765, 8765, 8765, } +VQMOVN:12:result_uint32x2 [] = { 87654321, 
87654321, } +VQMOVN:13:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVN:16:result_float32x2 [] = { 33333333, 33333333, } +VQMOVN:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQMOVN:18:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:19:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:20:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQMOVN cumulative saturation output: +VQMOVN:30:vqmovn_s16 Neon cumulative saturation 1 +VQMOVN:31:vqmovn_s32 Neon cumulative saturation 1 +VQMOVN:32:vqmovn_s64 Neon cumulative saturation 1 +VQMOVN:33:vqmovn_u16 Neon cumulative saturation 1 +VQMOVN:34:vqmovn_u32 Neon cumulative saturation 1 +VQMOVN:35:vqmovn_u64 Neon cumulative saturation 1 + +VQMOVN output: +VQMOVN:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQMOVN:37:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQMOVN:38:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQMOVN:39:result_int64x1 [] = { 3333333333333333, } +VQMOVN:40:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQMOVN:41:result_uint16x4 [] = { ffff, ffff, ffff, 
ffff, } +VQMOVN:42:result_uint32x2 [] = { ffffffff, ffffffff, } +VQMOVN:43:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:44:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:45:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVN:46:result_float32x2 [] = { 33333333, 33333333, } +VQMOVN:47:result_float16x4 [] = { 0, 0, 0, 0, } +VQMOVN:48:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:49:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:50:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:51:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:52:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:53:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:54:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:55:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:56:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:58:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQMOVUN cumulative saturation output: +VQMOVUN:0:vqmovun_s16 Neon cumulative saturation 0 +VQMOVUN:1:vqmovun_s32 Neon cumulative saturation 0 +VQMOVUN:2:vqmovun_s64 Neon cumulative saturation 0 + +VQMOVUN output: +VQMOVUN:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:5:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:6:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:7:result_uint8x8 [] = { 34, 34, 34, 34, 34, 34, 34, 34, } +VQMOVUN:8:result_uint16x4 [] = { 5678, 5678, 5678, 5678, } +VQMOVUN:9:result_uint32x2 [] = { 12345678, 12345678, } +VQMOVUN:10:result_uint64x1 [] = { 
3333333333333333, } +VQMOVUN:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:13:result_float32x2 [] = { 33333333, 33333333, } +VQMOVUN:14:result_float16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQMOVUN (negative input) cumulative saturation output: +VQMOVUN:27:vqmovun_s16 Neon cumulative saturation 1 +VQMOVUN:28:vqmovun_s32 Neon cumulative saturation 1 +VQMOVUN:29:vqmovun_s64 Neon cumulative saturation 1 + +VQMOVUN (negative input) output: +VQMOVUN:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:32:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:33:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:34:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQMOVUN:35:result_uint16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:36:result_uint32x2 [] = { 0, 0, } +VQMOVUN:37:result_uint64x1 [] = { 3333333333333333, } +VQMOVUN:38:result_poly8x8 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, } +VQMOVUN:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:40:result_float32x2 [] = { 33333333, 33333333, } +VQMOVUN:41:result_float16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:42:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:43:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:44:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:45:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:46:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:47:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:48:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:49:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:50:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:51:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:52:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N output: +VRSHR_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHR_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHR_N:3:result_int64x1 [] = { 0, } +VRSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, } +VRSHR_N:5:result_uint16x4 [] = { 1ffe, 1ffe, 1ffe, 1ffe, } +VRSHR_N:6:result_uint32x2 [] = { 8000000, 8000000, } +VRSHR_N:7:result_uint64x1 [] = { 80000000, } +VRSHR_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:12:result_int8x16 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, 
fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, 0, } +VRSHR_N:13:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:14:result_int32x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHR_N:15:result_int64x2 [] = { 0, 0, } +VRSHR_N:16:result_uint8x16 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, 40, 40, } +VRSHR_N:17:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1fff, 1fff, 1fff, 1fff, } +VRSHR_N:18:result_uint32x4 [] = { 8000000, 8000000, 8000000, 8000000, } +VRSHR_N:19:result_uint64x2 [] = { 80000000, 80000000, } +VRSHR_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: max shift amount, positive input) output: +VRSHR_N:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:25:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:26:result_int32x2 [] = { 0, 0, } +VRSHR_N:27:result_int64x1 [] = { 0, } +VRSHR_N:28:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:29:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSHR_N:30:result_uint32x2 [] = { 1, 1, } +VRSHR_N:31:result_uint64x1 [] = { 1, } +VRSHR_N:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:34:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:35:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:36:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:38:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHR_N:39:result_int64x2 [] = { 0, 0, } +VRSHR_N:40:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:41:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } 
+VRSHR_N:42:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHR_N:43:result_uint64x2 [] = { 1, 1, } +VRSHR_N:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VRSHR_N:48:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:49:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHR_N:50:result_int32x2 [] = { 40000000, 40000000, } +VRSHR_N:51:result_int64x1 [] = { 4000000000000000, } +VRSHR_N:52:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:53:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:54:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:55:result_uint64x1 [] = { 8000000000000000, } +VRSHR_N:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:58:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:59:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:60:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:61:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHR_N:62:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHR_N:63:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHR_N:64:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:65:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:66:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:67:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:69:result_poly16x8 
[] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: shift by 3, positive input) output: +VRSHR_N:72:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHR_N:73:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHR_N:74:result_int32x2 [] = { 10000000, 10000000, } +VRSHR_N:75:result_int64x1 [] = { 1000000000000000, } +VRSHR_N:76:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:77:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:78:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:79:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:80:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:81:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:82:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:83:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:84:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHR_N:85:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHR_N:86:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHR_N:87:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHR_N:88:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:89:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:90:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:91:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: 
shift by 1, with negative input) output: +VRSHR_N:96:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:97:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:98:result_int32x2 [] = { c0000000, c0000000, } +VRSHR_N:99:result_int64x1 [] = { c000000000000000, } +VRSHR_N:100:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:101:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:102:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:103:result_uint64x1 [] = { 8000000000000000, } +VRSHR_N:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:106:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:107:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:108:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:109:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:110:result_int32x4 [] = { c0000000, c0000000, c0000000, c0000000, } +VRSHR_N:111:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSHR_N:112:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:113:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:114:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:115:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSHR_N (overflow test: shift by 
3, with negative input) output: +VRSHR_N:120:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSHR_N:121:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:122:result_int32x2 [] = { f0000000, f0000000, } +VRSHR_N:123:result_int64x1 [] = { f000000000000000, } +VRSHR_N:124:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:125:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:126:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:127:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:128:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:129:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSHR_N:130:result_float32x2 [] = { 33333333, 33333333, } +VRSHR_N:131:result_float16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:132:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSHR_N:133:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:134:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSHR_N:135:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSHR_N:136:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:137:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:138:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:139:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:140:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHR_N:141:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHR_N:142:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHR_N:143:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N output: 
+VRSRA_N:0:result_int8x8 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VRSRA_N:2:result_int32x2 [] = { fffffffd, fffffffe, } +VRSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VRSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VRSRA_N:5:result_uint16x4 [] = { fffd, fffe, ffff, 0, } +VRSRA_N:6:result_uint32x2 [] = { fffffff4, fffffff5, } +VRSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VRSRA_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:10:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:12:result_int8x16 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, } +VRSRA_N:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:14:result_int32x4 [] = { fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:15:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:16:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VRSRA_N:17:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, } +VRSRA_N:18:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:19:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by 1, positive input) output: +VRSRA_N:24:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:25:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSRA_N:26:result_int32x2 
[] = { 40000000, 40000000, } +VRSRA_N:27:result_int64x1 [] = { 4000000000000000, } +VRSRA_N:28:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:29:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSRA_N:30:result_uint32x2 [] = { 80000000, 80000000, } +VRSRA_N:31:result_uint64x1 [] = { 8000000000000000, } +VRSRA_N:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:34:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:35:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:36:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:37:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSRA_N:38:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSRA_N:39:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSRA_N:40:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:41:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSRA_N:42:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSRA_N:43:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSRA_N:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by 3, positive input) output: +VRSRA_N:48:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSRA_N:49:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSRA_N:50:result_int32x2 [] = { 10000000, 10000000, } +VRSRA_N:51:result_int64x1 [] = { 1000000000000000, } +VRSRA_N:52:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:53:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } 
+VRSRA_N:54:result_uint32x2 [] = { 20000000, 20000000, } +VRSRA_N:55:result_uint64x1 [] = { 2000000000000000, } +VRSRA_N:56:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:57:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:58:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:59:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:60:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSRA_N:61:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSRA_N:62:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSRA_N:63:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSRA_N:64:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:65:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSRA_N:66:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSRA_N:67:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSRA_N:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:69:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:70:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by max, positive input) output: +VRSRA_N:72:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:73:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:74:result_int32x2 [] = { 0, 0, } +VRSRA_N:75:result_int64x1 [] = { 0, } +VRSRA_N:76:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:77:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:78:result_uint32x2 [] = { 1, 1, } +VRSRA_N:79:result_uint64x1 [] = { 1, } +VRSRA_N:80:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:81:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:82:result_float32x2 [] = { 33333333, 33333333, } 
+VRSRA_N:83:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:84:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:85:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:86:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:87:result_int64x2 [] = { 0, 0, } +VRSRA_N:88:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:89:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:90:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:91:result_uint64x2 [] = { 1, 1, } +VRSRA_N:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by 1, negative input) output: +VRSRA_N:96:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:97:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:98:result_int32x2 [] = { c0000000, c0000000, } +VRSRA_N:99:result_int64x1 [] = { c000000000000000, } +VRSRA_N:100:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:101:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:102:result_uint32x2 [] = { 1, 1, } +VRSRA_N:103:result_uint64x1 [] = { 1, } +VRSRA_N:104:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:105:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:106:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:107:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:108:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:109:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:110:result_int32x4 [] = 
{ c0000000, c0000000, c0000000, c0000000, } +VRSRA_N:111:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSRA_N:112:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:113:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:114:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:115:result_uint64x2 [] = { 1, 1, } +VRSRA_N:116:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:117:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:118:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:119:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:120:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:121:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:122:result_int32x2 [] = { f0000000, f0000000, } +VRSRA_N:123:result_int64x1 [] = { f000000000000000, } +VRSRA_N:124:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:125:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:126:result_uint32x2 [] = { 1, 1, } +VRSRA_N:127:result_uint64x1 [] = { 1, } +VRSRA_N:128:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:129:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:130:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:131:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:132:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:133:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:134:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSRA_N:135:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSRA_N:136:result_uint8x16 [] = { 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:137:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:138:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:139:result_uint64x2 [] = { 1, 1, } +VRSRA_N:140:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:141:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:142:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSRA_N:143:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:144:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:145:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:146:result_int32x2 [] = { 0, 0, } +VRSRA_N:147:result_int64x1 [] = { 0, } +VRSRA_N:148:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:149:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:150:result_uint32x2 [] = { 1, 1, } +VRSRA_N:151:result_uint64x1 [] = { 1, } +VRSRA_N:152:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:153:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VRSRA_N:154:result_float32x2 [] = { 33333333, 33333333, } +VRSRA_N:155:result_float16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:156:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:157:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:158:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:159:result_int64x2 [] = { 0, 0, } +VRSRA_N:160:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:161:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:162:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:163:result_uint64x2 [] = { 1, 1, } +VRSRA_N:164:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSRA_N:165:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSRA_N:166:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } 
+VRSRA_N:167:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VSHLL_N output: +VSHLL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VSHLL_N:3:result_int64x1 [] = { 3333333333333333, } +VSHLL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VSHLL_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHLL_N:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:10:result_float32x2 [] = { 33333333, 33333333, } +VSHLL_N:11:result_float16x4 [] = { 0, 0, 0, 0, } +VSHLL_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:13:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHLL_N:14:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHLL_N:15:result_int64x2 [] = { ffffffffffffff80, ffffffffffffff88, } +VSHLL_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:17:result_uint16x8 [] = { 3c0, 3c4, 3c8, 3cc, 3d0, 3d4, 3d8, 3dc, } +VSHLL_N:18:result_uint32x4 [] = { fff00, fff10, fff20, fff30, } +VSHLL_N:19:result_uint64x2 [] = { 7ffffff80, 7ffffff88, } +VSHLL_N:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHLL_N:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHLL_N:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPADDL/VPADDLQ output: +VPADDL/VPADDLQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:2:result_int32x2 [] = { 
ffffffe1, ffffffe5, } +VPADDL/VPADDLQ:3:result_int64x1 [] = { ffffffffffffffe1, } +VPADDL/VPADDLQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:5:result_uint16x4 [] = { 1e1, 1e5, 1e9, 1ed, } +VPADDL/VPADDLQ:6:result_uint32x2 [] = { 1ffe1, 1ffe5, } +VPADDL/VPADDLQ:7:result_uint64x1 [] = { 1ffffffe1, } +VPADDL/VPADDLQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADDL/VPADDLQ:10:result_float32x2 [] = { 33333333, 33333333, } +VPADDL/VPADDLQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPADDL/VPADDLQ:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:13:result_int16x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, fffffff1, fffffff5, fffffff9, fffffffd, } +VPADDL/VPADDLQ:14:result_int32x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:15:result_int64x2 [] = { ffffffffffffffe1, ffffffffffffffe5, } +VPADDL/VPADDLQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:17:result_uint16x8 [] = { 1e1, 1e5, 1e9, 1ed, 1f1, 1f5, 1f9, 1fd, } +VPADDL/VPADDLQ:18:result_uint32x4 [] = { 1ffe1, 1ffe5, 1ffe9, 1ffed, } +VPADDL/VPADDLQ:19:result_uint64x2 [] = { 1ffffffe1, 1ffffffe5, } +VPADDL/VPADDLQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADDL/VPADDLQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADDL/VPADDLQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPADD output: +VPADD:0:result_int8x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADD:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe1, ffffffe5, } +VPADD:2:result_int32x2 [] = { ffffffe1, ffffffe1, } +VPADD:3:result_int64x1 [] = { 3333333333333333, } +VPADD:4:result_uint8x8 
[] = { e1, e5, e9, ed, e1, e5, e9, ed, } +VPADD:5:result_uint16x4 [] = { ffe1, ffe5, ffe1, ffe5, } +VPADD:6:result_uint32x2 [] = { ffffffe1, ffffffe1, } +VPADD:7:result_uint64x1 [] = { 3333333333333333, } +VPADD:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADD:10:result_float32x2 [] = { c1f80000, c1f80000, } +VPADD:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPADD:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPADAL/VPADALQ output: +VPADAL/VPADALQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:1:result_int16x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:2:result_int32x2 [] = { ffffffd1, ffffffd6, } +VPADAL/VPADALQ:3:result_int64x1 [] = { ffffffffffffffd1, } +VPADAL/VPADALQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:5:result_uint16x4 [] = { 1d1, 1d6, 1db, 1e0, } +VPADAL/VPADALQ:6:result_uint32x2 [] = { 1ffd1, 1ffd6, } +VPADAL/VPADALQ:7:result_uint64x1 [] = { 1ffffffd1, } +VPADAL/VPADALQ:8:result_poly8x8 [] = { 33, 
33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPADAL/VPADALQ:10:result_float32x2 [] = { 33333333, 33333333, } +VPADAL/VPADALQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPADAL/VPADALQ:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:13:result_int16x8 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, ffffffe5, ffffffea, ffffffef, fffffff4, } +VPADAL/VPADALQ:14:result_int32x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:15:result_int64x2 [] = { ffffffffffffffd1, ffffffffffffffd6, } +VPADAL/VPADALQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:17:result_uint16x8 [] = { 1d1, 1d6, 1db, 1e0, 1e5, 1ea, 1ef, 1f4, } +VPADAL/VPADALQ:18:result_uint32x4 [] = { 1ffd1, 1ffd6, 1ffdb, 1ffe0, } +VPADAL/VPADALQ:19:result_uint64x2 [] = { 1ffffffd1, 1ffffffd6, } +VPADAL/VPADALQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADAL/VPADALQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADAL/VPADALQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHLU_N/VQSHLUQ_N (negative input) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (negative input) output: +VQSHLU_N/VQSHLUQ_N:8:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 
} +VQSHLU_N/VQSHLUQ_N:9:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:10:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:11:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:12:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:13:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:14:result_uint32x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:15:result_uint64x1 [] = { 0, } +VQSHLU_N/VQSHLUQ_N:16:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:17:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:18:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:19:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:20:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:21:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:22:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:23:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:24:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:25:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:26:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:27:result_uint64x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:28:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:29:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:30:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:31:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:32:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:33:vqshlu_n_s16 Neon cumulative saturation 0 
+VQSHLU_N/VQSHLUQ_N:34:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:35:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:36:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:37:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:38:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:39:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 1) output: +VQSHLU_N/VQSHLUQ_N:40:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:41:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:42:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:43:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:44:result_uint8x8 [] = { fe, fe, fe, fe, fe, fe, fe, fe, } +VQSHLU_N/VQSHLUQ_N:45:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:46:result_uint32x2 [] = { fffffffe, fffffffe, } +VQSHLU_N/VQSHLUQ_N:47:result_uint64x1 [] = { fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:48:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:49:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:50:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:51:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:52:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:53:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:54:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:55:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:56:result_uint8x16 [] = { fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, } +VQSHLU_N/VQSHLUQ_N:57:result_uint16x8 [] = { fffe, fffe, fffe, fffe, fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:58:result_uint32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } 
+VQSHLU_N/VQSHLUQ_N:59:result_uint64x2 [] = { fffffffffffffffe, fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:60:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:61:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:62:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:63:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:64:vqshlu_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:65:vqshlu_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:66:vqshlu_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:67:vqshlu_n_s64 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:68:vqshluq_n_s8 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:69:vqshluq_n_s16 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:70:vqshluq_n_s32 Neon cumulative saturation 1 +VQSHLU_N/VQSHLUQ_N:71:vqshluq_n_s64 Neon cumulative saturation 1 + +VQSHLU_N/VQSHLUQ_N (check cumulative saturation: shift by 2) output: +VQSHLU_N/VQSHLUQ_N:72:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:73:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:74:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:75:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:76:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:77:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:78:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:79:result_uint64x1 [] = { ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:80:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:81:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:82:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:83:result_float16x4 [] = { 0, 0, 0, 0, } 
+VQSHLU_N/VQSHLUQ_N:84:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:85:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:86:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:88:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:89:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:90:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:91:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:93:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:94:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHLU_N/VQSHLUQ_N cumulative saturation output: +VQSHLU_N/VQSHLUQ_N:96:vqshlu_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:97:vqshlu_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:98:vqshlu_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:99:vqshlu_n_s64 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:100:vqshluq_n_s8 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:101:vqshluq_n_s16 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:102:vqshluq_n_s32 Neon cumulative saturation 0 +VQSHLU_N/VQSHLUQ_N:103:vqshluq_n_s64 Neon cumulative saturation 0 + +VQSHLU_N/VQSHLUQ_N output: +VQSHLU_N/VQSHLUQ_N:104:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:105:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:106:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:107:result_int64x1 [] = { 
3333333333333333, } +VQSHLU_N/VQSHLUQ_N:108:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VQSHLU_N/VQSHLUQ_N:109:result_uint16x4 [] = { 8, 8, 8, 8, } +VQSHLU_N/VQSHLUQ_N:110:result_uint32x2 [] = { 18, 18, } +VQSHLU_N/VQSHLUQ_N:111:result_uint64x1 [] = { 40, } +VQSHLU_N/VQSHLUQ_N:112:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:113:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:114:result_float32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:115:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:116:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:117:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:118:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:119:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:120:result_uint8x16 [] = { a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, } +VQSHLU_N/VQSHLUQ_N:121:result_uint16x8 [] = { 180, 180, 180, 180, 180, 180, 180, 180, } +VQSHLU_N/VQSHLUQ_N:122:result_uint32x4 [] = { 380, 380, 380, 380, } +VQSHLU_N/VQSHLUQ_N:123:result_uint64x2 [] = { 800, 800, } +VQSHLU_N/VQSHLUQ_N:124:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:125:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:126:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:127:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCLZ/VCLZQ output: +VCLZ/VCLZQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VCLZ/VCLZQ:1:result_int16x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:2:result_int32x2 [] = { 11, 11, } +VCLZ/VCLZQ:3:result_int64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } 
+VCLZ/VCLZQ:6:result_uint32x2 [] = { 5, 5, } +VCLZ/VCLZQ:7:result_uint64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCLZ/VCLZQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VCLZ/VCLZQ:12:result_int8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:13:result_int16x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:14:result_int32x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:16:result_uint8x16 [] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:17:result_uint16x8 [] = { d, d, d, d, d, d, d, d, } +VCLZ/VCLZQ:18:result_uint32x4 [] = { 1f, 1f, 1f, 1f, } +VCLZ/VCLZQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLZ/VCLZQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCLZ/VCLZQ (input=0) output: +VCLZ/VCLZQ:24:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:25:result_int16x4 [] = { 10, 10, 10, 10, } +VCLZ/VCLZQ:26:result_int32x2 [] = { 20, 20, } +VCLZ/VCLZQ:27:result_int64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:28:result_uint8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:29:result_uint16x4 [] = { 10, 10, 10, 10, } +VCLZ/VCLZQ:30:result_uint32x2 [] = { 20, 20, } +VCLZ/VCLZQ:31:result_uint64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:34:result_float32x2 [] = { 33333333, 33333333, } +VCLZ/VCLZQ:35:result_float16x4 [] = { 0, 0, 0, 0, } 
+VCLZ/VCLZQ:36:result_int8x16 [] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:37:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VCLZ/VCLZQ:38:result_int32x4 [] = { 20, 20, 20, 20, } +VCLZ/VCLZQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:40:result_uint8x16 [] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, } +VCLZ/VCLZQ:41:result_uint16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VCLZ/VCLZQ:42:result_uint32x4 [] = { 20, 20, 20, 20, } +VCLZ/VCLZQ:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLZ/VCLZQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLZ/VCLZQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLZ/VCLZQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCLS/VCLSQ (positive input) output: +VCLS/VCLSQ:0:result_int8x8 [] = { 6, 6, 6, 6, 6, 6, 6, 6, } +VCLS/VCLSQ:1:result_int16x4 [] = { 2, 2, 2, 2, } +VCLS/VCLSQ:2:result_int32x2 [] = { 19, 19, } +VCLS/VCLSQ:3:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:7:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:12:result_int8x16 [] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:13:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:14:result_int32x4 [] = { 14, 14, 14, 14, } +VCLS/VCLSQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:16:result_uint8x16 [] = { 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCLS/VCLSQ (negative input) output: +VCLS/VCLSQ:24:result_int8x8 [] = { 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:25:result_int16x4 [] = { 1, 1, 1, 1, } +VCLS/VCLSQ:26:result_int32x2 [] = { 1, 1, } +VCLS/VCLSQ:27:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:28:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:29:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:30:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:31:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:32:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:33:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:34:result_float32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:35:result_float16x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:36:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLS/VCLSQ:37:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:38:result_int32x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:41:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:42:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:43:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VCLS/VCLSQ:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:45:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:46:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VCNT/VCNTQ output: +VCNT/VCNTQ:0:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCNT/VCNTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:3:result_int64x1 [] = { 3333333333333333, } +VCNT/VCNTQ:4:result_uint8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VCNT/VCNTQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:7:result_uint64x1 [] = { 3333333333333333, } +VCNT/VCNTQ:8:result_poly8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VCNT/VCNTQ:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:10:result_float32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:11:result_float16x4 [] = { 0, 0, 0, 0, } +VCNT/VCNTQ:12:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCNT/VCNTQ:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:16:result_uint8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, } +VCNT/VCNTQ:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:20:result_poly8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, } +VCNT/VCNTQ:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:22:result_float32x4 [] = { 33333333, 33333333, 
33333333, 33333333, } +VCNT/VCNTQ:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRN_N cumulative saturation output: +VQSHRN_N:0:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:1:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:2:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:3:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:4:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:5:vqshrn_n_u64 Neon cumulative saturation 1 + +VQSHRN_N output: +VQSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VQSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:14:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:15:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:16:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:17:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRN_N:18:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:19:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:20:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:21:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:22:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:23:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:24:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:25:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:26:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:27:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:28:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:29:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRN_N (check saturation: shift by 3) cumulative saturation output: +VQSHRN_N:30:vqshrn_n_s16 Neon cumulative saturation 1 +VQSHRN_N:31:vqshrn_n_s32 Neon cumulative saturation 1 +VQSHRN_N:32:vqshrn_n_s64 Neon cumulative saturation 1 +VQSHRN_N:33:vqshrn_n_u16 Neon cumulative saturation 1 +VQSHRN_N:34:vqshrn_n_u32 Neon cumulative saturation 1 +VQSHRN_N:35:vqshrn_n_u64 Neon cumulative saturation 1 + +VQSHRN_N (check saturation: shift by 3) output: +VQSHRN_N:36:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:37:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:38:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:39:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:40:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:41:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:42:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:43:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:44:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:45:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:46:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:47:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRN_N:48:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:49:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:50:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:51:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:52:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:53:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:54:result_uint32x4 
[] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:55:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:56:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:57:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:58:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:59:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRN_N (check saturation: shift by max) cumulative saturation output: +VQSHRN_N:60:vqshrn_n_s16 Neon cumulative saturation 0 +VQSHRN_N:61:vqshrn_n_s32 Neon cumulative saturation 0 +VQSHRN_N:62:vqshrn_n_s64 Neon cumulative saturation 0 +VQSHRN_N:63:vqshrn_n_u16 Neon cumulative saturation 0 +VQSHRN_N:64:vqshrn_n_u32 Neon cumulative saturation 0 +VQSHRN_N:65:vqshrn_n_u64 Neon cumulative saturation 0 + +VQSHRN_N (check saturation: shift by max) output: +VQSHRN_N:66:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:67:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:68:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:69:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:70:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:71:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:72:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:73:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:74:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:75:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRN_N:76:result_float32x2 [] = { 33333333, 33333333, } +VQSHRN_N:77:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRN_N:78:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:79:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:80:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:81:result_int64x2 [] = { 3333333333333333, 3333333333333333, } 
+VQSHRN_N:82:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:83:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:84:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:85:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:86:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:87:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:88:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:89:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPMAX output: +VPMAX:0:result_int8x8 [] = { fffffff1, fffffff3, fffffff5, fffffff7, fffffff1, fffffff3, fffffff5, fffffff7, } +VPMAX:1:result_int16x4 [] = { fffffff1, fffffff3, fffffff1, fffffff3, } +VPMAX:2:result_int32x2 [] = { fffffff1, fffffff1, } +VPMAX:3:result_int64x1 [] = { 3333333333333333, } +VPMAX:4:result_uint8x8 [] = { f1, f3, f5, f7, f1, f3, f5, f7, } +VPMAX:5:result_uint16x4 [] = { fff1, fff3, fff1, fff3, } +VPMAX:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VPMAX:7:result_uint64x1 [] = { 3333333333333333, } +VPMAX:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPMAX:10:result_float32x2 [] = { c1700000, c1700000, } +VPMAX:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPMAX:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:18:result_uint32x4 [] = { 33333333, 33333333, 
33333333, 33333333, } +VPMAX:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VPMIN output: +VPMIN:0:result_int8x8 [] = { fffffff0, fffffff2, fffffff4, fffffff6, fffffff0, fffffff2, fffffff4, fffffff6, } +VPMIN:1:result_int16x4 [] = { fffffff0, fffffff2, fffffff0, fffffff2, } +VPMIN:2:result_int32x2 [] = { fffffff0, fffffff0, } +VPMIN:3:result_int64x1 [] = { 3333333333333333, } +VPMIN:4:result_uint8x8 [] = { f0, f2, f4, f6, f0, f2, f4, f6, } +VPMIN:5:result_uint16x4 [] = { fff0, fff2, fff0, fff2, } +VPMIN:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VPMIN:7:result_uint64x1 [] = { 3333333333333333, } +VPMIN:8:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VPMIN:10:result_float32x2 [] = { c1800000, c1800000, } +VPMIN:11:result_float16x4 [] = { 0, 0, 0, 0, } +VPMIN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 
3333, } +VPMIN:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRUN_N (negative input) cumulative saturation output: +VQSHRUN_N:0:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:1:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:2:vqshrun_n_s64 Neon cumulative saturation 1 + +VQSHRUN_N (negative input) output: +VQSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:13:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:14:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:24:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQSHRUN_N:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRUN_N (check cumulative saturation) cumulative saturation output: +VQSHRUN_N:27:vqshrun_n_s16 Neon cumulative saturation 1 +VQSHRUN_N:28:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:29:vqshrun_n_s64 Neon cumulative saturation 1 + +VQSHRUN_N (check cumulative saturation) output: +VQSHRUN_N:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:32:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:33:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRUN_N:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRUN_N:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRUN_N:37:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:38:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:40:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:41:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:42:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:43:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:44:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:45:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:46:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:47:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:48:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:49:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:50:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:51:result_poly16x8 [] = 
{ 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:52:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQSHRUN_N cumulative saturation output: +VQSHRUN_N:54:vqshrun_n_s16 Neon cumulative saturation 0 +VQSHRUN_N:55:vqshrun_n_s32 Neon cumulative saturation 1 +VQSHRUN_N:56:vqshrun_n_s64 Neon cumulative saturation 0 + +VQSHRUN_N output: +VQSHRUN_N:57:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:58:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:59:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:60:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:61:result_uint8x8 [] = { 48, 48, 48, 48, 48, 48, 48, 48, } +VQSHRUN_N:62:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:63:result_uint32x2 [] = { deadbe, deadbe, } +VQSHRUN_N:64:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:65:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:66:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:67:result_float32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:68:result_float16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:69:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:70:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:71:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:72:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:73:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:74:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:75:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:76:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:77:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:78:result_poly16x8 [] = { 3333, 3333, 3333, 
3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:79:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:80:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N (negative input) cumulative saturation output: +VQRSHRUN_N:0:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:1:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:2:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (negative input) output: +VQRSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:11:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:12:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:13:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:14:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:23:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:24:result_poly16x8 [] = { 3333, 
3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:25:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:26:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N (check cumulative saturation: shift by 1) cumulative saturation output: +VQRSHRUN_N:27:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:28:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:29:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (check cumulative saturation: shift by 1) output: +VQRSHRUN_N:30:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:31:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:32:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:33:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRUN_N:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRUN_N:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRUN_N:37:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:38:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:39:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:40:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:41:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:42:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:43:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:44:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:45:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:46:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:47:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:48:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:49:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:50:result_poly8x16 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:51:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:52:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:53:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) cumulative saturation output: +VQRSHRUN_N:54:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:55:vqrshrun_n_s32 Neon cumulative saturation 0 +VQRSHRUN_N:56:vqrshrun_n_s64 Neon cumulative saturation 0 + +VQRSHRUN_N (check cumulative saturation: shift by max, positive input) output: +VQRSHRUN_N:57:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:58:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:59:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:60:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:61:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHRUN_N:62:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHRUN_N:63:result_uint32x2 [] = { 80000000, 80000000, } +VQRSHRUN_N:64:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:65:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:66:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:67:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:68:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:69:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:70:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:71:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:72:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:73:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:74:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:75:result_uint32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VQRSHRUN_N:76:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:77:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:78:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:79:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:80:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) cumulative saturation output: +VQRSHRUN_N:81:vqrshrun_n_s16 Neon cumulative saturation 1 +VQRSHRUN_N:82:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:83:vqrshrun_n_s64 Neon cumulative saturation 1 + +VQRSHRUN_N (check cumulative saturation: shift by max, negative input) output: +VQRSHRUN_N:84:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:85:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:86:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:87:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:88:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:89:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:90:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:91:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:92:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:93:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:94:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:95:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:96:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:97:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:98:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:99:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:100:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQRSHRUN_N:101:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:102:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:103:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:104:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:105:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:106:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:107:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VQRSHRUN_N cumulative saturation output: +VQRSHRUN_N:108:vqrshrun_n_s16 Neon cumulative saturation 0 +VQRSHRUN_N:109:vqrshrun_n_s32 Neon cumulative saturation 1 +VQRSHRUN_N:110:vqrshrun_n_s64 Neon cumulative saturation 0 + +VQRSHRUN_N output: +VQRSHRUN_N:111:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:112:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:113:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:114:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:115:result_uint8x8 [] = { 49, 49, 49, 49, 49, 49, 49, 49, } +VQRSHRUN_N:116:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:117:result_uint32x2 [] = { deadbf, deadbf, } +VQRSHRUN_N:118:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:119:result_poly8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:120:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:121:result_float32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:122:result_float16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:123:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:124:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:125:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:126:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:127:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:128:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:129:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:130:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:131:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:132:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:133:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:134:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST2_LANE/VST2Q_LANE chunk 0 output: +VST2_LANE/VST2Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:4:result_uint8x8 [] = { f0, f1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:5:result_uint16x4 [] = { fff0, fff1, 0, 0, } +VST2_LANE/VST2Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:8:result_poly8x8 [] = { f0, f1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:9:result_poly16x4 [] = { fff0, fff1, 0, 0, } +VST2_LANE/VST2Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST2_LANE/VST2Q_LANE:11:result_float16x4 [] = { cc00, cb80, 0, 0, } +VST2_LANE/VST2Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:13:result_int16x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:14:result_int32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VST2_LANE/VST2Q_LANE:17:result_uint16x8 [] = { fff0, fff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:21:result_poly16x8 [] = { fff0, fff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:22:result_float32x4 [] = { c1800000, c1700000, 0, 0, } +VST2_LANE/VST2Q_LANE:23:result_float16x8 [] = { cc00, cb80, 0, 0, 0, 0, 0, 0, } + +VST2_LANE/VST2Q_LANE chunk 1 output: +VST2_LANE/VST2Q_LANE:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:25:result_int16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:26:result_int32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:30:result_uint32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:32:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:33:result_poly16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:34:result_float32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:35:result_float16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:38:result_int32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:42:result_uint32x4 [] = { 0, 0, 0, 0, } 
+VST2_LANE/VST2Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:45:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:46:result_float32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST3_LANE/VST3Q_LANE chunk 0 output: +VST3_LANE/VST3Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, 0, } +VST3_LANE/VST3Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:8:result_poly8x8 [] = { f0, f1, f2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:9:result_poly16x4 [] = { fff0, fff1, fff2, 0, } +VST3_LANE/VST3Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST3_LANE/VST3Q_LANE:11:result_float16x4 [] = { cc00, cb80, cb00, 0, } +VST3_LANE/VST3Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:17:result_uint16x8 [] = { fff0, fff1, fff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } 
+VST3_LANE/VST3Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:21:result_poly16x8 [] = { fff0, fff1, fff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:22:result_float32x4 [] = { c1800000, c1700000, c1600000, 0, } +VST3_LANE/VST3Q_LANE:23:result_float16x8 [] = { cc00, cb80, cb00, 0, 0, 0, 0, 0, } + +VST3_LANE/VST3Q_LANE chunk 1 output: +VST3_LANE/VST3Q_LANE:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:25:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:26:result_int32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:30:result_uint32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:32:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:33:result_poly16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:34:result_float32x2 [] = { c1600000, 0, } +VST3_LANE/VST3Q_LANE:35:result_float16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:38:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:42:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:44:result_poly8x16 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:45:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:46:result_float32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:47:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST3_LANE/VST3Q_LANE chunk 2 output: +VST3_LANE/VST3Q_LANE:48:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:49:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:50:result_int32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:51:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:52:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:53:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:54:result_uint32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:56:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:57:result_poly16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:58:result_float32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:59:result_float16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:61:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:62:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:65:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:66:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:69:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:70:result_float32x4 [] = { 0, 0, 
0, 0, } +VST3_LANE/VST3Q_LANE:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 0 output: +VST4_LANE/VST4Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, f3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VST4_LANE/VST4Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:8:result_poly8x8 [] = { f0, f1, f2, f3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:9:result_poly16x4 [] = { fff0, fff1, fff2, fff3, } +VST4_LANE/VST4Q_LANE:10:result_float32x2 [] = { c1800000, c1700000, } +VST4_LANE/VST4Q_LANE:11:result_float16x4 [] = { cc00, cb80, cb00, ca80, } +VST4_LANE/VST4Q_LANE:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:13:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:14:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:17:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:18:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:21:result_poly16x8 [] = { fff0, fff1, fff2, fff3, 0, 0, 0, 0, } 
+VST4_LANE/VST4Q_LANE:22:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VST4_LANE/VST4Q_LANE:23:result_float16x8 [] = { cc00, cb80, cb00, ca80, 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 1 output: +VST4_LANE/VST4Q_LANE:24:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:25:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:26:result_int32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:27:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:30:result_uint32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:31:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:32:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:33:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:34:result_float32x2 [] = { c1600000, c1500000, } +VST4_LANE/VST4Q_LANE:35:result_float16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:36:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:37:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:38:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:39:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:40:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:41:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:42:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:43:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:44:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:45:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:46:result_float32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:47:result_float16x8 [] = { 0, 
0, 0, 0, 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 2 output: +VST4_LANE/VST4Q_LANE:48:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:49:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:50:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:51:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:52:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:53:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:54:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:56:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:57:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:58:result_float32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:59:result_float16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:60:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:61:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:62:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:63:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:64:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:65:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:66:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:67:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:68:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:69:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:70:result_float32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:71:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VST4_LANE/VST4Q_LANE chunk 3 output: +VST4_LANE/VST4Q_LANE:72:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:73:result_int16x4 [] = { 0, 0, 0, 0, } 
+VST4_LANE/VST4Q_LANE:74:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:75:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:76:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:77:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:78:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:79:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:80:result_poly8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:81:result_poly16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:82:result_float32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:83:result_float16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:84:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:85:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:86:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:87:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:88:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:89:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:90:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:91:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:92:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:93:result_poly16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:94:result_float32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:95:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBL1 output: +VTBL1:0:result_int8x8 [] = { 0, fffffff2, fffffff2, fffffff2, 0, 0, fffffff2, fffffff2, } +VTBL1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL1:3:result_int64x1 [] = { 3333333333333333, } +VTBL1:4:result_uint8x8 [] = { 0, f3, f3, f3, 0, 0, f3, f3, } +VTBL1:5:result_uint16x4 [] = { 3333, 
3333, 3333, 3333, } +VTBL1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL1:7:result_uint64x1 [] = { 3333333333333333, } +VTBL1:8:result_poly8x8 [] = { 0, f3, f3, f3, 0, 0, f3, f3, } +VTBL1:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL1:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBL1:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBL2 output: +VTBL2:0:result_int8x8 [] = { fffffff6, fffffff3, fffffff3, fffffff3, 0, 0, fffffff3, fffffff3, } +VTBL2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL2:3:result_int64x1 [] = { 3333333333333333, } +VTBL2:4:result_uint8x8 [] = { f6, f5, f5, f5, 0, 0, f5, f5, } +VTBL2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL2:7:result_uint64x1 [] = { 3333333333333333, } +VTBL2:8:result_poly8x8 [] = { f6, f5, f5, f5, 0, 0, f5, f5, } +VTBL2:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:10:result_float32x2 [] = { 33333333, 33333333, } 
+VTBL2:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBL2:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBL3 output: +VTBL3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 0, fffffff4, fffffff4, } +VTBL3:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL3:3:result_int64x1 [] = { 3333333333333333, } +VTBL3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, 0, f7, f7, } +VTBL3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL3:7:result_uint64x1 [] = { 3333333333333333, } +VTBL3:8:result_poly8x8 [] = { f8, f7, f7, f7, ff, 0, f7, f7, } +VTBL3:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL3:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBL3:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 
33333333, } +VTBL3:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBL4 output: +VTBL4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 0, fffffff5, fffffff5, } +VTBL4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL4:3:result_int64x1 [] = { 3333333333333333, } +VTBL4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, 0, f9, f9, } +VTBL4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL4:7:result_uint64x1 [] = { 3333333333333333, } +VTBL4:8:result_poly8x8 [] = { fa, f9, f9, f9, 3, 0, f9, f9, } +VTBL4:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:10:result_float32x2 [] = { 33333333, 33333333, } +VTBL4:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBL4:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL4:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:18:result_uint32x4 [] = { 
33333333, 33333333, 33333333, 33333333, } +VTBL4:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL4:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBX1 output: +VTBX1:0:result_int8x8 [] = { 33, fffffff2, fffffff2, fffffff2, 33, 33, fffffff2, fffffff2, } +VTBX1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX1:3:result_int64x1 [] = { 3333333333333333, } +VTBX1:4:result_uint8x8 [] = { cc, f3, f3, f3, cc, cc, f3, f3, } +VTBX1:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX1:7:result_uint64x1 [] = { 3333333333333333, } +VTBX1:8:result_poly8x8 [] = { cc, f3, f3, f3, cc, cc, f3, f3, } +VTBX1:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX1:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBX1:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VTBX1:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBX2 output: +VTBX2:0:result_int8x8 [] = { fffffff6, fffffff3, fffffff3, fffffff3, 33, 33, fffffff3, fffffff3, } +VTBX2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX2:3:result_int64x1 [] = { 3333333333333333, } +VTBX2:4:result_uint8x8 [] = { f6, f5, f5, f5, cc, cc, f5, f5, } +VTBX2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX2:7:result_uint64x1 [] = { 3333333333333333, } +VTBX2:8:result_poly8x8 [] = { f6, f5, f5, f5, cc, cc, f5, f5, } +VTBX2:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX2:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBX2:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBX3 output: +VTBX3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 33, fffffff4, fffffff4, } +VTBX3:1:result_int16x4 [] = { 3333, 
3333, 3333, 3333, } +VTBX3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX3:3:result_int64x1 [] = { 3333333333333333, } +VTBX3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, cc, f7, f7, } +VTBX3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX3:7:result_uint64x1 [] = { 3333333333333333, } +VTBX3:8:result_poly8x8 [] = { f8, f7, f7, f7, ff, cc, f7, f7, } +VTBX3:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX3:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBX3:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VTBX4 output: +VTBX4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 33, fffffff5, fffffff5, } +VTBX4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX4:3:result_int64x1 [] = { 3333333333333333, } +VTBX4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, cc, f9, f9, } +VTBX4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:6:result_uint32x2 [] = { 33333333, 
33333333, } +VTBX4:7:result_uint64x1 [] = { 3333333333333333, } +VTBX4:8:result_poly8x8 [] = { fa, f9, f9, f9, 3, cc, f9, f9, } +VTBX4:9:result_poly16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:10:result_float32x2 [] = { 33333333, 33333333, } +VTBX4:11:result_float16x4 [] = { 0, 0, 0, 0, } +VTBX4:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:20:result_poly8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:21:result_poly16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:22:result_float32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:23:result_float16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } + +VRECPE/VRECPEQ (positive input) output: +VRECPE/VRECPEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRECPE/VRECPEQ:1:result_uint32x4 [] = { bf000000, bf000000, bf000000, bf000000, } +VRECPE/VRECPEQ:2:result_float32x2 [] = { 3f068000, 3f068000, } +VRECPE/VRECPEQ:3:result_float32x4 [] = { 3c030000, 3c030000, 3c030000, 3c030000, } + +VRECPE/VRECPEQ (negative input) output: +VRECPE/VRECPEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRECPE/VRECPEQ:5:result_uint32x4 [] = { ee800000, ee800000, ee800000, ee800000, } +VRECPE/VRECPEQ:6:result_float32x2 [] = { bdcc8000, bdcc8000, } +VRECPE/VRECPEQ:7:result_float32x4 [] = { bc030000, bc030000, bc030000, bc030000, } + +VRECPE/VRECPEQ FP special (NaN, infinity) output: 
+VRECPE/VRECPEQ:8:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRECPE/VRECPEQ:9:result_float32x4 [] = { 0, 0, 0, 0, } + +VRECPE/VRECPEQ FP special (zero, large value) output: +VRECPE/VRECPEQ:10:result_float32x2 [] = { 7f800000, 7f800000, } +VRECPE/VRECPEQ:11:result_float32x4 [] = { 0, 0, 0, 0, } + +VRECPE/VRECPEQ FP special (-0, -infinity) output: +VRECPE/VRECPEQ:12:result_float32x2 [] = { ff800000, ff800000, } +VRECPE/VRECPEQ:13:result_float32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VRECPE/VRECPEQ FP special (large negative value) output: +VRECPE/VRECPEQ:14:result_float32x2 [] = { 80000000, 80000000, } + +VRSQRTE/VRSQRTEQ output: +VRSQRTE/VRSQRTEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRSQRTE/VRSQRTEQ:1:result_uint32x4 [] = { 9c800000, 9c800000, 9c800000, 9c800000, } +VRSQRTE/VRSQRTEQ:2:result_float32x2 [] = { 3e498000, 3e498000, } +VRSQRTE/VRSQRTEQ:3:result_float32x4 [] = { 3e700000, 3e700000, 3e700000, 3e700000, } + +VRSQRTE/VRSQRTEQ (2) output: +VRSQRTE/VRSQRTEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRSQRTE/VRSQRTEQ:5:result_uint32x4 [] = { ae800000, ae800000, ae800000, ae800000, } + +VRSQRTE/VRSQRTEQ (3) output: +VRSQRTE/VRSQRTEQ:6:result_uint32x2 [] = { b4800000, b4800000, } +VRSQRTE/VRSQRTEQ:7:result_uint32x4 [] = { ed000000, ed000000, ed000000, ed000000, } + +VRSQRTE/VRSQRTEQ FP special (NaN, 0) output: +VRSQRTE/VRSQRTEQ:8:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTE/VRSQRTEQ:9:result_float32x4 [] = { 7f800000, 7f800000, 7f800000, 7f800000, } + +VRSQRTE/VRSQRTEQ FP special (negative, infinity) output: +VRSQRTE/VRSQRTEQ:10:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTE/VRSQRTEQ:11:result_float32x4 [] = { 0, 0, 0, 0, } + +VRSQRTE/VRSQRTEQ FP special (-0, -infinity) output: +VRSQRTE/VRSQRTEQ:12:result_float32x2 [] = { ff800000, ff800000, } +VRSQRTE/VRSQRTEQ:13:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:0:result_uint32x2 [] = { ffffffff, 0, } 
+VCAGE/VCAGEQ:1:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGE/VCAGEQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VCALE/VCALEQ:1:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:2:result_uint32x2 [] = { 0, 0, } +VCALE/VCALEQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:0:result_uint32x2 [] = { 0, 0, } +VCAGT/VCAGTQ:1:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGT/VCAGTQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:0:result_uint32x2 [] = { 0, ffffffff, } +VCALT/VCALTQ:1:result_uint32x4 [] = { 0, 0, 0, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:2:result_uint32x2 [] = { 0, 0, } +VCALT/VCALTQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCVT/VCVTQ output: +VCVT/VCVTQ:0:result_float32x2 [] = { c1800000, c1700000, } +VCVT/VCVTQ:1:result_float32x2 [] = { 4f800000, 4f800000, } +VCVT/VCVTQ:2:result_float32x4 [] = { 41720000, c1720000, 0, 80000000, } +VCVT/VCVTQ:3:result_float32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VCVT/VCVTQ:4:result_float32x4 [] = { 4f800000, 4f800000, 4f800000, 4f800000, } +VCVT/VCVTQ:5:result_int32x2 [] = { fffffff1, 5, } +VCVT/VCVTQ:6:result_uint32x2 [] = { 0, 5, } +VCVT/VCVTQ:7:result_float16x4 [] = { 0, 8000, 4b8f, cb8f, } +VCVT/VCVTQ:8:result_int32x4 [] = { 0, 0, f, fffffff1, } +VCVT/VCVTQ:9:result_uint32x4 [] = { 0, 0, f, 0, } + +VCVT_N/VCVTQ_N output: +VCVT_N/VCVTQ_N:10:result_float32x2 [] = { c0800000, c0700000, } +VCVT_N/VCVTQ_N:11:result_float32x2 [] = { 4c000000, 4c000000, } +VCVT_N/VCVTQ_N:12:result_float32x4 [] = { b2800000, b2700000, b2600000, b2500000, } +VCVT_N/VCVTQ_N:13:result_float32x4 
[] = { 49800000, 49800000, 49800000, 49800000, } +VCVT_N/VCVTQ_N:14:result_int32x2 [] = { ff0b3333, 54cccd, } +VCVT_N/VCVTQ_N:15:result_uint32x2 [] = { 0, 15, } +VCVT_N/VCVTQ_N:16:result_int32x4 [] = { 0, 0, 1e3d7, fffe1c29, } +VCVT_N/VCVTQ_N:17:result_uint32x4 [] = { 0, 0, 1e, 0, } + +VCVT/VCVTQ (check rounding) output: +VCVT/VCVTQ:18:result_int32x2 [] = { a, a, } +VCVT/VCVTQ:19:result_uint32x2 [] = { a, a, } +VCVT/VCVTQ:20:result_int32x4 [] = { 7d, 7d, 7d, 7d, } +VCVT/VCVTQ:21:result_uint32x4 [] = { 7d, 7d, 7d, 7d, } + +VCVT_N/VCVTQ_N (check rounding) output: +VCVT_N/VCVTQ_N:22:result_int32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:23:result_uint32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:24:result_int32x4 [] = { fbccc, fbccc, fbccc, fbccc, } +VCVT_N/VCVTQ_N:25:result_uint32x4 [] = { fbccc, fbccc, fbccc, fbccc, } + +VCVT_N/VCVTQ_N (check saturation) output: +VCVT_N/VCVTQ_N:26:result_int32x2 [] = { 7fffffff, 7fffffff, } +VCVT_N/VCVTQ_N:27:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } + +VCVT FP16 (check fp16-fp32 inf/nan/denormal) output: +VCVT FP16:28:result_float32x4 [] = { 38640000, 7f800000, 7fc00000, ff800000, } + +VCVT FP16 (check fp32-fp16 inf/nan) output: +VCVT FP16:29:result_float16x4 [] = { 7e00, 7c00, fc00, 8000, } + +VRECPS/VRECPSQ output: +VRECPS/VRECPSQ:0:result_float32x2 [] = { c2e19eb7, c2e19eb7, } +VRECPS/VRECPSQ:1:result_float32x4 [] = { c1db851f, c1db851f, c1db851f, c1db851f, } + +VRECPS/VRECPSQ FP special (NAN) and normal values output: +VRECPS/VRECPSQ:2:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRECPS/VRECPSQ:3:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VRECPS/VRECPSQ FP special (infinity, 0) and normal values output: +VRECPS/VRECPSQ:4:result_float32x2 [] = { ff800000, ff800000, } +VRECPS/VRECPSQ:5:result_float32x4 [] = { 40000000, 40000000, 40000000, 40000000, } + +VRECPS/VRECPSQ FP special (infinity, 0) output: +VRECPS/VRECPSQ:6:result_float32x2 [] = { 40000000, 40000000, } 
+VRECPS/VRECPSQ:7:result_float32x4 [] = { 40000000, 40000000, 40000000, 40000000, } + +VRSQRTS/VRSQRTSQ output: +VRSQRTS/VRSQRTSQ:0:result_float32x2 [] = { c2796b84, c2796b84, } +VRSQRTS/VRSQRTSQ:1:result_float32x4 [] = { c0e4a3d8, c0e4a3d8, c0e4a3d8, c0e4a3d8, } + +VRSQRTS/VRSQRTSQ FP special (NAN) and normal values output: +VRSQRTS/VRSQRTSQ:2:result_float32x2 [] = { 7fc00000, 7fc00000, } +VRSQRTS/VRSQRTSQ:3:result_float32x4 [] = { 7fc00000, 7fc00000, 7fc00000, 7fc00000, } + +VRSQRTS/VRSQRTSQ FP special (infinity, 0) and normal values output: +VRSQRTS/VRSQRTSQ:4:result_float32x2 [] = { ff800000, ff800000, } +VRSQRTS/VRSQRTSQ:5:result_float32x4 [] = { 3fc00000, 3fc00000, 3fc00000, 3fc00000, } + +VRSQRTS/VRSQRTSQ FP special (infinity, 0) output: +VRSQRTS/VRSQRTSQ:6:result_float32x2 [] = { 3fc00000, 3fc00000, } +VRSQRTS/VRSQRTSQ:7:result_float32x4 [] = { 3fc00000, 3fc00000, 3fc00000, 3fc00000, } diff --git a/ref_dsp.c b/ref_dsp.c new file mode 100644 index 0000000..124f6b7 --- /dev/null +++ b/ref_dsp.c @@ -0,0 +1,421 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__cplusplus) +#include <cstdio> +#include <cstdint> +#else +#include <stdio.h> +#if defined(_MSC_VER) +#include "msstdint.h" +#else +#include <stdint.h> +#endif +#endif + +#if defined(__arm__) || defined(__aarch64__) +#include <armdsp.h> +#include <dspfns.h> /* For Overflow */ +#else +#include "stm-armdsp.h" +#include "stm-dspfns.h" /* For Overflow */ +#endif + +extern FILE* ref_file; + +void exec_dsp (void) +{ + int32_t svar1, svar2, sacc, sres; + int32_t lo, hi; + + + fprintf(ref_file, "\n\nDSP (non-NEON) intrinsics\n"); + + /* qadd */ + /* int32_t qadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0x7000; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat 
%d\n", svar1, svar2, sres, Overflow); + + /* qsub */ + /* int32_t qsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x80000000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + + /* qdadd */ + /* int32_t qdadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; 
+ svar2 = 0x7000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x70000000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + /* qdsub */ + /* int32_t qdsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = qdsub(svar1, svar2); + 
fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x90000000; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x70000001; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + + /* smulbb, smulbt, smultb, smultt */ + /* int32_t smulbb(int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smulbb(svar1, svar2); + fprintf(ref_file, "smulbb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulbt(svar1, svar2); + fprintf(ref_file, "smulbt(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultb(svar1, svar2); + fprintf(ref_file, "smultb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultt(svar1, svar2); + fprintf(ref_file, "smultt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smulbb(svar1, svar2); + fprintf(ref_file, "smulbb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulbt(svar1, svar2); + fprintf(ref_file, "smulbt(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultb(svar1, svar2); + fprintf(ref_file, "smultb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultt(svar1, svar2); + fprintf(ref_file, "smultt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + + /* smlabb, smlabt, smlatb, smlatt */ + /* int32_t smlabb(int32_t val1, int32_t val2, int32_t acc); */ + sacc = 0x01020304; + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smlabb(svar1, svar2, sacc); + fprintf(ref_file, "smlabb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlabt(svar1, svar2, sacc); + fprintf(ref_file, "smlabt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = 
smlatb(svar1, svar2, sacc); + fprintf(ref_file, "smlatb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatt(svar1, svar2, sacc); + fprintf(ref_file, "smlatt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smlabb(svar1, svar2, sacc); + fprintf(ref_file, "smlabb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlabt(svar1, svar2, sacc); + fprintf(ref_file, "smlabt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatb(svar1, svar2, sacc); + fprintf(ref_file, "smlatb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatt(svar1, svar2, sacc); + fprintf(ref_file, "smlatt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + + /* smlalbb, smlalbt, smlaltb, smlaltt */ + /* int32_t smlalbb(int32_t *lo, int32_t *hi, int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbt(&%#x, &%#x, 
%#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + + svar1 = 0x7FFF7FFF; + svar2 = 0x7FFF7FFF; + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlalbt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + + + /* smulwb, smulwt */ + /* int32_t smulwb(int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smulwb(svar1, svar2); + fprintf(ref_file, "smulwb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulwt(svar1, svar2); + fprintf(ref_file, "smulwt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smulwb(svar1, svar2); + fprintf(ref_file, "smulwb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulwt(svar1, svar2); + fprintf(ref_file, "smulwt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + + /* smlawb, smlawt */ + /* int32_t smlawb(int32_t val1, int32_t val2, int32_t acc); */ + sacc = 0x01020304; 
+ svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smlawb(svar1, svar2, sacc); + fprintf(ref_file, "smlawb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlawt(svar1, svar2, sacc); + fprintf(ref_file, "smlawt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smlawb(svar1, svar2, sacc); + fprintf(ref_file, "smlawb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlawt(svar1, svar2, sacc); + fprintf(ref_file, "smlawt(%#x, %#x, %#X) = %#x\n", svar1, svar2, sacc, sres); + +} diff --git a/ref_dspfns.c b/ref_dspfns.c new file mode 100644 index 0000000..a8409c6 --- /dev/null +++ b/ref_dspfns.c @@ -0,0 +1,1502 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__cplusplus) +#include <cstdio> +#include <cstdint> +#else +#include <stdio.h> +#if defined(_MSC_VER) +#include "msstdint.h" +#else +#include <stdint.h> +#endif +#endif + +#if defined(__arm__) || defined(__aarch64__) +#include <dspfns.h> +#else +#include "stm-dspfns.h" +#endif + +extern FILE* ref_file; + +#define __xSTR(X) #X +#define __STR(X) __xSTR(X) + +#define FN(X) X, __STR(X) + +typedef int32_t func32_32_32_ptr(int32_t, int32_t); +typedef int16_t func16_32_ptr(int32_t); +typedef int32_t func32_32_16_16_ptr(int32_t, int16_t, int16_t); + +void test_16_fn_32(func16_32_ptr func, const char* func_name, + int init_Overflow, int init_Carry) +{ + int32_t svar32_a; + int16_t svar16_a; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32769; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); +} + +void test_32_fn_32_32(func32_32_32_ptr func, const char* func_name, + int init_Overflow, int init_Carry) 
+{ + int32_t svar32_a, svar32_b, svar32_c; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + 
svar32_b = 0xFFFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); +} + +void test_32_fn_32_16_16(func32_32_16_16_ptr func, const char* func_name, + int init_Overflow, int init_Carry) +{ + int32_t svar32_a, svar32_b; + int16_t svar16_a, svar16_b; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = 0; + Carry = init_Carry; + svar32_a = -1; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); +} + +void exec_dspfns1 (int init_Overflow, int init_Carry) +{ + int32_t svar32_a, svar32_b, svar32_c; + int16_t svar16_a, svar16_b, svar16_c; + + + fprintf(ref_file, "\n\nDSP FNS (non-NEON/ITU) intrinsics with input Overflow=%d and input Carry=%d\n", init_Overflow, init_Carry); + + /* saturate */ + /* int16_t saturate(int32_t x) */ + 
test_16_fn_32(FN(saturate), init_Overflow, init_Carry); + + + /* add */ + /* int16_t add(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -20000; + svar16_b = -20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* sub */ + /* int16_t sub(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -20000; + svar16_b = -20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + 
fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0; + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* abs_s */ + /* int16_t abs_s(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + + /* shl */ + /* int16_t shl(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, 
Overflow, Carry); + + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* shr */ + /* int16_t shr(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 
1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* mult */ + /* int16_t mult(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* L_mult */ + /* int32_t L_mult(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, 
svar32_a, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar32_a, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar32_a, Overflow, Carry); + + + /* negate */ + /* int16_t negate(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + + /* extract_h */ + /* int16_t extract_h(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x12345678; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, 
"extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* extract_l */ + /* int16_t extract_l(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x43218765; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* round */ + /* int16_t round(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x43218765; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x87654321; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow 
%d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* L_mac */ + /* int32_t L_mac(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = -1; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + + /* L_msu */ + /* int32_t L_msu(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, 
svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + + /* L_add */ + /* int32_t L_add(int32_t val1, int32_t val2); */ + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + 
svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + /* L_sub */ + /* int32_t L_sub(int32_t val1, int32_t val2); */ + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0xFFFF9000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0xFFFF7001; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x90000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x70000001; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x 
overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0; + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + + /* L_add_c */ + /* int32_t L_add_c(int32_t val1, int32_t val2); */ + test_32_fn_32_32(FN(L_add_c), init_Overflow, init_Carry); + + + + /* L_sub_c */ + /* int32_t L_sub_c(int32_t val1, int32_t val2); */ +#undef MYFN +#define MYFN L_sub_c + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, 
%#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x1; + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFFFFFFF; + svar32_b = 0x7FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + + /* L_macNs */ + /* int32_t L_macNs(int32_t acc, int16_t x, int16_t y) */ + test_32_fn_32_16_16(FN(L_macNs), init_Overflow, init_Carry); + + /* L_msuNs */ + /* int32_t L_msuNs(int32_t acc, int16_t x, int16_t y) */ + test_32_fn_32_16_16(FN(L_msuNs), init_Overflow, init_Carry); + + + /* negate */ + /* int32_t negate(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* mult_r */ + /* int16_t mult_r(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + 
fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* norm_s */ + /* int16_t norm_s(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 12000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* norm_l */ + /* int16_t norm_l(int16_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = 
init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 12000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0xABCDEF; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* L_shl */ + /* int32_t L_shl(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", 
svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* L_shr */ + /* int32_t L_shr(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x 
overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, 
svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* shr_r */ + /* int16_t shr_r(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", 
svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* mac_r */ + /* int16_t mac_r(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = -1; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x244; + 
svar16_b = 0x522; + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* msu_r */ + /* int32_t msu_r(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x321; + svar16_b = 0x243; + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* L_deposit_h */ + /* int32_t L_deposit_h(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", 
svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + + /* L_deposit_l */ + /* int32_t L_deposit_l(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + + /* L_shr_r */ + /* int32_t L_shr_r(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + 
svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = 
L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* L_abs */ + /* int32_t L_abs(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* L_sat */ + /* int32_t L_sat(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = 32768; + Overflow = 
init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* div_s */ + /* int16_t div_s(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + svar16_a = 10000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + +} +/* Entry point for the dspfns reference run: sets Overflow to 0, then calls exec_dspfns1 once for each of the four 0/1 argument pairs, in the order (0,0), (0,1), (1,0), (1,1). */ +void exec_dspfns(void) +{ + Overflow = 0; + /* NOTE(review): the two arguments presumably map to init_Overflow and init_Carry, in that order -- the exec_dspfns1 signature is not visible in this chunk; confirm. */ + exec_dspfns1(0, 0); + exec_dspfns1(0, 1); + exec_dspfns1(1, 0); + exec_dspfns1(1, 1); +} diff --git a/ref_integer.c b/ref_integer.c new file mode 100644 index 0000000..27bb58a --- /dev/null +++ b/ref_integer.c @@ -0,0 +1,290 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__cplusplus) +#include <cstdio> +#include <cstdint> +#else +#include <stdio.h> +#if defined(_MSC_VER) +#include "msstdint.h" +#else +#include <stdint.h> +#endif +#endif + +#ifndef __arm__ +#include "stm-arm.h" +#include "stm-dspfns.h" /* For Overflow */ +#else +#include <dspfns.h> /* For Overflow */ +#endif + +extern FILE* ref_file; + +void exec_integer (void) +{ + int i; + uint32_t uvar, ures; + int32_t svar1, svar2, sres; + + uint8_t clz; + + fprintf(ref_file, "\n\nInteger (non-NEON) intrinsics\n"); + + /* __clz */ + /* uint8_t __clz(uint32_t val); */ + uvar = 0xFFFFFFFF; + for(i=0; i<=32; i++) { + clz = __clz(uvar); + fprintf(ref_file, "__clz(%#x) = %d\n", (unsigned int)uvar, clz); + uvar >>= 1; + } + + /* __qadd */ + /* int32_t __qadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x7000; + svar2 = 0x7000; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, 
"__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + /* __qdbl */ + /* int32_t __qdbl(int32_t val); */ + svar1 = 1; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0xEFFFFFFF; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + /* __qsub */ + /* int32_t __qsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = 2; + 
Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0; + svar2 = 0x80000000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + /* __rbit */ + /* uint32_t __rbit(uint32_t val); */ + uvar = 0x12345678; + ures = __rbit(uvar); + fprintf(ref_file, "__rbit(%#x) = %#x\n", + (unsigned int)uvar, (unsigned int)ures); + + /* __rev */ + /* uint32_t __rev(uint32_t val); */ + uvar = 0x12345678; + ures = __rev(uvar); + fprintf(ref_file, "__rev(%#x) = %#x\n", + (unsigned int)uvar, (unsigned int)ures); + + /* __ssat */ + /* int32_t __ssat(int32_t val, uint32_t sat); */ + svar1 = 0x12345678; + Overflow = 0; + sres = __ssat(svar1, 30); + fprintf(ref_file, "__ssat(%#x, 30) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x12345678; + Overflow = 0; + sres = __ssat(svar1, 19); + fprintf(ref_file, "__ssat(%#x, 19) = %#x sat 
%d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 29); + fprintf(ref_file, "__ssat(%#x, 29) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 12); + fprintf(ref_file, "__ssat(%#x, 12) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 32); + fprintf(ref_file, "__ssat(%#x, 32) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 1); + fprintf(ref_file, "__ssat(%#x, 1) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + /* __usat */ + /* int32_t __usat(uint32_t val, uint32_t sat); */ + svar1 = 0x12345678; + Overflow = 0; + sres = __usat(svar1, 30); + fprintf(ref_file, "__usat(%#x, 30) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x12345678; + Overflow = 0; + sres = __usat(svar1, 19); + fprintf(ref_file, "__usat(%#x, 19) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 29); + fprintf(ref_file, "__usat(%#x, 29) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 12); + fprintf(ref_file, "__usat(%#x, 12) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 31); + fprintf(ref_file, "__usat(%#x, 31) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 0); + fprintf(ref_file, "__usat(%#x, 0) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); +} diff --git a/ref_v_binary_op.c b/ref_v_binary_op.c new file mode 100644 index 0000000..bb4a723 --- /dev/null +++ 
b/ref_v_binary_op.c @@ -0,0 +1,88 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for binary operator validation */ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. 
*/ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + VDUP(vector2, , int, s, 8, 8, 2); + VDUP(vector2, , int, s, 16, 4, -4); + VDUP(vector2, , int, s, 32, 2, 3); + VDUP(vector2, , int, s, 64, 1, 100); + VDUP(vector2, , uint, u, 8, 8, 20); + VDUP(vector2, , uint, u, 16, 4, 30); + VDUP(vector2, , uint, u, 32, 2, 40); + VDUP(vector2, , uint, u, 64, 1, 2); + VDUP(vector2, q, int, s, 8, 16, -10); + VDUP(vector2, q, int, s, 16, 8, -20); + VDUP(vector2, q, int, s, 32, 4, -30); + VDUP(vector2, q, int, s, 64, 2, 24); + VDUP(vector2, q, uint, u, 8, 16, 12); + VDUP(vector2, q, uint, u, 16, 8, 3); + VDUP(vector2, q, uint, u, 32, 4, 55); + VDUP(vector2, q, uint, u, 64, 2, 3); + + /* Apply a binary operator named INSN_NAME */ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_BINARY_OP, INSN_NAME); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_binary_sat_op.c b/ref_v_binary_sat_op.c new file mode 100644 index 0000000..532da16 --- /dev/null +++ b/ref_v_binary_sat_op.c @@ -0,0 +1,110 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, 
including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for binary saturating operator validation */ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector1,vector2), then store the result. 
*/ + +#define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) + +#define TEST_BINARY_SAT_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector1" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); + + /* Choose arbitrary initialization values */ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , int, s, 64, 1, 0x44); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , uint, u, 64, 1, 0x88); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + 
TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_comp_f_op.c b/ref_v_comp_f_op.c new file mode 100644 index 0000000..9d07c23 --- /dev/null +++ b/ref_v_comp_f_op.c @@ -0,0 +1,89 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + int i; + + /* Basic test: y=vcomp(x,x), then store the result. */ +#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N)) + +#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ + TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used for vector + comparison */ + VDUP(vector2, , float, f, 32, 2, -16.0f); + VDUP(vector2, q, float, f, 32, 4, -14.0f); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + VDUP(vector2, , float, f, 32, 2, -10.0f); + VDUP(vector2, q, float, f, 32, 4, 10.0f); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_v_comp_op.c b/ref_v_comp_op.c 
new file mode 100644 index 0000000..9ca5038 --- /dev/null +++ b/ref_v_comp_op.c @@ -0,0 +1,221 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +#include <math.h> + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + int i; + + /* Basic test: y=vcomp(x,x), then store the result. 
*/ +#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N)) + +#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ + TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + /* No need for 64 bits elements */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, int, 8, 16); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, uint, 8, 16); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* There is no 64 bits variant, we can't use the generic initializer */ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , 
int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + + VLOAD(vector, buffer, q, int, s, 8, 16); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 8, 16); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used for vector + comparison */ + VDUP(vector2, , int, s, 8, 8, -10); + VDUP(vector2, , int, s, 16, 4, -14); + VDUP(vector2, , int, s, 32, 2, -16); + VDUP(vector2, , uint, u, 8, 8, 0xF3); + VDUP(vector2, , uint, u, 16, 4, 0xFFF2); + VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF1); + VDUP(vector2, , float, f, 32, 2, -15.0f); + + VDUP(vector2, q, int, s, 8, 16, -4); + VDUP(vector2, q, int, s, 16, 8, -10); + VDUP(vector2, q, int, s, 32, 4, -14); + VDUP(vector2, q, uint, u, 8, 16, 0xF4); + VDUP(vector2, q, uint, u, 16, 8, 0xFFF6); + VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF2); + VDUP(vector2, q, float, f, 32, 4, -14.0f); + + /* The same result buffers are used multiple times, so output them + before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , int, s, uint, 8, 8); + TEST_VCOMP(INSN_NAME, , int, s, uint, 16, 4); + TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2); + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, , uint, u, uint, 8, 8); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 16, 4); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2); + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, int, s, uint, 8, 16); + TEST_VCOMP(INSN_NAME, q, int, s, uint, 16, 8); + 
TEST_VCOMP(INSN_NAME, q, int, s, uint, 32, 4); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 8, 16); + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 16, 8); + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 32, 4); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + /* Extra tests to have 100% coverage on all the variants */ + VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + VDUP(vector2, , int, s, 32, 2, -15); + TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + VDUP(vector2, , float, f, 32, 2, -16.0f); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + + /* Extra FP tests with special values (NaN, ....) 
*/ + VDUP(vector, , float, f, 32, 2, 1.0); + VDUP(vector2, , float, f, 32, 2, NAN); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG " FP special (NaN)", uint, 32, 2, PRIx32); + + VDUP(vector, , float, f, 32, 2, 1.0); + VDUP(vector2, , float, f, 32, 2, -NAN); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG " FP special (-NaN)", uint, 32, 2, PRIx32); + + VDUP(vector, , float, f, 32, 2, NAN); + VDUP(vector2, , float, f, 32, 2, 1.0); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG " FP special (NaN)", uint, 32, 2, PRIx32); + + VDUP(vector, , float, f, 32, 2, 1.0); + VDUP(vector2, , float, f, 32, 2, HUGE_VALF); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG " FP special (inf)", uint, 32, 2, PRIx32); + + VDUP(vector, , float, f, 32, 2, 1.0); + VDUP(vector2, , float, f, 32, 2, -HUGE_VALF); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG " FP special (-inf)", uint, 32, 2, PRIx32); + + VDUP(vector, , float, f, 32, 2, HUGE_VALF); + VDUP(vector2, , float, f, 32, 2, 1.0); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG " FP special (inf)", uint, 32, 2, PRIx32); + + VDUP(vector, , float, f, 32, 2, -0.0); + VDUP(vector2, , float, f, 32, 2, 0.0); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG " FP special (-0.0)", uint, 32, 2, PRIx32); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_unary_op.c b/ref_v_unary_op.c new file mode 100644 index 0000000..18a4450 --- /dev/null +++ b/ref_v_unary_op.c @@ -0,0 +1,91 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to 
permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary operator validation */ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, q, int, s, 8, 16); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_unary_sat_op.c b/ref_v_unary_sat_op.c new file mode 100644 index 0000000..b9fea48 --- /dev/null +++ b/ref_v_unary_sat_op.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated 
documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary saturating operator validation */ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) + +#define TEST_UNARY_SAT_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, q, int, s, 8, 16); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_vaba.c b/ref_vaba.c new file mode 100644 index 0000000..5f2ff76 --- /dev/null +++ 
b/ref_vaba.c
@@ -0,0 +1,125 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#if defined(__arm__) || defined(__aarch64__)
+#include <arm_neon.h>
+#else
+#include "stm-arm-neon.h"
+#endif
+
+#include "stm-arm-neon-ref.h"
+/* exec_vaba: test driver for the vaba/vabaq intrinsics.
+   For each signed and unsigned 8/16/32-bit variant, in both D (no suffix)
+   and Q form, it loads vector1 from "buffer", fills vector2 and vector3
+   with constants, calls the intrinsic through TEST_VABA (which also stores
+   the result into the matching "result" buffer), and finally dumps the
+   results with dump_results_hex under the TEST_MSG label.  */
+#define TEST_MSG "VABA/VABAQ"
+void exec_vaba (void)
+{
+  /* Basic test: v4=vaba(v1,v2,v3), then store the result.
+     Q is the empty or "q" suffix, T1 the type name used by VECT_VAR,
+     T2 the intrinsic type letter, W the element width, N the lane count.  */
+#define TEST_VABA(Q, T1, T2, W, N) \
+  VECT_VAR(vector_res, T1, W, N) = \
+    vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
+                      VECT_VAR(vector2, T1, W, N), \
+                      VECT_VAR(vector3, T1, W, N)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement.  DECL_VABA_VAR declares one variable per tested variant
+     for the given base name.  */
+#define DECL_VABA_VAR(VAR) \
+  DECL_VARIABLE(VAR, int, 8, 8); \
+  DECL_VARIABLE(VAR, int, 16, 4); \
+  DECL_VARIABLE(VAR, int, 32, 2); \
+  DECL_VARIABLE(VAR, uint, 8, 8); \
+  DECL_VARIABLE(VAR, uint, 16, 4); \
+  DECL_VARIABLE(VAR, uint, 32, 2); \
+  DECL_VARIABLE(VAR, int, 8, 16); \
+  DECL_VARIABLE(VAR, int, 16, 8); \
+  DECL_VARIABLE(VAR, int, 32, 4); \
+  DECL_VARIABLE(VAR, uint, 8, 16); \
+  DECL_VARIABLE(VAR, uint, 16, 8); \
+  DECL_VARIABLE(VAR, uint, 32, 4)
+
+  DECL_VABA_VAR(vector1);
+  DECL_VABA_VAR(vector2);
+  DECL_VABA_VAR(vector3);
+  DECL_VABA_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer" */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+
+
+  /* Choose init value arbitrarily for "vector2" (second operand) */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+  VDUP(vector2, q, int, s, 8, 16, 10);
+  VDUP(vector2, q, int, s, 16, 8, -12);
+  VDUP(vector2, q, int, s, 32, 4, 32);
+  VDUP(vector2, q, uint, u, 8, 16, 10);
+  VDUP(vector2, q, uint, u, 16, 8, 12);
+  VDUP(vector2, q, uint, u, 32, 4, 32);
+
+  /* Choose init value arbitrarily for "vector3" (third operand) */
+  VDUP(vector3, , int, s, 8, 8, -5);
+  VDUP(vector3, , int, s, 16, 4, 25);
+  VDUP(vector3, , int, s, 32, 2, -40);
+  VDUP(vector3, , uint, u, 8, 8, 100);
+  VDUP(vector3, , uint, u, 16, 4, 2340);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+  VDUP(vector3, q, int, s, 8, 16, -100);
+  VDUP(vector3, q, int, s, 16, 8, -3000);
+  VDUP(vector3, q, int, s, 32, 4, 10000);
+  VDUP(vector3, q, uint, u, 8, 16, 2);
+  VDUP(vector3, q, uint, u, 16, 8, 3);
+  VDUP(vector3, q, uint, u, 32, 4, 4);
+
+  TEST_VABA(, int, s, 8, 8);
+  TEST_VABA(, int, s, 16, 4);
+  TEST_VABA(, int, s, 32, 2);
+  TEST_VABA(, uint, u, 8, 8);
+  TEST_VABA(, uint, u, 16, 4);
+  TEST_VABA(, uint, u, 32, 2);
+  TEST_VABA(q, int, s, 8, 16);
+  TEST_VABA(q, int, s, 16, 8);
+  TEST_VABA(q, int, s, 32, 4);
+  TEST_VABA(q, uint, u, 8, 16);
+  TEST_VABA(q, uint, u, 16, 8);
+  TEST_VABA(q, uint, u, 32, 4);
+
+  dump_results_hex (TEST_MSG);
+}
diff --git a/ref_vabal.c b/ref_vabal.c
new file mode 100644
index 0000000..e9ec975
--- /dev/null
+++ b/ref_vabal.c
@@ -0,0 +1,128 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#if defined(__arm__) || defined(__aarch64__)
+#include <arm_neon.h>
+#else
+#include "stm-arm-neon.h"
+#endif
+
+#include "stm-arm-neon-ref.h"
+/* exec_vabal: test driver for the vabal intrinsics.
+   vector1 and vector_res use the wide element width W2 (Q registers);
+   vector2 and vector3 use the narrow width W (D registers).  The function
+   runs two passes over the signed and unsigned variants: one with
+   arbitrary constants, dumped with dump_results_hex, and one with extreme
+   constants, dumped with dump_results_hex2.  */
+#define TEST_MSG "VABAL"
+void exec_vabal (void)
+{
+  /* Basic test: v4=vabal(v1,v2,v3), then store the result.
+     W is the narrow input width, W2 the wide accumulator/result width.  */
+#define TEST_VABAL(T1, T2, W, W2, N) \
+  VECT_VAR(vector_res, T1, W2, N) = \
+    vabal_##T2##W(VECT_VAR(vector1, T1, W2, N), \
+                  VECT_VAR(vector2, T1, W, N), \
+                  VECT_VAR(vector3, T1, W, N)); \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement.  The _LONG helper declares the 128-bit (wide) variants,
+     the _SHORT helper the 64-bit (narrow) ones.  */
+#define DECL_VABAL_VAR_LONG(VAR) \
+  DECL_VARIABLE(VAR, int, 16, 8); \
+  DECL_VARIABLE(VAR, int, 32, 4); \
+  DECL_VARIABLE(VAR, int, 64, 2); \
+  DECL_VARIABLE(VAR, uint, 16, 8); \
+  DECL_VARIABLE(VAR, uint, 32, 4); \
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+#define DECL_VABAL_VAR_SHORT(VAR) \
+  DECL_VARIABLE(VAR, int, 8, 8); \
+  DECL_VARIABLE(VAR, int, 16, 4); \
+  DECL_VARIABLE(VAR, int, 32, 2); \
+  DECL_VARIABLE(VAR, uint, 8, 8); \
+  DECL_VARIABLE(VAR, uint, 16, 4); \
+  DECL_VARIABLE(VAR, uint, 32, 2)
+
+  DECL_VABAL_VAR_LONG(vector1);
+  DECL_VABAL_VAR_SHORT(vector2);
+  DECL_VABAL_VAR_SHORT(vector3);
+  DECL_VABAL_VAR_LONG(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer" */
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, int, s, 64, 2);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 64, 2);
+
+
+  /* Choose init value arbitrarily for "vector2" */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  /* Choose init value arbitrarily for "vector3" */
+  VDUP(vector3, , int, s, 8, 8, -5);
+  VDUP(vector3, , int, s, 16, 4, 25);
+  VDUP(vector3, , int, s, 32, 2, -40);
+  VDUP(vector3, , uint, u, 8, 8, 100);
+  VDUP(vector3, , uint, u, 16, 4, 2340);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+
+  TEST_VABAL(int, s, 8, 16, 8);
+  TEST_VABAL(int, s, 16, 32, 4);
+  TEST_VABAL(int, s, 32, 64, 2);
+  TEST_VABAL(uint, u, 8, 16, 8);
+  TEST_VABAL(uint, u, 16, 32, 4);
+  TEST_VABAL(uint, u, 32, 64, 2);
+
+  dump_results_hex (TEST_MSG);
+
+  /* Second pass: use values that could overflow intermediate
+   * calculations.  The signed inputs are the bit patterns 0x80.. and
+   * 0x7f..; the unsigned vector2 values are unchanged from the first
+   * pass and the unsigned vector3 values are all-ones.  vector1 is not
+   * reloaded.  */
+  VDUP(vector2, , int, s, 8, 8, 0x80);
+  VDUP(vector2, , int, s, 16, 4, 0x8000);
+  VDUP(vector2, , int, s, 32, 2, 0x80000000);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  VDUP(vector3, , int, s, 8, 8, 0x7f);
+  VDUP(vector3, , int, s, 16, 4, 0x7fff);
+  VDUP(vector3, , int, s, 32, 2, 0x7fffffff);
+  VDUP(vector3, , uint, u, 8, 8, 0xff);
+  VDUP(vector3, , uint, u, 16, 4, 0xffff);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+
+  TEST_VABAL(int, s, 8, 16, 8);
+  TEST_VABAL(int, s, 16, 32, 4);
+  TEST_VABAL(int, s, 32, 64, 2);
+  TEST_VABAL(uint, u, 8, 16, 8);
+  TEST_VABAL(uint, u, 16, 32, 4);
+  TEST_VABAL(uint, u, 32, 64, 2);
+
+  dump_results_hex2 (TEST_MSG, " test intermediate overflow");
+}
diff --git a/ref_vabd.c b/ref_vabd.c
new file mode 100644
index 0000000..ef843e5
--- /dev/null
+++ b/ref_vabd.c
@@ -0,0 +1,133 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#if defined(__arm__) || defined(__aarch64__)
+#include <arm_neon.h>
+#else
+#include "stm-arm-neon.h"
+#endif
+
+#include "stm-arm-neon-ref.h"
+#include <math.h>
+/* exec_vabd: test driver for the vabd/vabdq intrinsics.
+   Covers signed and unsigned 8/16/32-bit plus float32 variants in D and Q
+   form: vector1 is loaded from "buffer", vector2 is filled with constants,
+   results are stored through TEST_VABD and dumped with dump_results_hex.
+   Two extra float32 Q-register runs then exercise signed zeros and are
+   dumped with DUMP_FP.  */
+#define TEST_MSG "VABD/VABDQ"
+void exec_vabd (void)
+{
+  int i; /* not referenced directly here; presumably the index used inside the DUMP_FP macro -- TODO confirm */
+
+  /* Basic test: v4=vabd(v1,v2), then store the result.  */
+#define TEST_VABD(Q, T1, T2, W, N) \
+  VECT_VAR(vector_res, T1, W, N) = \
+    vabd##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
+                      VECT_VAR(vector2, T1, W, N)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement.  DECL_VABD_VAR declares one variable per tested variant
+     (integer and float32) for the given base name.  */
+#define DECL_VABD_VAR(VAR) \
+  DECL_VARIABLE(VAR, int, 8, 8); \
+  DECL_VARIABLE(VAR, int, 16, 4); \
+  DECL_VARIABLE(VAR, int, 32, 2); \
+  DECL_VARIABLE(VAR, uint, 8, 8); \
+  DECL_VARIABLE(VAR, uint, 16, 4); \
+  DECL_VARIABLE(VAR, uint, 32, 2); \
+  DECL_VARIABLE(VAR, float, 32, 2); \
+  DECL_VARIABLE(VAR, int, 8, 16); \
+  DECL_VARIABLE(VAR, int, 16, 8); \
+  DECL_VARIABLE(VAR, int, 32, 4); \
+  DECL_VARIABLE(VAR, uint, 8, 16); \
+  DECL_VARIABLE(VAR, uint, 16, 8); \
+  DECL_VARIABLE(VAR, uint, 32, 4); \
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+  DECL_VABD_VAR(vector1);
+  DECL_VABD_VAR(vector2);
+  DECL_VABD_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer" */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily for "vector2" */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+  VDUP(vector2, , float, f, 32, 2, 8.3f);
+  VDUP(vector2, q, int, s, 8, 16, 10);
+  VDUP(vector2, q, int, s, 16, 8, -12);
+  VDUP(vector2, q, int, s, 32, 4, 32);
+  VDUP(vector2, q, uint, u, 8, 16, 10);
+  VDUP(vector2, q, uint, u, 16, 8, 12);
+  VDUP(vector2, q, uint, u, 32, 4, 32);
+  VDUP(vector2, q, float, f, 32, 4, 32.12f);
+
+  TEST_VABD(, int, s, 8, 8);
+  TEST_VABD(, int, s, 16, 4);
+  TEST_VABD(, int, s, 32, 2);
+  TEST_VABD(, uint, u, 8, 8);
+  TEST_VABD(, uint, u, 16, 4);
+  TEST_VABD(, uint, u, 32, 2);
+  TEST_VABD(, float, f, 32, 2);
+  TEST_VABD(q, int, s, 8, 16);
+  TEST_VABD(q, int, s, 16, 8);
+  TEST_VABD(q, int, s, 32, 4);
+  TEST_VABD(q, uint, u, 8, 16);
+  TEST_VABD(q, uint, u, 16, 8);
+  TEST_VABD(q, uint, u, 32, 4);
+  TEST_VABD(q, float, f, 32, 4);
+
+  dump_results_hex (TEST_MSG);
+
+
+  /* Extra FP test with special values: vabdq(-0.0, 0.0) */
+  VDUP(vector1, q, float, f, 32, 4, -0.0f);
+  VDUP(vector2, q, float, f, 32, 4, 0.0);
+  TEST_VABD(q, float, f, 32, 4);
+  DUMP_FP(TEST_MSG " FP special (-0.0)", float, 32, 4, PRIx32);
+
+
+  /* Same special-value test with the operands swapped: vabdq(0.0, -0.0).
+     Note that the dump label is identical to the one used just above.  */
+  VDUP(vector1, q, float, f, 32, 4, 0.0f);
+  VDUP(vector2, q, float, f, 32, 4, -0.0);
+  TEST_VABD(q, float, f, 32, 4);
+  DUMP_FP(TEST_MSG " FP special (-0.0)", float, 32, 4, PRIx32);
+}
diff --git a/ref_vabdl.c b/ref_vabdl.c
new file mode 100644
index 0000000..fec375e
--- /dev/null
+++ b/ref_vabdl.c
@@ -0,0 +1,93 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#if defined(__arm__) || defined(__aarch64__)
+#include <arm_neon.h>
+#else
+#include "stm-arm-neon.h"
+#endif
+
+#include "stm-arm-neon-ref.h"
+/* exec_vabdl: test driver for the vabdl intrinsics.
+   Both inputs use the narrow element width W (D registers) and the result
+   uses the wide width W2 (Q registers).  vector1 is loaded from "buffer",
+   vector2 is filled with constants, and the stored results are dumped
+   with dump_results_hex.  */
+#define TEST_MSG "VABDL"
+void exec_vabdl (void)
+{
+  /* Basic test: v4=vabdl(v1,v2), then store the result.
+     W is the narrow input width, W2 the wide result width.  */
+#define TEST_VABDL(T1, T2, W, W2, N) \
+  VECT_VAR(vector_res, T1, W2, N) = \
+    vabdl_##T2##W(VECT_VAR(vector1, T1, W, N), \
+                  VECT_VAR(vector2, T1, W, N)); \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement.  The _LONG helper declares the 128-bit (wide) variants,
+     the _SHORT helper the 64-bit (narrow) ones.  */
+#define DECL_VABDL_VAR_LONG(VAR) \
+  DECL_VARIABLE(VAR, int, 16, 8); \
+  DECL_VARIABLE(VAR, int, 32, 4); \
+  DECL_VARIABLE(VAR, int, 64, 2); \
+  DECL_VARIABLE(VAR, uint, 16, 8); \
+  DECL_VARIABLE(VAR, uint, 32, 4); \
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+#define DECL_VABDL_VAR_SHORT(VAR) \
+  DECL_VARIABLE(VAR, int, 8, 8); \
+  DECL_VARIABLE(VAR, int, 16, 4); \
+  DECL_VARIABLE(VAR, int, 32, 2); \
+  DECL_VARIABLE(VAR, uint, 8, 8); \
+  DECL_VARIABLE(VAR, uint, 16, 4); \
+  DECL_VARIABLE(VAR, uint, 32, 2)
+
+  DECL_VABDL_VAR_SHORT(vector1);
+  DECL_VABDL_VAR_SHORT(vector2);
+  DECL_VABDL_VAR_LONG(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer" */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+
+
+  /* Choose init value arbitrarily for "vector2" */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  TEST_VABDL(int, s, 8, 16, 8);
+  TEST_VABDL(int, s, 16, 32, 4);
+  TEST_VABDL(int, s, 32, 64, 2);
+  TEST_VABDL(uint, u, 8, 16, 8);
+  TEST_VABDL(uint, u, 16, 32, 4);
+  TEST_VABDL(uint, u, 32, 64, 2);
+
+  dump_results_hex (TEST_MSG);
+}
diff --git a/ref_vabs.c b/ref_vabs.c
new file mode 100644
index 0000000..b191391
--- /dev/null
+++ b/ref_vabs.c
@@ -0,0 +1,54 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#define INSN_NAME vabs
+#define TEST_MSG "VABS/VABSQ"
+
+/* Extra tests for functions requiring floating-point types.
+   EXTRA_TESTS names the function defined below; presumably the included
+   template calls EXTRA_TESTS() at the end of its own test -- TODO confirm
+   against ref_v_unary_op.c.  */
+void exec_vabs_f32(void);
+#define EXTRA_TESTS exec_vabs_f32
+
+#include "ref_v_unary_op.c"
+/* exec_vabs_f32: float32 run of INSN_NAME (vabs).
+   Fills the D-register input with -2.3f and the Q-register input with
+   3.4f, applies the instruction through TEST_UNARY_OP (not defined in this
+   file; presumably provided by ref_v_unary_op.c), then writes a
+   "float32:" header to ref_file and dumps both results with DUMP_FP.  */
+void exec_vabs_f32(void)
+{
+  int i; /* not referenced directly here; presumably used inside DUMP_FP -- TODO confirm */
+
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, -2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  fprintf(ref_file, "\nfloat32:\n");
+  DUMP_FP(TEST_MSG, float, 32, 2, PRIx32);
+  DUMP_FP(TEST_MSG, float, 32, 4, PRIx32);
+}
diff --git a/ref_vadd.c b/ref_vadd.c
new file mode 100644
index 0000000..918c70f
--- /dev/null
+++ b/ref_vadd.c
@@ -0,0 +1,60 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#define INSN_NAME vadd
+#define TEST_MSG "VADD/VADDQ"
+
+/* Extra tests for functions requiring floating-point types.
+   EXTRA_TESTS names the function defined below; presumably the included
+   template calls EXTRA_TESTS() at the end of its own test -- TODO confirm
+   against ref_v_binary_op.c.  */
+void exec_vadd_f32(void);
+#define EXTRA_TESTS exec_vadd_f32
+
+#include "ref_v_binary_op.c"
+/* exec_vadd_f32: float32 run of INSN_NAME (vadd).
+   Fills "vector" with 2.3f (D) / 3.4f (Q) and "vector2" with 4.5f (D) /
+   5.6f (Q), applies the instruction through TEST_BINARY_OP (not defined in
+   this file; presumably provided by ref_v_binary_op.c), then writes a
+   "float32:" header to ref_file and dumps both results with DUMP_FP.  */
+void exec_vadd_f32(void)
+{
+  int i; /* not referenced directly here; presumably used inside DUMP_FP -- TODO confirm */
+
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, 2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  VDUP(vector2, , float, f, 32, 2, 4.5f);
+  VDUP(vector2, q, float, f, 32, 4, 5.6f);
+
+  TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  fprintf(ref_file, "\nfloat32:\n");
+  DUMP_FP(TEST_MSG, float, 32, 2, PRIx32);
+  DUMP_FP(TEST_MSG, float, 32, 4, PRIx32);
+}
diff --git a/ref_vaddhn.c b/ref_vaddhn.c
new file mode 100644
index 0000000..39c238b
--- /dev/null
+++ b/ref_vaddhn.c
@@ -0,0 +1,94 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#if defined(__arm__) || defined(__aarch64__)
+#include <arm_neon.h>
+#else
+#include "stm-arm-neon.h"
+#endif
+
+#include "stm-arm-neon-ref.h"
+
+#if defined(__cplusplus)
+#include <cstdint>
+#else
+#if defined(_MSC_VER)
+#include "msstdint.h"
+#else
+#include <stdint.h>
+#endif
+#endif
+/* INSN_NAME and TEST_MSG default to vaddhn only when not already defined;
+   presumably another file can predefine them and include this one to reuse
+   the body for a different instruction -- TODO confirm.  */
+#ifndef INSN_NAME
+#define INSN_NAME vaddhn
+#define TEST_MSG "VADDHN"
+#endif
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+/* Defines void exec_<INSN_NAME>(void).  The FNNAME -> FNNAME1 indirection
+   lets INSN_NAME be macro-expanded before it is pasted after "exec_".
+   The test combines 128-bit inputs (element width W) into 64-bit results
+   (element width W2), stores them and dumps them with dump_results_hex.  */
+FNNAME (INSN_NAME)
+{
+  /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result.
+     TEST_VADDHN forwards to TEST_VADDHN1 so that the INSN argument is
+     expanded before token pasting.  */
+#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \
+  VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \
+                                                 VECT_VAR(vector2, T1, W, N)); \
+  vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N))
+
+#define TEST_VADDHN(INSN, T1, T2, W, W2, N) \
+  TEST_VADDHN1(INSN, T1, T2, W, W2, N)
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement */
+  DECL_VARIABLE_64BITS_VARIANTS(vector64);
+  DECL_VARIABLE_128BITS_VARIANTS(vector1);
+  DECL_VARIABLE_128BITS_VARIANTS(vector2);
+
+  clean_results ();
+
+  /* Fill input vector1 and vector2 with arbitrary values: vector1 holds a
+     small multiple of (max value of the half-width type + 1), vector2
+     holds the max value of the half-width type.  */
+  VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1));
+  VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1));
+  VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1));
+  VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1));
+  VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1));
+  VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1));
+
+  VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX);
+  VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX);
+  VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX);
+  VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX);
+  VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX);
+  VDUP(vector2, q, uint, u, 64, 2, (uint64_t)UINT32_MAX);
+
+  TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8);
+  TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4);
+  TEST_VADDHN(INSN_NAME, int, s, 64, 32, 2);
+  TEST_VADDHN(INSN_NAME, uint, u, 16, 8, 8);
+  TEST_VADDHN(INSN_NAME, uint, u, 32, 16, 4);
+  TEST_VADDHN(INSN_NAME, uint, u, 64, 32, 2);
+
+  dump_results_hex (TEST_MSG);
+}
diff --git a/ref_vaddl.c b/ref_vaddl.c
new file mode 100644
index 0000000..0856433
--- /dev/null
+++ b/ref_vaddl.c
@@ -0,0 +1,104 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#if defined(__arm__) || defined(__aarch64__)
+#include <arm_neon.h>
+#else
+#include "stm-arm-neon.h"
+#endif
+
+#include "stm-arm-neon-ref.h"
+/* INSN_NAME and TEST_MSG default to vaddl only when not already defined;
+   presumably another file can predefine them and include this one to reuse
+   the body for a different instruction -- TODO confirm.  */
+#ifndef INSN_NAME
+#define INSN_NAME vaddl
+#define TEST_MSG "VADDL"
+#endif
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+/* Defines void exec_<INSN_NAME>(void).  The FNNAME -> FNNAME1 indirection
+   lets INSN_NAME be macro-expanded before it is pasted after "exec_".
+   Both inputs are 64-bit vectors of element width W; the result is a
+   128-bit vector of element width W2.  */
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vaddl(x1,x2), then store the result.  x1 is "vector"
+     (loaded from "buffer"), x2 is "vector2" (constants).  */
+#define TEST_VADDL1(INSN, T1, T2, W, W2, N) \
+  VECT_VAR(vector_res, T1, W2, N) = \
+    INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
+                   VECT_VAR(vector2, T1, W, N)); \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define TEST_VADDL(INSN, T1, T2, W, W2, N) \
+  TEST_VADDL1(INSN, T1, T2, W, W2, N)
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer" */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+
+  /* Choose init value arbitrarily for "vector2" */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+
+  TEST_VADDL(INSN_NAME, int, s, 8, 16, 8);
+  TEST_VADDL(INSN_NAME, int, s, 16, 32, 4);
+  TEST_VADDL(INSN_NAME, int, s, 32, 64, 2);
+  TEST_VADDL(INSN_NAME, uint, u, 8, 16, 8);
+  TEST_VADDL(INSN_NAME, uint, u, 16, 32, 4);
+  TEST_VADDL(INSN_NAME, uint, u, 32, 64, 2);
+
+  /* FIXME: only a few result buffers are used, but we output all of them */
+  dump_results_hex (TEST_MSG);
+}
diff --git a/ref_vaddw.c b/ref_vaddw.c
new file mode 100644
index 0000000..638785c
--- /dev/null
+++ b/ref_vaddw.c
@@ -0,0 +1,104 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#if defined(__arm__) || defined(__aarch64__)
+#include <arm_neon.h>
+#else
+#include "stm-arm-neon.h"
+#endif
+
+#include "stm-arm-neon-ref.h"
+/* INSN_NAME and TEST_MSG default to vaddw only when not already defined;
+   presumably another file can predefine them and include this one to reuse
+   the body for a different instruction -- TODO confirm.  */
+#ifndef INSN_NAME
+#define INSN_NAME vaddw
+#define TEST_MSG "VADDW"
+#endif
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+/* Defines void exec_<INSN_NAME>(void).  The FNNAME -> FNNAME1 indirection
+   lets INSN_NAME be macro-expanded before it is pasted after "exec_".
+   The first input and the result are 128-bit vectors of element width W2;
+   the second input is a 64-bit vector of element width W.  */
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vaddw(x_wide,x_narrow), then store the result.  x_wide
+     is "vector" (loaded from "buffer"), x_narrow is "vector2"
+     (constants).  */
+#define TEST_VADDW1(INSN, T1, T2, W, W2, N) \
+  VECT_VAR(vector_res, T1, W2, N) = \
+    INSN##_##T2##W(VECT_VAR(vector, T1, W2, N), \
+                   VECT_VAR(vector2, T1, W, N)); \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define TEST_VADDW(INSN, T1, T2, W, W2, N) \
+  TEST_VADDW1(INSN, T1, T2, W, W2, N)
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement */
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector, uint, 64, 2);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer" */
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 64, 2);
+
+  /* Choose init value arbitrarily for "vector2" */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+
+  TEST_VADDW(INSN_NAME, int, s, 8, 16, 8);
+  TEST_VADDW(INSN_NAME, int, s, 16, 32, 4);
+  TEST_VADDW(INSN_NAME, int, s, 32, 64, 2);
+  TEST_VADDW(INSN_NAME, uint, u, 8, 16, 8);
+  TEST_VADDW(INSN_NAME, uint, u, 16, 32, 4);
+  TEST_VADDW(INSN_NAME, uint, u, 32, 64, 2);
+
+  /* FIXME: only a few result buffers are used, but we output all of them */
+  dump_results_hex (TEST_MSG);
+}
diff --git a/ref_vand.c b/ref_vand.c
new file mode 100644
index 0000000..57e9013
--- /dev/null
+++ b/ref_vand.c
@@ -0,0 +1,29 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#define INSN_NAME vand
+#define TEST_MSG "VAND/VANDQ"
+/* The whole vand test comes from the shared source file included below;
+   presumably it is parameterised by the INSN_NAME and TEST_MSG macros
+   defined above -- TODO confirm against ref_v_binary_op.c.  */
+#include "ref_v_binary_op.c"
diff --git a/ref_vbic.c b/ref_vbic.c
new file mode 100644
index 0000000..0d033ac
--- /dev/null
+++ b/ref_vbic.c
@@ -0,0 +1,29 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+ +*/ + +#define INSN_NAME vbic +#define TEST_MSG "VBIC/VBICQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vbsl.c b/ref_vbsl.c new file mode 100644 index 0000000..4ede2d5 --- /dev/null +++ b/ref_vbsl.c @@ -0,0 +1,104 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VBSL/VBSLQ" +void exec_vbsl (void) +{ + /* Basic test: y=vbsl(unsigned_vec,x,x), then store the result. 
*/ +#define TEST_VBSL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vbsl##Q##_##T2##W(VECT_VAR(vector_first, T3, W, N), \ + VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_UNSIGNED_VARIANTS(vector_first); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used as the last operand + of vbsl. As we want different values for each type variant, we + can't use generic initialization macros. */ + VDUP(vector2, , int, s, 8, 8, -10); + VDUP(vector2, , int, s, 16, 4, -14); + VDUP(vector2, , int, s, 32, 2, -30); + VDUP(vector2, , int, s, 64, 1, -33); + VDUP(vector2, , uint, u, 8, 8, 0xF3); + VDUP(vector2, , uint, u, 16, 4, 0xFFF2); + VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); + VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3); + VDUP(vector2, , float, f, 32, 2, -30.3f); + VDUP(vector2, , poly, p, 8, 8, 0xF3); + VDUP(vector2, , poly, p, 16, 4, 0xFFF2); + + VDUP(vector2, q, int, s, 8, 16, -10); + VDUP(vector2, q, int, s, 16, 8, -14); + VDUP(vector2, q, int, s, 32, 4, -30); + VDUP(vector2, q, int, s, 64, 2, -33); + VDUP(vector2, q, uint, u, 8, 16, 0xF3); + VDUP(vector2, q, uint, u, 16, 8, 0xFFF2); + VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF0); + VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3); + VDUP(vector2, q, poly, p, 8, 16, 0xF3); + VDUP(vector2, q, poly, p, 16, 8, 0xFFF2); + VDUP(vector2, q, float, f, 32, 4, -30.4f); + + VDUP(vector_first, , uint, u, 8, 8, 0xF4); + VDUP(vector_first, , uint, u, 16, 4, 0xFFF6); + VDUP(vector_first, , uint, u, 32, 2, 0xFFFFFFF2); + VDUP(vector_first, , uint, u, 
64, 1, 0xFFFFFFF2); + VDUP(vector_first, q, uint, u, 8, 16, 0xF4); + VDUP(vector_first, q, uint, u, 16, 8, 0xFFF6); + VDUP(vector_first, q, uint, u, 32, 4, 0xFFFFFFF2); + VDUP(vector_first, q, uint, u, 64, 2, 0xFFFFFFF2); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VBSL, uint); + TEST_VBSL(uint, , poly, p, 8, 8); + TEST_VBSL(uint, , poly, p, 16, 4); + TEST_VBSL(uint, q, poly, p, 8, 16); + TEST_VBSL(uint, q, poly, p, 16, 8); + TEST_VBSL(uint, , float, f, 32, 2); + TEST_VBSL(uint, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcage.c b/ref_vcage.c new file mode 100644 index 0000000..b9919f9 --- /dev/null +++ b/ref_vcage.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcage +#define TEST_MSG "VCAGE/VCAGEQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcagt.c b/ref_vcagt.c new file mode 100644 index 0000000..edb6fa0 --- /dev/null +++ b/ref_vcagt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcagt +#define TEST_MSG "VCAGT/VCAGTQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcale.c b/ref_vcale.c new file mode 100644 index 0000000..b221f79 --- /dev/null +++ b/ref_vcale.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcale +#define TEST_MSG "VCALE/VCALEQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcalt.c b/ref_vcalt.c new file mode 100644 index 0000000..189a9ae --- /dev/null +++ b/ref_vcalt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcalt +#define TEST_MSG "VCALT/VCALTQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vceq.c b/ref_vceq.c new file mode 100644 index 0000000..30d8aba --- /dev/null +++ b/ref_vceq.c @@ -0,0 +1,63 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vceq +#define TEST_MSG "VCEQ/VCEQQ" + +/* Extra tests for _p8 variants, which exist only for vceq */ +void exec_vceq_p8(void); +#define EXTRA_TESTS exec_vceq_p8 + +#include "ref_v_comp_op.c" + +void exec_vceq_p8(void) +{ + int i; /* Used by DUMP() */ + + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector, poly, 8, 16); + + DECL_VARIABLE(vector2, poly, 8, 8); + DECL_VARIABLE(vector2, poly, 8, 16); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 8, 16); + + clean_results (); + + VLOAD(vector, buffer, , poly, p, 8, 8); + VLOAD(vector, buffer, q, poly, p, 8, 16); + + VDUP(vector2, , poly, p, 8, 8, 0xF3); + VDUP(vector2, q, poly, p, 8, 16, 0xF4); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG " p8"); + TEST_VCOMP(INSN_NAME, , poly, p, uint, 8, 8); + TEST_VCOMP(INSN_NAME, q, poly, p, uint, 8, 16); + + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + +} diff --git a/ref_vcge.c b/ref_vcge.c new file mode 100644 index 0000000..9e1fdea --- /dev/null +++ b/ref_vcge.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vcge +#define TEST_MSG "VCGE/VCGEQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcgt.c b/ref_vcgt.c new file mode 100644 index 0000000..afd8fe5 --- /dev/null +++ b/ref_vcgt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcgt +#define TEST_MSG "VCGT/VCGTQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcle.c b/ref_vcle.c new file mode 100644 index 0000000..3da8d1a --- /dev/null +++ b/ref_vcle.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcle +#define TEST_MSG "VCLE/VCLEQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcls.c b/ref_vcls.c new file mode 100644 index 0000000..e3efd64 --- /dev/null +++ b/ref_vcls.c @@ -0,0 +1,107 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcls +#define TEST_MSG "VCLS/VCLSQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Fill input vector with arbitrary values */ + VDUP(vector, , int, s, 8, 8, 0x1); + VDUP(vector, , int, s, 16, 4, 0x1234); + VDUP(vector, , int, s, 32, 2, 0x34); + VDUP(vector, q, int, s, 8, 16, 0); + VDUP(vector, q, int, s, 16, 8, 0x1234); + VDUP(vector, q, int, s, 32, 4, 0x678); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex2 (TEST_MSG, " (positive input)"); + + + /* Fill input vector with arbitrary values (negative) */ + VDUP(vector, , int, s, 8, 8, 0xFF); + VDUP(vector, , int, s, 16, 4, 0xC234); + VDUP(vector, , int, s, 32, 2, 0xDEAD0034); + VDUP(vector, q, int, s, 8, 16, 0x80); + VDUP(vector, q, int, s, 16, 8, 0xE234); + VDUP(vector, q, int, s, 32, 4, 0xBEEF0678); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); 
+ TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex2 (TEST_MSG, " (negative input)"); +} diff --git a/ref_vclt.c b/ref_vclt.c new file mode 100644 index 0000000..ce974d9 --- /dev/null +++ b/ref_vclt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vclt +#define TEST_MSG "VCLT/VCLTQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vclz.c b/ref_vclz.c new file mode 100644 index 0000000..aaa799f --- /dev/null +++ b/ref_vclz.c @@ -0,0 +1,142 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vclz +#define TEST_MSG "VCLZ/VCLZQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* Fill input vector with arbitrary values */ + VDUP(vector, , int, s, 8, 8, 0x84); + VDUP(vector, , int, s, 16, 4, 0x1234); + VDUP(vector, , int, s, 32, 2, 0x5678); + VDUP(vector, , uint, u, 8, 8, 0x34); + VDUP(vector, , uint, u, 16, 4, 0x8234); + VDUP(vector, , uint, u, 32, 2, 0x7654321); + VDUP(vector, q, int, s, 8, 16, 0x34); + VDUP(vector, q, int, s, 16, 8, 0x1234); + VDUP(vector, q, int, s, 32, 4, 0x12345678); + VDUP(vector, q, uint, u, 8, 16, 0x13); + VDUP(vector, q, uint, u, 16, 8, 0x4); + VDUP(vector, q, uint, u, 32, 4, 0x1); + + /* Apply a unary operator named INSN_NAME 
*/ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4); + TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + + dump_results_hex (TEST_MSG); + + /* Test with zero as input. */ + VDUP(vector, , int, s, 8, 8, 0); + VDUP(vector, , int, s, 16, 4, 0); + VDUP(vector, , int, s, 32, 2, 0); + VDUP(vector, , uint, u, 8, 8, 0); + VDUP(vector, , uint, u, 16, 4, 0); + VDUP(vector, , uint, u, 32, 2, 0); + VDUP(vector, q, int, s, 8, 16, 0); + VDUP(vector, q, int, s, 16, 8, 0); + VDUP(vector, q, int, s, 32, 4, 0); + VDUP(vector, q, uint, u, 8, 16, 0); + VDUP(vector, q, uint, u, 16, 8, 0); + VDUP(vector, q, uint, u, 32, 4, 0); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4); + TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + + dump_results_hex2 (TEST_MSG, " (input=0)"); +} diff --git a/ref_vcnt.c b/ref_vcnt.c new file mode 100644 index 0000000..e781760 --- /dev/null +++ b/ref_vcnt.c @@ -0,0 +1,88 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person 
obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcnt +#define TEST_MSG "VCNT/VCNTQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* Only 8-bit element variants (int, uint, poly) are tested */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, poly, 8, 16); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, poly, 8, 8); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, poly, 8, 16); + + clean_results (); + + /* Fill input vector with arbitrary values */ + VDUP(vector, , int, s, 8, 8, 0xFF); + VDUP(vector, , uint, u, 8, 8, 0x35); + VDUP(vector, , poly, p, 8, 8, 0x35); + VDUP(vector, q, int, s, 8, 16, 0); + VDUP(vector, q, uint, u, 8, 16, 0xBD); + VDUP(vector, q, poly, p, 8, 16, 0xBD); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, , poly, p, 8, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, poly, p, 8, 16); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcombine.c b/ref_vcombine.c new file mode 100644 index 0000000..cfe5c1a --- /dev/null +++ b/ref_vcombine.c @@ -0,0 +1,100 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without 
restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VCOMBINE" +void exec_vcombine (void) +{ + /* Basic test: vec128=vcombine(vec64_a, vec64_b), then store the result.
 T1 is the element type name and T2 its intrinsic suffix letter, W the element width, N the lane count of the two 64-bit inputs and N2 the lane count of the 128-bit result. The store uses the q form (vst1q) into the result buffer. */ +#define TEST_VCOMBINE(T1, T2, W, N, N2) \ + VECT_VAR(vector128, T1, W, N2) = \ + vcombine_##T2##W(VECT_VAR(vector64_a, T1, W, N), \ + VECT_VAR(vector64_b, T1, W, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vector128, T1, W, N2)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64_a); + DECL_VARIABLE_64BITS_VARIANTS(vector64_b); + DECL_VARIABLE_128BITS_VARIANTS(vector128); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector64_a, float, 16, 4); + DECL_VARIABLE(vector64_b, float, 16, 4); + DECL_VARIABLE(vector64_b_init, uint, 16, 4); + DECL_VARIABLE(vector128, float, 16, 8); +#endif + + TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64_a, buffer); + VLOAD(vector64_a, buffer, , float, f, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + VLOAD(vector64_a, buffer, , float, f, 16, 4); +#endif + + VDUP(vector64_b, , int, s, 8, 8, 0x11); + VDUP(vector64_b, , int, s, 16, 4, 0x22); + VDUP(vector64_b, , int, s, 32, 2, 0x33); + VDUP(vector64_b, , int, s, 64, 1, 0x44); + VDUP(vector64_b, , uint, u, 8, 8, 0x55); + VDUP(vector64_b, , uint, u, 16, 4, 0x66); + VDUP(vector64_b, , uint, u, 32, 2, 0x77); + VDUP(vector64_b, , uint, u, 64, 1, 0x88); + VDUP(vector64_b, , poly, p, 8, 8, 0x55); + VDUP(vector64_b, , poly, p, 16, 4, 0x66); + VDUP(vector64_b, , float, f, 32, 2, 3.3f); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + /* There is no vdup_n_f16, so we need another initialization + method.
 Here the raw 16-bit pattern is duplicated into a uint16 vector, which is then reinterpreted as f16. */ + VDUP(vector64_b_init, , uint, u, 16, 4, 0x4b80 /* 15 */); + VECT_VAR(vector64_b, float, 16, 4) = + vreinterpret_f16_u16(VECT_VAR(vector64_b_init, uint, 16, 4)); +#endif + + clean_results (); + + TEST_VCOMBINE(int, s, 8, 8, 16); + TEST_VCOMBINE(int, s, 16, 4, 8); + TEST_VCOMBINE(int, s, 32, 2, 4); + TEST_VCOMBINE(int, s, 64, 1, 2); + TEST_VCOMBINE(uint, u, 8, 8, 16); + TEST_VCOMBINE(uint, u, 16, 4, 8); + TEST_VCOMBINE(uint, u, 32, 2, 4); + TEST_VCOMBINE(uint, u, 64, 1, 2); + TEST_VCOMBINE(poly, p, 8, 8, 16); + TEST_VCOMBINE(poly, p, 16, 4, 8); + TEST_VCOMBINE(float, f, 32, 2, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VCOMBINE(float, f, 16, 4, 8); +#endif + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcreate.c b/ref_vcreate.c new file mode 100644 index 0000000..cfd50d7 --- /dev/null +++ b/ref_vcreate.c @@ -0,0 +1,119 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Validation of vcreate. NOTE(review): this header used to read as a template for unary operators, but this file defines INSN_NAME itself just below and is not shown being included by another file. */ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcreate +#define TEST_MSG "VCREATE" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=vcreate(x), then store the result. x is the uint64_t scalar declared by DECL_VAL below; y is written to the result buffer with the non-q vst1 store. */ +#define TEST_VCREATE(T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = vcreate_##T2##W(VECT_VAR(val, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + +#define DECL_VAL(VAR, T1, W, N) \ + uint64_t VECT_VAR(VAR, T1, W, N) + + DECL_VAL(val, int, 8, 8); + DECL_VAL(val, int, 16, 4); + DECL_VAL(val, int, 32, 2); + DECL_VAL(val, int, 64, 1); + DECL_VAL(val, float, 32, 2); + DECL_VAL(val, uint, 8, 8); + DECL_VAL(val, uint, 16, 4); + DECL_VAL(val, uint, 32, 2); + DECL_VAL(val, uint, 64, 1); + DECL_VAL(val, poly, 8, 8); + DECL_VAL(val, poly, 16, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VAL(val, float, 16, 4); +#endif + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector_res, float, 16, 4); +#endif + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); +
DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + DECL_VARIABLE(vector_res, poly, 8, 8); + DECL_VARIABLE(vector_res, poly, 16, 4); + + clean_results (); + + /* Initialize input values arbitrarily (every variant gets the same 64-bit pattern) */ + VECT_VAR(val, int, 8, 8) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 16, 4) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 32, 2) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 64, 1) = 0x123456789abcdef0LL; + VECT_VAR(val, float, 32, 2) = 0x123456789abcdef0LL; +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + VECT_VAR(val, float, 16, 4) = 0x123456789abcdef0LL; +#endif + VECT_VAR(val, uint, 8, 8) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 16, 4) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 32, 2) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 64, 1) = 0x123456789abcdef0ULL; + VECT_VAR(val, poly, 8, 8) = 0x123456789abcdef0ULL; + VECT_VAR(val, poly, 16, 4) = 0x123456789abcdef0ULL; + + TEST_VCREATE(int, s, 8, 8); + TEST_VCREATE(int, s, 16, 4); + TEST_VCREATE(int, s, 32, 2); + TEST_VCREATE(float, f, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VCREATE(float, f, 16, 4); +#endif + TEST_VCREATE(int, s, 64, 1); + TEST_VCREATE(uint, u, 8, 8); + TEST_VCREATE(uint, u, 16, 4); + TEST_VCREATE(uint, u, 32, 2); + TEST_VCREATE(uint, u, 64, 1); + TEST_VCREATE(poly, p, 8, 8); + TEST_VCREATE(poly, p, 16, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcvt.c b/ref_vcvt.c new file mode 100644 index 0000000..a22613f --- /dev/null +++ b/ref_vcvt.c @@ -0,0 +1,236 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +#include <math.h> + +#define TEST_MSG "VCVT/VCVTQ" +void exec_vcvt (void) +{ + int i; + + /* Basic test: y=vcvt(x), then store the result.
 T1/T2 name the destination type and its suffix letter, TS1/TS2 the source type and its suffix letter. Each macro dumps its result right after the store, because the result buffers are reused by later conversions. */ +#define TEST_VCVT(Q, T1, T2, W, N, TS1, TS2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VCVT_FP(Q, T1, T2, W, N, TS1, TS2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_VCVT_FP16(T1, T2, W1, W2, N) \ + VECT_VAR(vector_res, T1, W1, N) = \ + vcvt_##T2##W1##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ + vst1q_##T2##W1(VECT_VAR(result, T1, W1, N), \ + VECT_VAR(vector_res, T1, W1, N)); \ + DUMP_FP(TEST_MSG, T1, W1, N, PRIx##W1); + +#define TEST_VCVT_2FP16(T1, T2, W1, W2, N) \ + VECT_VAR(vector_res, T1, W1, N) = \ + vcvt_##T2##W1##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ + vst1_##T2##W1(VECT_VAR(result, T1, W1, N), \ + VECT_VAR(vector_res, T1, W1, N)); \ + DUMP_FP16(TEST_MSG, T1, W1, N, PRIx##W1); +#endif + +#define TEST_VCVT_N(Q, T1, T2, W, N, TS1, TS2, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VCVT_N_FP(Q, T1, T2, W, N, TS1, TS2, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); +#if defined(__ARM_FP16_FORMAT_IEEE)
&& ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector_init, uint, 16, 4); + DECL_VARIABLE(vector_init, uint, 16, 8); + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); + DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif + + clean_results (); + + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + + /* Make sure some elements have a fractional part, to exercise + integer conversions */ + TEST_VSET_LANE(vector, , float, f, 32, 2, 0, -15.3f); + TEST_VSET_LANE(vector, , float, f, 32, 2, 1, 5.3f); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, -15.3f); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, 5.3f); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + /* FP16 tests. */ + /* There is no vdup_n_f16, so we need another initialization + method. */ + /* Use all lanes with values of different types: first, a "standard" + positive number, a "standard" negative one, and +0 and -0.
 The lanes are set as raw 16-bit patterns in a uint16 vector, which is then reinterpreted as f16. */ + TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 0, 0x4b90 /* 15.125 */); + TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 1, 0xcb90 /* -15.125 */); + TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 2, 0 /* 0 */); + TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 3, 0x8000 /* -0 */); + VECT_VAR(vector, float, 16, 4) = + vreinterpret_f16_u16(VECT_VAR(vector_init, uint, 16, 4)); +#endif + + /* The same result buffers are used multiple times, so we output + them before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + + /* vcvt_f32_xx */ + TEST_VCVT_FP(, float, f, 32, 2, int, s); + TEST_VCVT_FP(, float, f, 32, 2, uint, u); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VCVT_FP16(float, f, 32, 16, 4); +#endif + + /* vcvtq_f32_xx */ + TEST_VCVT_FP(q, float, f, 32, 4, int, s); + TEST_VCVT_FP(q, float, f, 32, 4, uint, u); + + /* vcvt_xx_f32 */ + TEST_VCVT(, int, s, 32, 2, float, f); + TEST_VCVT(, uint, u, 32, 2, float, f); + + TEST_VSET_LANE(vector, q, float, f, 32, 4, 0, 0.0f); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 1, -0.0f); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, 15.12f); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, -15.12f); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VCVT_2FP16(float, f, 16, 32, 4); +#endif + + /* vcvtq_xx_f32 */ + TEST_VCVT(q, int, s, 32, 4, float, f); + TEST_VCVT(q, uint, u, 32, 4, float, f); + + /* The same result buffers are used multiple times, so output them + before overwriting them */ +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + + /* vcvt_n_f32_xx */ + TEST_VCVT_N_FP(, float, f, 32, 2, int, s, 2); + TEST_VCVT_N_FP(, float, f, 32, 2, uint, u, 7); + + /* vcvtq_n_f32_xx */ + TEST_VCVT_N_FP(q, float, f,
32, 4, int, s, 30); + TEST_VCVT_N_FP(q, float, f, 32, 4, uint, u, 12); + + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 20); + TEST_VCVT_N(, uint, u, 32, 2, float, f, 2); + + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 13); + TEST_VCVT_N(q, uint, u, 32, 4, float, f, 1); + + /* Check rounding (from here on the section headers are written to ref_file only) */ +#undef TEST_MSG +#define TEST_MSG "VCVT/VCVTQ" + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check rounding)"); + VDUP(vector, , float, f, 32, 2, 10.4f); + VDUP(vector, q, float, f, 32, 4, 125.9f); + /* vcvt_xx_f32 */ + TEST_VCVT(, int, s, 32, 2, float, f); + TEST_VCVT(, uint, u, 32, 2, float, f); + /* vcvtq_xx_f32 */ + TEST_VCVT(q, int, s, 32, 4, float, f); + TEST_VCVT(q, uint, u, 32, 4, float, f); + +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check rounding)"); + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 20); + TEST_VCVT_N(, uint, u, 32, 2, float, f, 20); + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 13); + TEST_VCVT_N(q, uint, u, 32, 4, float, f, 13); + +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check saturation)"); + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 31); + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 31); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#undef TEST_MSG +#define TEST_MSG "VCVT FP16" + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check fp16-fp32 inf/nan/denormal)"); + TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 0, 0x0390 /* DENORMAL */); + TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 1, 0x7c00 /* inf */); + TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 2, 0x7e00 /* nan */); + TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 3, 0xfc00 /* -inf */); + VECT_VAR(vector, float, 16, 4) = + vreinterpret_f16_u16(VECT_VAR(vector_init, uint, 16, 4)); +
TEST_VCVT_FP16(float, f, 32, 16, 4); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check fp32-fp16 inf/nan)"); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 0, NAN); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 1, HUGE_VALF); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, -HUGE_VALF); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, -0.0f); + TEST_VCVT_2FP16(float, f, 16, 32, 4); +#endif +} diff --git a/ref_vdup.c b/ref_vdup.c new file mode 100644 index 0000000..286a8ae --- /dev/null +++ b/ref_vdup.c @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VDUP/VDUPQ" +void exec_vdup (void) +{ + int i; + + /* Basic test: vec=vdup(x), then store the result.
 x is element i of the matching buffer_dup array, where i is the index of the enclosing loop. */ +#define TEST_VDUP(Q, T1, T2, W, N) \ + VECT_VAR(vector, T1, W, N) = \ + vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* Basic test: vec=vmov(x), then store the result. Same input selection as TEST_VDUP. */ +#define TEST_VMOV(Q, T1, T2, W, N) \ + VECT_VAR(vector, T1, W, N) = \ + vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + for (i=0; i< 3; i++) { + clean_results (); + + TEST_VDUP(, int, s, 8, 8); + TEST_VDUP(, int, s, 16, 4); + TEST_VDUP(, int, s, 32, 2); + TEST_VDUP(, int, s, 64, 1); + TEST_VDUP(, uint, u, 8, 8); + TEST_VDUP(, uint, u, 16, 4); + TEST_VDUP(, uint, u, 32, 2); + TEST_VDUP(, uint, u, 64, 1); + TEST_VDUP(, poly, p, 8, 8); + TEST_VDUP(, poly, p, 16, 4); + TEST_VDUP(, float, f, 32, 2); + + TEST_VDUP(q, int, s, 8, 16); + TEST_VDUP(q, int, s, 16, 8); + TEST_VDUP(q, int, s, 32, 4); + TEST_VDUP(q, int, s, 64, 2); + TEST_VDUP(q, uint, u, 8, 16); + TEST_VDUP(q, uint, u, 16, 8); + TEST_VDUP(q, uint, u, 32, 4); + TEST_VDUP(q, uint, u, 64, 2); + TEST_VDUP(q, poly, p, 8, 16); + TEST_VDUP(q, poly, p, 16, 8); + TEST_VDUP(q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); + } + +#undef TEST_MSG +#define TEST_MSG "VMOV/VMOVQ" + for (i=0; i< 3; i++) { + clean_results (); + + TEST_VMOV(, int, s, 8, 8); + TEST_VMOV(, int, s, 16, 4); + TEST_VMOV(, int, s, 32, 2); + TEST_VMOV(, int, s, 64, 1); + TEST_VMOV(, uint, u, 8, 8); + TEST_VMOV(, uint, u, 16, 4); + TEST_VMOV(, uint, u, 32, 2); + TEST_VMOV(, uint, u, 64, 1); + TEST_VMOV(, poly, p, 8, 8); + TEST_VMOV(, poly, p, 16, 4); + TEST_VMOV(, float, f, 32, 2); + + TEST_VMOV(q, int, s, 8, 16); + TEST_VMOV(q, int, s, 16, 8); + TEST_VMOV(q, int, s, 32, 4); + TEST_VMOV(q, int, s, 64, 2); + TEST_VMOV(q, uint, u, 8, 16); + TEST_VMOV(q, uint, u, 16, 8); +
TEST_VMOV(q, uint, u, 32, 4); + TEST_VMOV(q, uint, u, 64, 2); + TEST_VMOV(q, poly, p, 8, 16); + TEST_VMOV(q, poly, p, 16, 8); + TEST_VMOV(q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); + } +} diff --git a/ref_vdup_lane.c b/ref_vdup_lane.c new file mode 100644 index 0000000..302fd37 --- /dev/null +++ b/ref_vdup_lane.c @@ -0,0 +1,81 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VDUP_LANE/VDUP_LANEQ" +void exec_vdup_lane (void) +{ + /* Basic test: vec1=vdup_lane(vec2, lane), then store the result.
 N is the lane count of the result vec1, N2 the lane count of the 64-bit input vec2, and L the lane that is duplicated. */ +#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vdup##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N2), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + /* Input vector can only have 64 bits */ + DECL_VARIABLE_64BITS_VARIANTS(vector); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer); + VLOAD(vector, buffer, , float, f, 32, 2); + + /* Choose lane arbitrarily */ + TEST_VDUP_LANE(, int, s, 8, 8, 8, 1); + TEST_VDUP_LANE(, int, s, 16, 4, 4, 2); + TEST_VDUP_LANE(, int, s, 32, 2, 2, 1); + TEST_VDUP_LANE(, int, s, 64, 1, 1, 0); + TEST_VDUP_LANE(, uint, u, 8, 8, 8, 7); + TEST_VDUP_LANE(, uint, u, 16, 4, 4, 3); + TEST_VDUP_LANE(, uint, u, 32, 2, 2, 1); + TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); + TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7); + TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3); + TEST_VDUP_LANE(, float, f, 32, 2, 2, 1); + + TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2); + TEST_VDUP_LANE(q, int, s, 16, 8, 4, 3); + TEST_VDUP_LANE(q, int, s, 32, 4, 2, 1); + TEST_VDUP_LANE(q, int, s, 64, 2, 1, 0); + TEST_VDUP_LANE(q, uint, u, 8, 16, 8, 5); + TEST_VDUP_LANE(q, uint, u, 16, 8, 4, 1); + TEST_VDUP_LANE(q, uint, u, 32, 4, 2, 0); + TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); + TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5); + TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1); + TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_veor.c b/ref_veor.c new file mode 100644 index 0000000..50226ff --- /dev/null +++ b/ref_veor.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without 
limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME veor +#define TEST_MSG "VEOR/VEORQ" + +#include "ref_v_binary_op.c" /* NOTE(review): presumably the shared binary-operator test body, driven by the INSN_NAME and TEST_MSG defined above; that file is not visible here, confirm. */ diff --git a/ref_vext.c b/ref_vext.c new file mode 100644 index 0000000..32e84ec --- /dev/null +++ b/ref_vext.c @@ -0,0 +1,108 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VEXT/VEXTQ" +void exec_vext (void) +{ + /* vector_res = vext(vector1,vector2,offset), then store the result. V is the extract offset passed as the third intrinsic argument; vector1 is loaded from buffer and vector2 is filled with a constant. */ +#define TEST_VEXT(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vext##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); + VLOAD(vector1, buffer, , float, f, 32, 2); + VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose arbitrary initialization values */ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , int, s, 64, 1, 0x44); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , uint, u, 64, 1, 0x88); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2,
0x88); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); + VDUP(vector2, q, float, f, 32, 4, 33.2f); + + /* Choose arbitrary extract offsets */ + TEST_VEXT(, int, s, 8, 8, 7); + TEST_VEXT(, int, s, 16, 4, 3); + TEST_VEXT(, int, s, 32, 2, 1); + TEST_VEXT(, int, s, 64, 1, 0); + TEST_VEXT(, uint, u, 8, 8, 6); + TEST_VEXT(, uint, u, 16, 4, 2); + TEST_VEXT(, uint, u, 32, 2, 1); + TEST_VEXT(, uint, u, 64, 1, 0); + TEST_VEXT(, poly, p, 8, 8, 6); + TEST_VEXT(, poly, p, 16, 4, 2); + TEST_VEXT(, float, f, 32, 2, 1); + + TEST_VEXT(q, int, s, 8, 16, 14); + TEST_VEXT(q, int, s, 16, 8, 7); + TEST_VEXT(q, int, s, 32, 4, 3); + TEST_VEXT(q, int, s, 64, 2, 1); + TEST_VEXT(q, uint, u, 8, 16, 12); + TEST_VEXT(q, uint, u, 16, 8, 6); + TEST_VEXT(q, uint, u, 32, 4, 3); + TEST_VEXT(q, uint, u, 64, 2, 1); + TEST_VEXT(q, poly, p, 8, 16, 12); + TEST_VEXT(q, poly, p, 16, 8, 6); + TEST_VEXT(q, float, f, 32, 4, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vget_high.c b/ref_vget_high.c new file mode 100644 index 0000000..3c894ae --- /dev/null +++ b/ref_vget_high.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VGET_HIGH" +void exec_vget_high (void) +{ + /* Basic test: vec64=vget_high(vec128), then store the result. */ +#define TEST_VGET_HIGH(T1, T2, W, N, N2) \ + VECT_VAR(vector64, T1, W, N) = \ + vget_high_##T2##W(VECT_VAR(vector128, T1, W, N2)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector64, float, 16, 4); + DECL_VARIABLE(vector128, float, 16, 8); +#endif + + TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); + VLOAD(vector128, buffer, q, float, f, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + VLOAD(vector128, buffer, q, float, f, 16, 8); +#endif + + clean_results (); + + TEST_VGET_HIGH(int, s, 8, 8, 16); + TEST_VGET_HIGH(int, s, 16, 4, 8); + TEST_VGET_HIGH(int, s, 32, 2, 4); + TEST_VGET_HIGH(int, s, 64, 1, 2); + TEST_VGET_HIGH(uint, u, 8, 8, 16); + TEST_VGET_HIGH(uint, u, 16, 4, 8); + TEST_VGET_HIGH(uint, u, 32, 2, 4); + TEST_VGET_HIGH(uint, u, 64, 1, 2); + TEST_VGET_HIGH(poly, p, 8, 8, 16); + TEST_VGET_HIGH(poly, p, 16, 4, 8); + TEST_VGET_HIGH(float, f, 32, 2, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VGET_HIGH(float, f, 16, 4, 8); +#endif + + 
dump_results_hex (TEST_MSG); +} diff --git a/ref_vget_lane.c b/ref_vget_lane.c new file mode 100644 index 0000000..0789d24 --- /dev/null +++ b/ref_vget_lane.c @@ -0,0 +1,109 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vget_lane (void) +{ + /* vec=vget_lane(vec, lane), then store the result. */ +#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \ + VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ + fprintf(ref_file, "%s: %" PRIx##W "\n", "vget"STR(Q)"_lane_"STR(T2##W), VAR(var, T1, W)) + + /* Special variant for poly* types, to clear sign bits in output. 
*/ +#define TEST_VGET_LANE_POLY(Q, T1, T2, W, N, L) \ + VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ + fprintf(ref_file, "%s: %" PRIx##W "\n", "vget"STR(Q)"_lane_"STR(T2##W), \ + (uint##W##_t)VAR(var, T1, W)) + + /* Special variant for floating-point */ + union { + uint32_t var_int32; + float var_float32; + } var_int32_float32; + +#define TEST_VGET_LANE_F(Q, T1, T2, W, N, L) \ + VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ + var_int##W##_float##W.var_float##W = VAR(var, T1, W); \ + fprintf(ref_file, "%s: %" PRIx##W "\n", "vget"STR(Q)"_lane_"STR(T2##W), var_int##W##_float##W.var_int##W) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + /* Scalar variables */ + VAR_DECL(var, int, 8); + VAR_DECL(var, int, 16); + VAR_DECL(var, int, 32); + VAR_DECL(var, int, 64); + VAR_DECL(var, uint, 8); + VAR_DECL(var, uint, 16); + VAR_DECL(var, uint, 32); + VAR_DECL(var, uint, 64); + VAR_DECL(var, poly, 8); + VAR_DECL(var, poly, 16); + VAR_DECL(var, float, 32); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + fprintf(ref_file, "\n%s output:\n", "VGET_LANE/VGETQ_LANE"); + + /* Choose lane arbitrarily */ + TEST_VGET_LANE(, int, s, 8, 8, 7); + TEST_VGET_LANE(, int, s, 16, 4, 3); + TEST_VGET_LANE(, int, s, 32, 2, 1); + TEST_VGET_LANE(, int, s, 64, 1, 0); + TEST_VGET_LANE(, uint, u, 8, 8, 6); + TEST_VGET_LANE(, uint, u, 16, 4, 2); + TEST_VGET_LANE(, uint, u, 32, 2, 1); + TEST_VGET_LANE(, uint, u, 64, 1, 0); + TEST_VGET_LANE_POLY(, poly, p, 8, 8, 6); + TEST_VGET_LANE_POLY(, poly, p, 16, 4, 2); + TEST_VGET_LANE_F(, float, f, 32, 2, 1); + + TEST_VGET_LANE(q, int, s, 8, 16, 15); + TEST_VGET_LANE(q, int, s, 16, 8, 5); + TEST_VGET_LANE(q, int, s, 32, 4, 3); + TEST_VGET_LANE(q, int, s, 64, 2, 1); + TEST_VGET_LANE(q, uint, u, 8, 16, 14); + 
TEST_VGET_LANE(q, uint, u, 16, 8, 6); + TEST_VGET_LANE(q, uint, u, 32, 4, 2); + TEST_VGET_LANE(q, uint, u, 64, 2, 1); + TEST_VGET_LANE_POLY(q, poly, p, 8, 16, 14); + TEST_VGET_LANE_POLY(q, poly, p, 16, 8, 6); + TEST_VGET_LANE_F(q, float, f, 32, 4, 3); + + fprintf(ref_file, "\n"); +} diff --git a/ref_vget_low.c b/ref_vget_low.c new file mode 100644 index 0000000..1ed87c9 --- /dev/null +++ b/ref_vget_low.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VGET_LOW" +void exec_vget_low (void) +{ + /* Basic test: vec64=vget_low(vec128), then store the result: for each element type, TEST_VGET_LOW assigns vget_low of the 128-bit input (N2 lanes) to the 64-bit vector (N lanes) and writes it to the matching result buffer with vst1. 
*/ +#define TEST_VGET_LOW(T1, T2, W, N, N2) \ + VECT_VAR(vector64, T1, W, N) = \ + vget_low_##T2##W(VECT_VAR(vector128, T1, W, N2)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector64, float, 16, 4); + DECL_VARIABLE(vector128, float, 16, 8); +#endif /* half-precision vectors are declared only under the FP16 guard */ + + TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); /* presumably loads every 128-bit input vector from buffer; VLOAD is defined elsewhere */ + VLOAD(vector128, buffer, q, float, f, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + VLOAD(vector128, buffer, q, float, f, 16, 8); +#endif + + clean_results (); + + TEST_VGET_LOW(int, s, 8, 8, 16); + TEST_VGET_LOW(int, s, 16, 4, 8); + TEST_VGET_LOW(int, s, 32, 2, 4); + TEST_VGET_LOW(int, s, 64, 1, 2); + TEST_VGET_LOW(uint, u, 8, 8, 16); + TEST_VGET_LOW(uint, u, 16, 4, 8); + TEST_VGET_LOW(uint, u, 32, 2, 4); + TEST_VGET_LOW(uint, u, 64, 1, 2); + TEST_VGET_LOW(poly, p, 8, 8, 16); + TEST_VGET_LOW(poly, p, 16, 4, 8); + TEST_VGET_LOW(float, f, 32, 2, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VGET_LOW(float, f, 16, 4, 8); +#endif + + dump_results_hex (TEST_MSG); /* helper defined elsewhere; presumably prints the result buffers under the TEST_MSG label */ +} diff --git a/ref_vhadd.c b/ref_vhadd.c new file mode 100644 index 0000000..f7ab21b --- /dev/null +++ b/ref_vhadd.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vhadd +#define TEST_MSG "VHADD/VHADDQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vhsub.c b/ref_vhsub.c new file mode 100644 index 0000000..859df59 --- /dev/null +++ b/ref_vhsub.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vhsub +#define TEST_MSG "VHSUB/VHSUBQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vld1.c b/ref_vld1.c new file mode 100644 index 0000000..47c6db3 --- /dev/null +++ b/ref_vld1.c @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1/VLD1Q" +void exec_vld1 (void) +{ + /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector) */ + /* This test actually tests vld1 and vst1 at the same time: TEST_VLD1 loads VAR from BUF with vld1 and immediately writes it to the matching result buffer with vst1 */ +#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); +#endif /* half-precision vectors are declared only under the FP16 guard */ + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer); + + TEST_VLD1(vector, buffer, , float, f, 32, 2); + TEST_VLD1(vector, buffer, q, float, f, 32, 4); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VLD1(vector, buffer, , float, f, 16, 4); + TEST_VLD1(vector, buffer, q, float, f, 16, 8); +#endif + + dump_results_hex (TEST_MSG); /* helper defined elsewhere; presumably prints the result buffers under the TEST_MSG label */ +} diff --git a/ref_vld1_dup.c b/ref_vld1_dup.c new file mode 100644 index 0000000..d5eb80d --- /dev/null +++ b/ref_vld1_dup.c @@ -0,0 +1,69 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright 
notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1_DUP/VLD1_DUPQ" +void exec_vld1_dup (void) +{ + int i; + + /* Fill vector with buffer item #i: vld1_dup reads the single element at index i of BUF (i is the loop counter of the enclosing function), and the vector is then written to the matching result buffer with vst1 */ +#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = \ + vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); +#endif /* half-precision vectors are declared only under the FP16 guard */ + + /* Try to read different places from the input buffer: i takes the values 0, 1 and 2, and the results are cleaned and dumped once per value */ + for (i=0; i<3; i++) { + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer_dup); + + TEST_VLD1_DUP(vector, buffer_dup, , float, f, 32, 2); + TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 32, 4); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VLD1_DUP(vector, buffer_dup, , float, f, 16, 4); + TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 16, 8); +#endif + + dump_results_hex (TEST_MSG); /* helper defined elsewhere; presumably prints the result buffers under the TEST_MSG label */ + } +} diff --git a/ref_vld1_lane.c b/ref_vld1_lane.c new 
file mode 100644 index 0000000..9736748 --- /dev/null +++ b/ref_vld1_lane.c @@ -0,0 +1,130 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1_LANE/VLD1_LANEQ" +void exec_vld1_lane (void) +{ + /* Fill buffer_src with 0xAA bytes (W/8*N bytes) and load it into vector_src, then call vld1_lane with buffer, vector_src and lane index L, and store the resulting vector into the matching result buffer */ +#define TEST_VLD1_LANE(Q, T1, T2, W, N, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, W/8*N); \ + VECT_VAR(vector_src, T1, W, N) = \ + vld1##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + VECT_VAR(vector, T1, W, N) = \ + vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_VAR(vector_src, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); +#endif + DECL_VARIABLE_ALL_VARIANTS(vector_src); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector_src, float, 16, 4); + DECL_VARIABLE(vector_src, float, 16, 8); +#endif + + ARRAY(buffer_src, int, 8, 8); /* one buffer_src array per 64-bit vector variant; each is overwritten by the memset in TEST_VLD1_LANE */ + ARRAY(buffer_src, int, 16, 4); + ARRAY(buffer_src, int, 32, 2); + ARRAY(buffer_src, int, 64, 1); + ARRAY(buffer_src, uint, 8, 8); + ARRAY(buffer_src, uint, 16, 4); + ARRAY(buffer_src, uint, 32, 2); + ARRAY(buffer_src, uint, 64, 1); + ARRAY(buffer_src, poly, 8, 8); + ARRAY(buffer_src, poly, 16, 4); + ARRAY(buffer_src, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + ARRAY(buffer_src, float, 16, 4); +#endif + + ARRAY(buffer_src, int, 8, 16); /* same again for the 128-bit vector variants */ + ARRAY(buffer_src, int, 16, 8); + ARRAY(buffer_src, int, 32, 4); + ARRAY(buffer_src, int, 64, 2); + ARRAY(buffer_src, uint, 8, 16); + ARRAY(buffer_src, uint, 16, 8); + ARRAY(buffer_src, uint, 
32, 4); + ARRAY(buffer_src, uint, 64, 2); + ARRAY(buffer_src, poly, 8, 16); + ARRAY(buffer_src, poly, 16, 8); + ARRAY(buffer_src, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + ARRAY(buffer_src, float, 16, 8); +#endif + + clean_results (); + + /* Choose lane arbitrarily (the last macro argument is the lane index L) */ + TEST_VLD1_LANE(, int, s, 8, 8, 6); + TEST_VLD1_LANE(, int, s, 16, 4, 3); + TEST_VLD1_LANE(, int, s, 32, 2, 1); + TEST_VLD1_LANE(, int, s, 64, 1, 0); + TEST_VLD1_LANE(, uint, u, 8, 8, 7); + TEST_VLD1_LANE(, uint, u, 16, 4, 3); + TEST_VLD1_LANE(, uint, u, 32, 2, 1); + TEST_VLD1_LANE(, uint, u, 64, 1, 0); + TEST_VLD1_LANE(, poly, p, 8, 8, 7); + TEST_VLD1_LANE(, poly, p, 16, 4, 3); + TEST_VLD1_LANE(, float, f, 32, 2, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VLD1_LANE(, float, f, 16, 4, 2); +#endif + + TEST_VLD1_LANE(q, int, s, 8, 16, 15); + TEST_VLD1_LANE(q, int, s, 16, 8, 5); + TEST_VLD1_LANE(q, int, s, 32, 4, 2); + TEST_VLD1_LANE(q, int, s, 64, 2, 1); + TEST_VLD1_LANE(q, uint, u, 8, 16, 12); + TEST_VLD1_LANE(q, uint, u, 16, 8, 6); + TEST_VLD1_LANE(q, uint, u, 32, 4, 2); + TEST_VLD1_LANE(q, uint, u, 64, 2, 0); + TEST_VLD1_LANE(q, poly, p, 8, 16, 12); + TEST_VLD1_LANE(q, poly, p, 16, 8, 6); + TEST_VLD1_LANE(q, float, f, 32, 4, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VLD1_LANE(q, float, f, 16, 8, 5); +#endif + +#ifndef __CC_ARM + /* Check runtime assertions. 
With RVCT, the check is performed at + compile-time. The out-of-range call below (lane 1 of a 1-lane vector) is left commented out, so nothing is checked here at present. */ + // TEST_VLD1_LANE(, int, s, 64, 1, 1); +#endif + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vldX.c b/ref_vldX.c new file mode 100644 index 0000000..555ea47 --- /dev/null +++ b/ref_vldX.c @@ -0,0 +1,221 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. 
A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ +#define TEST_VLDX(Q, T1, T2, W, N, X) \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + /* Use dedicated init buffer, of size X */ \ + vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X)); \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with chunk Y of "result_bis": sizeof(result) bytes are copied, starting at element Y*N of result_bis_X. Note that the macro body already ends with a semicolon. */ +#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant (the q lists below stop at 32-bit elements) */ +#define DECL_ALL_VLDX(X) \ + DECL_VLDX(int, 8, 8, X); \ + DECL_VLDX(int, 16, 4, X); \ + DECL_VLDX(int, 32, 2, X); \ + DECL_VLDX(int, 64, 1, X); \ + DECL_VLDX(uint, 8, 8, X); \ + DECL_VLDX(uint, 16, 4, X); \ + DECL_VLDX(uint, 32, 2, X); \ + DECL_VLDX(uint, 64, 1, X); \ + DECL_VLDX(poly, 8, 8, X); \ + DECL_VLDX(poly, 16, 4, X); \ + DECL_VLDX(float, 32, 2, X); \ + DECL_VLDX(int, 8, 16, X); \ + DECL_VLDX(int, 16, 8, X); \ + DECL_VLDX(int, 32, 4, X); \ + DECL_VLDX(uint, 8, 16, X); \ + DECL_VLDX(uint, 16, 8, X); \ + DECL_VLDX(uint, 32, 4, X); \ + DECL_VLDX(poly, 8, 16, X); \ + DECL_VLDX(poly, 16, 8, X); \ + DECL_VLDX(float, 32, 4, X) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define DECL_ALL_VLDX_FP16(X) \ + DECL_VLDX(float, 16, 4, X); \ + DECL_VLDX(float, 16, 8, X) +#endif + +#define TEST_ALL_VLDX(X) \ + TEST_VLDX(, int, s, 8, 8, X); \ + TEST_VLDX(, int, s, 16, 4, X); \ + TEST_VLDX(, 
int, s, 32, 2, X); \ + TEST_VLDX(, int, s, 64, 1, X); \ + TEST_VLDX(, uint, u, 8, 8, X); \ + TEST_VLDX(, uint, u, 16, 4, X); \ + TEST_VLDX(, uint, u, 32, 2, X); \ + TEST_VLDX(, uint, u, 64, 1, X); \ + TEST_VLDX(, poly, p, 8, 8, X); \ + TEST_VLDX(, poly, p, 16, 4, X); \ + TEST_VLDX(, float, f, 32, 2, X); \ + TEST_VLDX(q, int, s, 8, 16, X); \ + TEST_VLDX(q, int, s, 16, 8, X); \ + TEST_VLDX(q, int, s, 32, 4, X); \ + TEST_VLDX(q, uint, u, 8, 16, X); \ + TEST_VLDX(q, uint, u, 16, 8, X); \ + TEST_VLDX(q, uint, u, 32, 4, X); \ + TEST_VLDX(q, poly, p, 8, 16, X); \ + TEST_VLDX(q, poly, p, 16, 8, X); \ + TEST_VLDX(q, float, f, 32, 4, X) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_ALL_VLDX_FP16(X) \ + TEST_VLDX(, float, f, 16, 4, X); \ + TEST_VLDX(q, float, f, 16, 8, X) +#endif + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 16, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(poly, 8, 16, X, Y); \ + TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ + TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 16, 8, X, Y) +#endif + + DECL_ALL_VLDX(2); + DECL_ALL_VLDX(3); 
+ DECL_ALL_VLDX(4); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_ALL_VLDX_FP16(2); + DECL_ALL_VLDX_FP16(3); + DECL_ALL_VLDX_FP16(4); +#endif + + /* Check vld2/vld2q: chunk 0 is dumped right after the loads, then chunk 1 is copied from result_bis and dumped */ + clean_results (); +#define TEST_MSG "VLD2/VLD2Q" + TEST_ALL_VLDX(2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_FP16(2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3/vld3q: same scheme, with chunks 0, 1 and 2 dumped in turn */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3/VLD3Q" + TEST_ALL_VLDX(3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_FP16(3); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vld4/vld4q: same scheme, with chunks 0 to 3 dumped in turn */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4/VLD4Q" + TEST_ALL_VLDX(4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_FP16(4); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); +#endif + 
dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); +#endif + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vldX_dup.c b/ref_vldX_dup.c new file mode 100644 index 0000000..15ba2c8 --- /dev/null +++ b/ref_vldX_dup.c @@ -0,0 +1,187 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX_dup (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX_DUP(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is copied from result_bis to result, and it is this + subset which is used to check the actual behaviour. The + TEST_EXTRA_CHUNK macro below copies another chunk of data from result_bis to + result. The load always reads from element 0 of buffer_dup. */ +#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_dup_##T2##W(&VECT_VAR(buffer_dup, T1, W, N)[0]); \ + \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + + /* Overwrite "result" with chunk Y of "result_bis": sizeof(result) bytes are copied, starting at element Y*N of result_bis_X. Note that the macro body already ends with a semicolon. */ +#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement. Only 64-bit (non-q) variants are declared here. */ +#define DECL_ALL_VLDX_DUP(X) \ + DECL_VLDX_DUP(int, 8, 8, X); \ + DECL_VLDX_DUP(int, 16, 4, X); \ + DECL_VLDX_DUP(int, 32, 2, X); \ + DECL_VLDX_DUP(int, 64, 1, X); \ + DECL_VLDX_DUP(uint, 8, 8, X); \ + DECL_VLDX_DUP(uint, 16, 4, X); \ + DECL_VLDX_DUP(uint, 32, 2, X); \ + DECL_VLDX_DUP(uint, 64, 1, X); \ + DECL_VLDX_DUP(poly, 8, 8, X); \ + DECL_VLDX_DUP(poly, 16, 4, X); \ + DECL_VLDX_DUP(float, 32, 2, X) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define DECL_ALL_VLDX_DUP_FP16(X) \ 
+ DECL_VLDX_DUP(float, 16, 4, X) +#endif + +#define TEST_ALL_VLDX_DUP(X) \ + TEST_VLDX_DUP(, int, s, 8, 8, X); \ + TEST_VLDX_DUP(, int, s, 16, 4, X); \ + TEST_VLDX_DUP(, int, s, 32, 2, X); \ + TEST_VLDX_DUP(, int, s, 64, 1, X); \ + TEST_VLDX_DUP(, uint, u, 8, 8, X); \ + TEST_VLDX_DUP(, uint, u, 16, 4, X); \ + TEST_VLDX_DUP(, uint, u, 32, 2, X); \ + TEST_VLDX_DUP(, uint, u, 64, 1, X); \ + TEST_VLDX_DUP(, poly, p, 8, 8, X); \ + TEST_VLDX_DUP(, poly, p, 16, 4, X); \ + TEST_VLDX_DUP(, float, f, 32, 2, X) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_ALL_VLDX_DUP_FP16(X) \ + TEST_VLDX_DUP(, float, f, 16, 4, X) +#endif + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ + TEST_EXTRA_CHUNK(float, 16, 4, X, Y) +#endif + + + DECL_ALL_VLDX_DUP(2); + DECL_ALL_VLDX_DUP(3); + DECL_ALL_VLDX_DUP(4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_ALL_VLDX_DUP_FP16(2); + DECL_ALL_VLDX_DUP_FP16(3); + DECL_ALL_VLDX_DUP_FP16(4); +#endif + + /* Check vld2_dup/vld2q_dup (TEST_ALL_VLDX_DUP only instantiates the non-q variants): chunk 0 is dumped, then chunk 1 */ + clean_results (); +#define TEST_MSG "VLD2_DUP/VLD2Q_DUP" + TEST_ALL_VLDX_DUP(2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_DUP_FP16(2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + 
TEST_ALL_EXTRA_CHUNKS(2, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3_dup/vld3q_dup (non-q variants only): chunks 0, 1 and 2 are dumped in turn */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3_DUP/VLD3Q_DUP" + TEST_ALL_VLDX_DUP(3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_DUP_FP16(3); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(3, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vld4_dup/vld4q_dup (non-q variants only): chunks 0 to 3 are dumped in turn */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4_DUP/VLD4Q_DUP" + TEST_ALL_VLDX_DUP(4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_DUP_FP16(4); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(4, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); +#endif + 
dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vldX_lane.c b/ref_vldX_lane.c new file mode 100644 index 0000000..7a74232 --- /dev/null +++ b/ref_vldX_lane.c @@ -0,0 +1,243 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX_lane (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX_LANE(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. 
A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ +#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ + sizeof(VECT_VAR(buffer_src, T1, W, N))); \ + \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ + vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + /* Use dedicated init buffer, of size X */ \ + vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ + L); \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))) + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant */ +#define DECL_ALL_VLDX_LANE(X) \ + DECL_VLDX_LANE(int, 8, 8, X); \ + DECL_VLDX_LANE(int, 16, 4, X); \ + DECL_VLDX_LANE(int, 32, 2, X); \ + DECL_VLDX_LANE(uint, 8, 8, X); \ + DECL_VLDX_LANE(uint, 16, 4, X); \ + DECL_VLDX_LANE(uint, 32, 2, X); \ + DECL_VLDX_LANE(poly, 8, 8, X); \ + DECL_VLDX_LANE(poly, 16, 4, X); \ + DECL_VLDX_LANE(int, 16, 8, X); \ + DECL_VLDX_LANE(int, 32, 4, X); \ + DECL_VLDX_LANE(uint, 16, 8, X); \ + DECL_VLDX_LANE(uint, 32, 4, X); \ + DECL_VLDX_LANE(poly, 16, 8, X); \ + DECL_VLDX_LANE(float, 32, 2, X); \ + DECL_VLDX_LANE(float, 32, 4, X) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define 
DECL_ALL_VLDX_LANE_FP16(X) \ + DECL_VLDX_LANE(float, 16, 4, X); \ + DECL_VLDX_LANE(float, 16, 8, X) +#endif + + /* Add some padding to try to catch out of bound accesses. */ + /* Use an array instead of a plain char to comply with rvct + constraints. */ +#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42} +#define DUMMY_ARRAY(V, T, W, N, L) \ + VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \ + ARRAY1(V##_pad,T,W,N) + + /* Use the same lanes regardless of the size of the array (X), for + simplicity */ +#define TEST_ALL_VLDX_LANE(X) \ + TEST_VLDX_LANE(, int, s, 8, 8, X, 7); \ + TEST_VLDX_LANE(, int, s, 16, 4, X, 2); \ + TEST_VLDX_LANE(, int, s, 32, 2, X, 0); \ + TEST_VLDX_LANE(, uint, u, 8, 8, X, 4); \ + TEST_VLDX_LANE(, uint, u, 16, 4, X, 3); \ + TEST_VLDX_LANE(, uint, u, 32, 2, X, 1); \ + TEST_VLDX_LANE(, poly, p, 8, 8, X, 4); \ + TEST_VLDX_LANE(, poly, p, 16, 4, X, 3); \ + TEST_VLDX_LANE(q, int, s, 16, 8, X, 6); \ + TEST_VLDX_LANE(q, int, s, 32, 4, X, 2); \ + TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \ + TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \ + TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5); \ + TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \ + TEST_VLDX_LANE(q, float, f, 32, 4, X, 2) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_ALL_VLDX_LANE_FP16(X) \ + TEST_VLDX_LANE(, float, f, 16, 4, X, 0); \ + TEST_VLDX_LANE(q, float, f, 16, 8, X, 2) +#endif + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); 
\ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ + TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 16, 8, X, Y) +#endif + + /* Declare the temporary buffers / variables */ + DECL_ALL_VLDX_LANE(2); + DECL_ALL_VLDX_LANE(3); + DECL_ALL_VLDX_LANE(4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_ALL_VLDX_LANE_FP16(2); + DECL_ALL_VLDX_LANE_FP16(3); + DECL_ALL_VLDX_LANE_FP16(4); +#endif + + /* Define dummy input arrays, large enough for x4 vectors */ + DUMMY_ARRAY(buffer_src, int, 8, 8, 4); + DUMMY_ARRAY(buffer_src, int, 16, 4, 4); + DUMMY_ARRAY(buffer_src, int, 32, 2, 4); + DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); + DUMMY_ARRAY(buffer_src, poly, 8, 8, 4); + DUMMY_ARRAY(buffer_src, poly, 16, 4, 4); + DUMMY_ARRAY(buffer_src, int, 16, 8, 4); + DUMMY_ARRAY(buffer_src, int, 32, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); + DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); + DUMMY_ARRAY(buffer_src, float, 32, 2, 4); + DUMMY_ARRAY(buffer_src, float, 32, 4, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DUMMY_ARRAY(buffer_src, float, 16, 4, 4); + DUMMY_ARRAY(buffer_src, float, 16, 8, 4); +#endif + + /* Check vld2_lane/vld2q_lane */ + clean_results (); +#define TEST_MSG "VLD2_LANE/VLD2Q_LANE" + TEST_ALL_VLDX_LANE(2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_LANE_FP16(2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(2, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || 
((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3_lane/vld3q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3_LANE/VLD3Q_LANE" + TEST_ALL_VLDX_LANE(3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_LANE_FP16(3); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(3, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vld4_lane/vld4q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4_LANE/VLD4Q_LANE" + TEST_ALL_VLDX_LANE(4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VLDX_LANE_FP16(4); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(4, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); +#endif + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vmax.c b/ref_vmax.c new file mode 
100644 index 0000000..d0086e5 --- /dev/null +++ b/ref_vmax.c @@ -0,0 +1,153 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +#include <math.h> + +#ifndef INSN_NAME +#define INSN_NAME vmax +#define TEST_MSG "VMAX/VMAXQ" +#endif + +/* Can't use the standard ref_v_binary_op.c template because vmax (and vmin, + which reuses this file via INSN_NAME) has no 64-bit variants */ +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + int i; + + /* Basic test: y=OP(x1,x2), then store the result.
*/ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#ifndef NO_FLOAT_VARIANT + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); +#endif + + /* Choose init value arbitrarily, will be used as comparison value */ + VDUP(vector2, , int, s, 8, 8, -13); + VDUP(vector2, , int, s, 16, 4, -14); + VDUP(vector2, , int, s, 32, 2, -16); + VDUP(vector2, , uint, u, 8, 8, 0xf3); + VDUP(vector2, , uint, u, 16, 4, 0xfff1); + VDUP(vector2, , uint, u, 32, 2, 0xfffffff0); + VDUP(vector2, q, int, s, 8, 16, -12); + VDUP(vector2, q, int, s, 16, 8, -13); + VDUP(vector2, q, int, s, 32, 4, -15); + VDUP(vector2, q, uint, u, 8, 16, 0xf9); + VDUP(vector2, q, uint, u, 16, 8, 0xfff2); + VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1); +#ifndef NO_FLOAT_VARIANT + VDUP(vector2, , float, f, 32, 2, -15.5f); + VDUP(vector2, q, float, f, 32, 4, -14.5f); +#endif + +#ifndef NO_FLOAT_VARIANT +#define FLOAT_VARIANT(MACRO, VAR) \ + MACRO(VAR, , float, f, 32, 2); \ + MACRO(VAR, q, float, f, 32, 4) +#else +#define FLOAT_VARIANT(MACRO, VAR) +#endif + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR) \ + MACRO(VAR, , int, s, 8, 8); \ + MACRO(VAR, , int, s, 16, 4); \ + MACRO(VAR, , int, s, 32, 2); \ + MACRO(VAR, , uint, u, 8, 8); \ + MACRO(VAR, , uint, u, 16, 4); \ + MACRO(VAR, , uint, u, 32, 2); \ + MACRO(VAR, q, int, s, 8, 16); \ + MACRO(VAR, q, int, s,
16, 8); \ + MACRO(VAR, q, int, s, 32, 4); \ + MACRO(VAR, q, uint, u, 8, 16); \ + MACRO(VAR, q, uint, u, 16, 8); \ + MACRO(VAR, q, uint, u, 32, 4); \ + FLOAT_VARIANT(MACRO, VAR) + + /* Apply a binary operator named INSN_NAME */ + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME); + + dump_results_hex (TEST_MSG); + + +#ifndef NO_FLOAT_VARIANT + /* Extra FP tests with special values (NaN, ....) */ + VDUP(vector, q, float, f, 32, 4, 1.0f); + VDUP(vector2, q, float, f, 32, 4, NAN); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + DUMP_FP(TEST_MSG " FP special (NaN)", float, 32, 4, PRIx32); + + VDUP(vector, q, float, f, 32, 4, -NAN); + VDUP(vector2, q, float, f, 32, 4, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + DUMP_FP(TEST_MSG " FP special (-NaN)", float, 32, 4, PRIx32); + + VDUP(vector, q, float, f, 32, 4, 1.0f); + VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + DUMP_FP(TEST_MSG " FP special (inf)", float, 32, 4, PRIx32); + + VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); + VDUP(vector2, q, float, f, 32, 4, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + DUMP_FP(TEST_MSG " FP special (-inf)", float, 32, 4, PRIx32); + + VDUP(vector, q, float, f, 32, 4, 0.0f); + VDUP(vector2, q, float, f, 32, 4, -0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + DUMP_FP(TEST_MSG " FP special (-0.0)", float, 32, 4, PRIx32); + + VDUP(vector, q, float, f, 32, 4, -0.0f); + VDUP(vector2, q, float, f, 32, 4, 0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + DUMP_FP(TEST_MSG " FP special (-0.0)", float, 32, 4, PRIx32); +#endif +} diff --git a/ref_vmin.c b/ref_vmin.c new file mode 100644 index 0000000..496e3ae --- /dev/null +++ b/ref_vmin.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal 
+in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vmin +#define TEST_MSG "VMIN/VMINQ" + +#include "ref_vmax.c" diff --git a/ref_vmla.c b/ref_vmla.c new file mode 100644 index 0000000..fd4106b --- /dev/null +++ b/ref_vmla.c @@ -0,0 +1,144 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + DECL_VARIABLE(vector2, T, W, N); \ + DECL_VARIABLE(vector3, T, W, N); \ + DECL_VARIABLE(vector_res, T, W, N) + + /* vector_res = OP(vector1, vector2, vector3), + then store the result. */ +#define TEST_VMLX1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX(INSN, Q, T1, T2, W, N) \ + TEST_VMLX1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX(int, 8, 8); + DECL_VMLX(int, 16, 4); + DECL_VMLX(int, 32, 2); + DECL_VMLX(uint, 8, 8); + DECL_VMLX(uint, 16, 4); + DECL_VMLX(uint, 32, 2); + DECL_VMLX(float, 32, 2); + DECL_VMLX(int, 8, 16); + DECL_VMLX(int, 16, 8); + DECL_VMLX(int, 32, 4); + DECL_VMLX(uint, 8, 16); + DECL_VMLX(uint, 16, 8); + DECL_VMLX(uint, 32, 4); + DECL_VMLX(float, 32, 4); + + clean_results (); + + VLOAD(vector1, buffer, , int, s, 8, 8); + VLOAD(vector1, buffer, , int, s, 16, 4); + VLOAD(vector1, buffer, , int, s, 32, 2); + VLOAD(vector1, buffer, , uint, u, 8, 8); + VLOAD(vector1, buffer, , uint, u, 16, 4); + VLOAD(vector1, buffer, , uint, u, 32, 2); + VLOAD(vector1, buffer, , float, f, 32,
2); + VLOAD(vector1, buffer, q, int, s, 8, 16); + VLOAD(vector1, buffer, q, int, s, 16, 8); + VLOAD(vector1, buffer, q, int, s, 32, 4); + VLOAD(vector1, buffer, q, uint, u, 8, 16); + VLOAD(vector1, buffer, q, uint, u, 16, 8); + VLOAD(vector1, buffer, q, uint, u, 32, 4); + VLOAD(vector1, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x44); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x66); + VDUP(vector2, , float, f, 32, 2, 33.1f); + VDUP(vector2, q, int, s, 8, 16, 0x77); + VDUP(vector2, q, int, s, 16, 8, 0x88); + VDUP(vector2, q, int, s, 32, 4, 0x99); + VDUP(vector2, q, uint, u, 8, 16, 0xAA); + VDUP(vector2, q, uint, u, 16, 8, 0xBB); + VDUP(vector2, q, uint, u, 32, 4, 0xCC); + VDUP(vector2, q, float, f, 32, 4, 99.2f); + + VDUP(vector3, , int, s, 8, 8, 0xFF); + VDUP(vector3, , int, s, 16, 4, 0xEE); + VDUP(vector3, , int, s, 32, 2, 0xDD); + VDUP(vector3, , uint, u, 8, 8, 0xCC); + VDUP(vector3, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0xAA); + VDUP(vector3, , float, f, 32, 2, 10.23f); + VDUP(vector3, q, int, s, 8, 16, 0x99); + VDUP(vector3, q, int, s, 16, 8, 0x88); + VDUP(vector3, q, int, s, 32, 4, 0x77); + VDUP(vector3, q, uint, u, 8, 16, 0x66); + VDUP(vector3, q, uint, u, 16, 8, 0x55); + VDUP(vector3, q, uint, u, 32, 4, 0x44); + VDUP(vector3, q, float, f, 32, 4, 77.8f); + + TEST_VMLX(INSN_NAME, , int, s, 8, 8); + TEST_VMLX(INSN_NAME, , int, s, 16, 4); + TEST_VMLX(INSN_NAME, , int, s, 32, 2); + TEST_VMLX(INSN_NAME, , uint, u, 8, 8); + TEST_VMLX(INSN_NAME, , uint, u, 16, 4); + TEST_VMLX(INSN_NAME, , uint, u, 32, 2); + TEST_VMLX(INSN_NAME, , float, f, 32, 2); + TEST_VMLX(INSN_NAME, q, int, s, 8, 16); + TEST_VMLX(INSN_NAME, q, int, s, 16, 8); + TEST_VMLX(INSN_NAME, q, int, s, 32, 4); + TEST_VMLX(INSN_NAME, q, uint, u, 8, 16); + TEST_VMLX(INSN_NAME, q, uint, u, 16, 8); + 
TEST_VMLX(INSN_NAME, q, uint, u, 32, 4); + TEST_VMLX(INSN_NAME, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmla_lane.c b/ref_vmla_lane.c new file mode 100644 index 0000000..290710c --- /dev/null +++ b/ref_vmla_lane.c @@ -0,0 +1,125 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX_LANE(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmlx_lane(vector, vector2, vector3, lane), + then store the result. */ +#define TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_LANE(INSN, Q, T1, T2, W, N, N2, V) \ + TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX_LANE(vector); + DECL_VMLX_LANE(vector2); + DECL_VMLX_LANE(vector_res); + + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector3, float, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + 
VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + VDUP(vector2, , float, f, 32, 2, 55.3f); + VDUP(vector2, q, int, s, 16, 8, 0x55); + VDUP(vector2, q, int, s, 32, 4, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x55); + VDUP(vector2, q, uint, u, 32, 4, 0x55); + VDUP(vector2, q, float, f, 32, 4, 55.8f); + + VDUP(vector3, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0xBB); + VDUP(vector3, , float, f, 32, 2, 11.34f); + + /* Choose lane arbitrarily */ + TEST_VMLX_LANE(INSN_NAME, , int, s, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , int, s, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , float, f, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 16, 8, 4, 3); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 16, 8, 4, 2); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, float, f, 32, 4, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmla_n.c b/ref_vmla_n.c new file mode 100644 index 0000000..9b9384f --- /dev/null +++ b/ref_vmla_n.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright 
notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX_N(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, float, 32, 4); \ + DECL_VARIABLE(VAR, uint, 32, 4) + + /* vector_res = vmlx_n(vector, vector2, val), + then store the result. 
*/ +#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX_N(vector); + DECL_VMLX_N(vector2); + DECL_VMLX_N(vector_res); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + VDUP(vector2, , float, f, 32, 2, 55.2f); + VDUP(vector2, q, int, s, 16, 8, 0x55); + VDUP(vector2, q, int, s, 32, 4, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x55); + VDUP(vector2, q, uint, u, 32, 4, 0x55); + VDUP(vector2, q, float, f, 32, 4, 55.9f); + + /* Choose multiplier arbitrarily */ + TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 0x11); + TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 0x22); + TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 0x33); + TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 0x44); + TEST_VMLX_N(INSN_NAME, , float, f, 32, 2, 22.3f); + TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 0x55); + TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 0x66); + TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 0x77); + TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 0x88); + TEST_VMLX_N(INSN_NAME, q, float, f, 32, 4, 66.7f); + + dump_results_hex (TEST_MSG); +} diff --git 
a/ref_vmlal.c b/ref_vmlal.c new file mode 100644 index 0000000..bf0b03b --- /dev/null +++ b/ref_vmlal.c @@ -0,0 +1,119 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal +#define TEST_MSG "VMLAL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. 
*/ +#define TEST_VMLXL1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VMLXL1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector3, int, 8, 8); + DECL_VARIABLE(vector4, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector3, uint, 8, 8); + DECL_VARIABLE(vector4, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 8); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector3, , int, s, 8, 8, 0x55); + VDUP(vector4, , int, s, 8, 8, 0xBB); + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 8, 8, 0x55); + VDUP(vector4, , uint, u, 8, 8, 
0xBB); + VDUP(vector3, , uint, u, 16, 4, 0x55); + VDUP(vector4, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0x55); + VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL(INSN_NAME, int, s, 16, 8, 8); + TEST_VMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VMLXL(INSN_NAME, int, s, 64, 32, 2); + TEST_VMLXL(INSN_NAME, uint, u, 16, 8, 8); + TEST_VMLXL(INSN_NAME, uint, u, 32, 16, 4); + TEST_VMLXL(INSN_NAME, uint, u, 64, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmlal_lane.c b/ref_vmlal_lane.c new file mode 100644 index 0000000..c276fa8 --- /dev/null +++ b/ref_vmlal_lane.c @@ -0,0 +1,101 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal_lane +#define TEST_MSG "VMLAL_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vmlxl_lane(vector, vector3, vector4, lane), + then store the result. */ +#define TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_LANE(INSN, T1, T2, W, W2, N, V) \ + TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0x55); + VDUP(vector4, , uint, u, 16, 4, 0xBB); + 
VDUP(vector3, , uint, u, 32, 2, 0x55); + VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 2); + TEST_VMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 1); + TEST_VMLXL_LANE(INSN_NAME, uint, u, 32, 16, 4, 2); + TEST_VMLXL_LANE(INSN_NAME, uint, u, 64, 32, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmlal_n.c b/ref_vmlal_n.c new file mode 100644 index 0000000..45979c7 --- /dev/null +++ b/ref_vmlal_n.c @@ -0,0 +1,92 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal_n +#define TEST_MSG "VMLAL_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vmlxl_n(vector, vector2, val), + then store the result. */ +#define TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_N(INSN, T1, T2, W, W2, N, V) \ + TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + + /* Choose multiplier arbitrarily */ + TEST_VMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x11); + TEST_VMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x22); + TEST_VMLXL_N(INSN_NAME, uint, u, 32, 16, 4, 0x33); + TEST_VMLXL_N(INSN_NAME, uint, u, 64, 32, 2, 0x33); + + dump_results_hex (TEST_MSG); +} diff --git 
a/ref_vmls.c b/ref_vmls.c new file mode 100644 index 0000000..9eacdb2 --- /dev/null +++ b/ref_vmls.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS" + +#include "ref_vmla.c" diff --git a/ref_vmls_lane.c b/ref_vmls_lane.c new file mode 100644 index 0000000..68cce02 --- /dev/null +++ b/ref_vmls_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS_LANE" + +#include "ref_vmla_lane.c" diff --git a/ref_vmls_n.c b/ref_vmls_n.c new file mode 100644 index 0000000..050a4b9 --- /dev/null +++ b/ref_vmls_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS_N" + +#include "ref_vmla_n.c" diff --git a/ref_vmlsl.c b/ref_vmlsl.c new file mode 100644 index 0000000..22b11aa --- /dev/null +++ b/ref_vmlsl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl +#define TEST_MSG "VMLSL" + +#include "ref_vmlal.c" diff --git a/ref_vmlsl_lane.c b/ref_vmlsl_lane.c new file mode 100644 index 0000000..a972b93 --- /dev/null +++ b/ref_vmlsl_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl_lane +#define TEST_MSG "VMLSL_LANE" + +#include "ref_vmlal_lane.c" diff --git a/ref_vmlsl_n.c b/ref_vmlsl_n.c new file mode 100644 index 0000000..b7f7000 --- /dev/null +++ b/ref_vmlsl_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl_n +#define TEST_MSG "VMLSL_N" + +#include "ref_vmlal_n.c" diff --git a/ref_vmovl.c b/ref_vmovl.c new file mode 100644 index 0000000..af48791 --- /dev/null +++ b/ref_vmovl.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMOVL" +void exec_vmovl (void) +{ + /* Basic test: vec128=vmovl(vec64), then store the result. 
*/ +#define TEST_VMOVL(T1, T2, W, W2, N) \ + VECT_VAR(vector128, T1, W2, N) = \ + vmovl_##T2##W(VECT_VAR(vector64, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector128, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64, buffer); + + clean_results (); + + TEST_VMOVL(int, s, 8, 16, 8); + TEST_VMOVL(int, s, 16, 32, 4); + TEST_VMOVL(int, s, 32, 64, 2); + TEST_VMOVL(uint, u, 8, 16, 8); + TEST_VMOVL(uint, u, 16, 32, 4); + TEST_VMOVL(uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmovn.c b/ref_vmovn.c new file mode 100644 index 0000000..24aa0af --- /dev/null +++ b/ref_vmovn.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMOVN" +void exec_vmovn (void) +{ + /* Basic test: vec64=vmovn(vec128), then store the result. */ +#define TEST_VMOVN(T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = \ + vmovn_##T2##W(VECT_VAR(vector128, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); + + clean_results (); + + TEST_VMOVN(int, s, 16, 8, 8); + TEST_VMOVN(int, s, 32, 16, 4); + TEST_VMOVN(int, s, 64, 32, 2); + TEST_VMOVN(uint, u, 16, 8, 8); + TEST_VMOVN(uint, u, 32, 16, 4); + TEST_VMOVN(uint, u, 64, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmul.c b/ref_vmul.c new file mode 100644 index 0000000..c7b1814 --- /dev/null +++ b/ref_vmul.c @@ -0,0 +1,134 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmul +#define TEST_MSG "VMUL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMUL(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + DECL_VARIABLE(vector2, T, W, N); \ + DECL_VARIABLE(vector_res, T, W, N) + + /* vector_res = OP(vector1, vector2), then store the result. */ +#define TEST_VMUL1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMUL(INSN, Q, T1, T2, W, N) \ + TEST_VMUL1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(int, 8, 8); + DECL_VMUL(int, 16, 4); + DECL_VMUL(int, 32, 2); + DECL_VMUL(uint, 8, 8); + DECL_VMUL(uint, 16, 4); + DECL_VMUL(uint, 32, 2); + DECL_VMUL(poly, 8, 8); + DECL_VMUL(float, 32, 2); + DECL_VMUL(int, 8, 16); + DECL_VMUL(int, 16, 8); + DECL_VMUL(int, 32, 4); + DECL_VMUL(uint, 8, 16); + DECL_VMUL(uint, 16, 8); + DECL_VMUL(uint, 32, 4); + DECL_VMUL(poly, 8, 16); + DECL_VMUL(float, 32, 4); + + clean_results (); + + VLOAD(vector1, buffer, , int, s, 8, 8); + VLOAD(vector1, buffer, , int, s, 16, 4); + VLOAD(vector1, buffer, , int, s, 32, 2); + VLOAD(vector1, buffer, , uint, u, 8, 8); + VLOAD(vector1, buffer, , uint, u, 16, 4); + VLOAD(vector1, buffer, , uint, u, 32, 2); + VLOAD(vector1, buffer, , poly, p, 8, 8); + VLOAD(vector1, buffer, , 
float, f, 32, 2); + VLOAD(vector1, buffer, q, int, s, 8, 16); + VLOAD(vector1, buffer, q, int, s, 16, 8); + VLOAD(vector1, buffer, q, int, s, 32, 4); + VLOAD(vector1, buffer, q, uint, u, 8, 16); + VLOAD(vector1, buffer, q, uint, u, 16, 8); + VLOAD(vector1, buffer, q, uint, u, 32, 4); + VLOAD(vector1, buffer, q, poly, p, 8, 16); + VLOAD(vector1, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x44); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x66); + VDUP(vector2, , poly, p, 8, 8, 0x44); + VDUP(vector2, , float, f, 32, 2, 33.3f); + VDUP(vector2, q, int, s, 8, 16, 0x77); + VDUP(vector2, q, int, s, 16, 8, 0x88); + VDUP(vector2, q, int, s, 32, 4, 0x99); + VDUP(vector2, q, uint, u, 8, 16, 0xAA); + VDUP(vector2, q, uint, u, 16, 8, 0xBB); + VDUP(vector2, q, uint, u, 32, 4, 0xCC); + VDUP(vector2, q, poly, p, 8, 16, 0xAA); + VDUP(vector2, q, float, f, 32, 4, 99.6f); + + TEST_VMUL(INSN_NAME, , int, s, 8, 8); + TEST_VMUL(INSN_NAME, , int, s, 16, 4); + TEST_VMUL(INSN_NAME, , int, s, 32, 2); + TEST_VMUL(INSN_NAME, , uint, u, 8, 8); + TEST_VMUL(INSN_NAME, , uint, u, 16, 4); + TEST_VMUL(INSN_NAME, , uint, u, 32, 2); + TEST_VMUL(INSN_NAME, , poly, p, 8, 8); + TEST_VMUL(INSN_NAME, , float, f, 32, 2); + TEST_VMUL(INSN_NAME, q, int, s, 8, 16); + TEST_VMUL(INSN_NAME, q, int, s, 16, 8); + TEST_VMUL(INSN_NAME, q, int, s, 32, 4); + TEST_VMUL(INSN_NAME, q, uint, u, 8, 16); + TEST_VMUL(INSN_NAME, q, uint, u, 16, 8); + TEST_VMUL(INSN_NAME, q, uint, u, 32, 4); + TEST_VMUL(INSN_NAME, q, poly, p, 8, 16); + TEST_VMUL(INSN_NAME, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmul_lane.c b/ref_vmul_lane.c new file mode 100644 index 0000000..f67d68e --- /dev/null +++ b/ref_vmul_lane.c @@ -0,0 +1,105 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is 
hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMUL_LANE" +void exec_vmul_lane (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMUL_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vmul##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + + clean_results (); + + /* Initialize vector from pre-initialized values */ + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Initialize vector2 */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x22); + VDUP(vector2, , uint, u, 16, 4, 0x444); + VDUP(vector2, , uint, u, 32, 2, 0x532); + VDUP(vector2, , float, f, 32, 2, 22.8f); + + /* Choose lane arbitrarily */ + TEST_VMUL_LANE(, int, s, 16, 4, 4, 2); + TEST_VMUL_LANE(, int, s, 32, 2, 2, 1); + TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2); + TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1); + TEST_VMUL_LANE(, float, f, 32, 2, 2, 1); + TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2); + TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0); + TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2); + TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1); + TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmul_n.c b/ref_vmul_n.c new file mode 100644 index 
0000000..1128377 --- /dev/null +++ b/ref_vmul_n.c @@ -0,0 +1,91 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMUL_N" +void exec_vmul_n (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_n(vector,val), then store the result. 
*/ +#define TEST_VMUL_N(Q, T1, T2, W, N, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vmul##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + clean_results (); + + /* Initialize vector from pre-initialized values */ + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose multiplier arbitrarily */ + TEST_VMUL_N(, int, s, 16, 4, 0x11); + TEST_VMUL_N(, int, s, 32, 2, 0x22); + TEST_VMUL_N(, uint, u, 16, 4, 0x33); + TEST_VMUL_N(, uint, u, 32, 2, 0x44); + TEST_VMUL_N(, float, f, 32, 2, 22.3f); + TEST_VMUL_N(q, int, s, 16, 8, 0x55); + TEST_VMUL_N(q, int, s, 32, 4, 0x66); + TEST_VMUL_N(q, uint, u, 16, 8, 0x77); + TEST_VMUL_N(q, uint, u, 32, 4, 0x88); + TEST_VMUL_N(q, float, f, 32, 4, 88.9f); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull.c b/ref_vmull.c new file mode 100644 index 0000000..6fc5be6 --- /dev/null +++ b/ref_vmull.c @@ -0,0 +1,81 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to 
permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +/* exec_vmull: reference test for the widening multiply intrinsics vmull_s8, vmull_s16, vmull_s32, vmull_u8, vmull_u16, vmull_u32 and vmull_p8. Input and result vectors are declared first, the inputs are loaded with VLOAD, each input is multiplied by itself, and the results are dumped under the TEST_MSG heading. */ +#define TEST_MSG "VMULL" +void exec_vmull (void) +{ + /* Basic test: y=vmull(x,x), then store the result. The result keeps the N lanes of the input but each lane is W2 bits wide (W2 is twice W in every use below), so it is written with the q-form store vst1q. */ +#define TEST_VMULL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vmull_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + DECL_VARIABLE(vector_res, poly, 16, 8); + + clean_results (); + /* VLOAD and buffer are defined outside this file; presumably each call fills one input vector from pre-initialized data - confirm in stm-arm-neon-ref.h. */ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + 
VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , poly, p, 8, 8); + /* TEST_VMULL arguments: type name given to VECT_VAR, intrinsic suffix letter, input lane width, result lane width, lane count. */ + TEST_VMULL(int, s, 8, 16, 8); + TEST_VMULL(int, s, 16, 32, 4); + TEST_VMULL(int, s, 32, 64, 2); + TEST_VMULL(uint, u, 8, 16, 8); + TEST_VMULL(uint, u, 16, 32, 4); + TEST_VMULL(uint, u, 32, 64, 2); + TEST_VMULL(poly, p, 8, 16, 8); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull_lane.c b/ref_vmull_lane.c new file mode 100644 index 0000000..769abc6 --- /dev/null +++ b/ref_vmull_lane.c @@ -0,0 +1,84 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +/* exec_vmull_lane: reference test for vmull_lane_s16, vmull_lane_s32, vmull_lane_u16 and vmull_lane_u32. The two input vectors are filled with VDUP, multiplied using a fixed lane index, and the widened results are dumped under the TEST_MSG heading. */ +#define TEST_MSG "VMULL_LANE" +void exec_vmull_lane (void) +{ + /* vector_res = vmull_lane(vector,vector2,lane), then store the result. L is passed unchanged as the lane argument of the intrinsic; the result has lanes of W2 bits and is stored with vst1q. */ +#define TEST_VMULL_LANE(T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vmull##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + /* Results: same lane count as the inputs, lanes twice as wide. */ + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + VDUP(vector, , uint, u, 16, 4, 0x1000); + VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Initialize vector2 */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + VDUP(vector2, , uint, u, 16, 4, 0x4); + VDUP(vector2, , uint, u, 32, 2, 0x2); + + /* Choose lane arbitrarily. NOTE(review): if VDUP broadcasts its value to every lane (as the name suggests - confirm in stm-arm-neon-ref.h), all lanes of vector2 are equal and the index chosen here does not change the numeric result. */ + TEST_VMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VMULL_LANE(int, s, 32, 64, 2, 1); + TEST_VMULL_LANE(uint, u, 16, 32, 4, 2); + TEST_VMULL_LANE(uint, u, 32, 64, 2, 1); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull_n.c b/ref_vmull_n.c new file mode 100644 index 0000000..200280e --- /dev/null +++ b/ref_vmull_n.c @@ -0,0 +1,82 @@ +/* + +Copyright (c) 
2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +/* exec_vmull_n: reference test for vmull_n_s16, vmull_n_s32, vmull_n_u16 and vmull_n_u32. Each input vector is multiplied by a scalar constant; the widened result is stored and then dumped. */ +#define INSN_NAME vmull +#define TEST_MSG "VMULL_N" +void exec_vmull_n (void) +{ + int i; + /* NOTE(review): i is not referenced directly in this function; presumably the DUMP macro expands to code that uses it - confirm in stm-arm-neon-ref.h. */ + /* vector_res = vmull_n(vector,val), then store the result. 
*/ +#define TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) +/* Extra macro level: TEST_VMULL_N1 pastes INSN with ##, and the preprocessor does not expand an argument that is an operand of ##. Going through TEST_VMULL_N first lets INSN_NAME be replaced by vmull before the paste. */ +#define TEST_VMULL_N(INSN, T1, T2, W, W2, N, L) \ + TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + VDUP(vector, , uint, u, 16, 4, 0x1000); + VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Choose multiplier arbitrarily */ + TEST_VMULL_N(INSN_NAME, int, s, 16, 32, 4, 0x11); + TEST_VMULL_N(INSN_NAME, int, s, 32, 64, 2, 0x22); + TEST_VMULL_N(INSN_NAME, uint, u, 16, 32, 4, 0x33); + TEST_VMULL_N(INSN_NAME, uint, u, 32, 64, 2, 0x44); + /* Print the section header to both ref_file and gcc_tests_file, then dump only the four result types written above. */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); +} diff --git a/ref_vmvn.c b/ref_vmvn.c new file mode 100644 index 0000000..5ea6a33 --- /dev/null +++ b/ref_vmvn.c @@ -0,0 +1,120 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
+copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +/* Reference test for the one-operand intrinsics vmvn and vmvnq (bitwise NOT per the NEON intrinsics reference) on int, uint and poly vectors with 8, 16 and 32-bit lanes. */ +#define INSN_NAME vmvn +#define TEST_MSG "VMVN/VMVNQ" +/* Two macro levels so that INSN_NAME is expanded before it is pasted after exec_; FNNAME (INSN_NAME) below therefore defines void exec_vmvn (void). */ +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + /* NOTE(review): the definition of TEST_UNARY_OP above ends with a backslash, so it continues onto the blank line that follows it. Harmless, but the trailing backslash could be dropped. */ + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, poly, 8, 16); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, poly, 8, 8); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, poly, 8, 16); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , poly, p, 8, 8); + VLOAD(vector, buffer, q, int, s, 8, 16); + VLOAD(vector, buffer, q, int, s, 16, 8); + 
VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 8, 16); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, poly, p, 8, 16); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4); + TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2); + TEST_UNARY_OP(INSN_NAME, , poly, p, 8, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + TEST_UNARY_OP(INSN_NAME, q, poly, p, 8, 16); + /* Write the result buffers under the TEST_MSG heading (dump_results_hex is defined outside this file). */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vneg.c b/ref_vneg.c new file mode 100644 index 0000000..1eabe20 --- /dev/null +++ b/ref_vneg.c @@ -0,0 +1,54 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ +/* vneg and vnegq reference test. INSN_NAME and TEST_MSG are set here and the shared template ref_v_unary_op.c is included below; that template is not shown in this file, so its exact behaviour should be checked there. */ +#define INSN_NAME vneg +#define TEST_MSG "VNEG/VNEGQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vneg_f32(void); +#define EXTRA_TESTS exec_vneg_f32 + +#include "ref_v_unary_op.c" +/* exec_vneg_f32: float32 test named by EXTRA_TESTS above, presumably so that the included template calls it - confirm in ref_v_unary_op.c. It applies INSN_NAME to a 2-lane and a 4-lane float vector, each filled with VDUP, and dumps the results to ref_file. */ +void exec_vneg_f32(void) +{ + int i; + /* NOTE(review): i is not referenced directly in this function; presumably the DUMP_FP macro expands to code that uses it - confirm in stm-arm-neon-ref.h. */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + VDUP(vector, , float, f, 32, 2, 2.3f); + VDUP(vector, q, float, f, 32, 4, 3.4f); + /* TEST_UNARY_OP is not defined in this file; presumably it comes from the included ref_v_unary_op.c. */ + TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4); + /* Only ref_file receives the float32 header written here. */ + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vorn.c b/ref_vorn.c new file mode 100644 index 0000000..df9823e --- /dev/null +++ b/ref_vorn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vorn +#define TEST_MSG "VORN/VORNQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vorr.c b/ref_vorr.c new file mode 100644 index 0000000..0762e10 --- /dev/null +++ b/ref_vorr.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vorr +#define TEST_MSG "VORR/VORRQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vpadal.c b/ref_vpadal.c new file mode 100644 index 0000000..fab5f05 --- /dev/null +++ b/ref_vpadal.c @@ -0,0 +1,140 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +/* Reference test for vpadal and vpadalq (pairwise add and accumulate long per the NEON intrinsics reference): a narrow vector is combined with a wider accumulator vector and the wide result is stored. */ +#define INSN_NAME vpadal +#define TEST_MSG "VPADAL/VPADALQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(acc,x), then store the result. The first operand is the accumulator vector (N2 lanes of W2 bits) and the second is the narrow vector2 (N lanes of W bits); the result has the accumulator type. In every use below W2 is twice W and N2 is half of N. 
*/ +#define TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W2, N2), VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)) + +#define TEST_VPADAL(INSN, Q, T1, T2, W, N, W2, N2) \ + TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No 64-bit lanes are needed for the narrow operand (vector2); the accumulator (vector) and the result do use 64-bit lanes. */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 64, 1); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, uint, 64, 1); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, int, 8, 16); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, uint, 8, 16); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + VLOAD(vector, 
buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , int, s, 64, 1); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , uint, u, 64, 1); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Initialize input "vector2" from "buffer" */ + VLOAD(vector2, buffer, , int, s, 8, 8); + VLOAD(vector2, buffer, , int, s, 16, 4); + VLOAD(vector2, buffer, , int, s, 32, 2); + VLOAD(vector2, buffer, , uint, u, 8, 8); + VLOAD(vector2, buffer, , uint, u, 16, 4); + VLOAD(vector2, buffer, , uint, u, 32, 2); + VLOAD(vector2, buffer, q, int, s, 8, 16); + VLOAD(vector2, buffer, q, int, s, 16, 8); + VLOAD(vector2, buffer, q, int, s, 32, 4); + VLOAD(vector2, buffer, q, uint, u, 8, 16); + VLOAD(vector2, buffer, q, uint, u, 16, 8); + VLOAD(vector2, buffer, q, uint, u, 32, 4); + + /* Apply the two-operand operator named INSN_NAME: OP(vector, vector2) */ + TEST_VPADAL(INSN_NAME, , int, s, 8, 8, 16, 4); + TEST_VPADAL(INSN_NAME, , int, s, 16, 4, 32, 2); + TEST_VPADAL(INSN_NAME, , int, s, 32, 2, 64 ,1); + TEST_VPADAL(INSN_NAME, , uint, u, 8, 8, 16, 4); + TEST_VPADAL(INSN_NAME, , uint, u, 16, 4, 32, 2); + TEST_VPADAL(INSN_NAME, , uint, u, 32, 2, 64, 1); + TEST_VPADAL(INSN_NAME, q, int, s, 8, 16, 16, 8); + TEST_VPADAL(INSN_NAME, q, int, s, 16, 8, 32, 4); + TEST_VPADAL(INSN_NAME, q, int, s, 32, 4, 64 ,2); + TEST_VPADAL(INSN_NAME, q, uint, u, 8, 16, 16, 8); + TEST_VPADAL(INSN_NAME, q, uint, u, 16, 8, 32, 4); + TEST_VPADAL(INSN_NAME, q, uint, u, 32, 4, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpadd.c b/ref_vpadd.c new file mode 100644 index 0000000..09f8f5c --- /dev/null +++ b/ref_vpadd.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +/* Template test for two-operand pairwise intrinsics. INSN_NAME and TEST_MSG default to vpadd and VPADD; the #ifndef lets a file that includes this one select another instruction by defining both beforehand. */ +#ifndef INSN_NAME +#define INSN_NAME vpadd +#define TEST_MSG "VPADD" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. The same input vector is passed as both operands. The macro has no Q argument, so only the non-q intrinsic and the vst1 store are generated. 
*/ +#define TEST_VPADD1(INSN, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VPADD(INSN, T1, T2, W, N) \ + TEST_VPADD1(INSN, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + /* NOTE(review): the definition of TEST_VPADD above ends with a backslash, so it continues onto the blank line that follows it. Harmless, but the trailing backslash could be dropped. */ + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + + /* Apply a binary operator named INSN_NAME */ + TEST_VPADD(INSN_NAME, int, s, 8, 8); + TEST_VPADD(INSN_NAME, int, s, 16, 4); + TEST_VPADD(INSN_NAME, int, s, 32, 2); + TEST_VPADD(INSN_NAME, uint, u, 8, 8); + TEST_VPADD(INSN_NAME, uint, u, 16, 4); + TEST_VPADD(INSN_NAME, uint, u, 32, 2); + TEST_VPADD(INSN_NAME, float, f, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpaddl.c b/ref_vpaddl.c new file mode 100644 index 0000000..e3a4c5b --- /dev/null +++ b/ref_vpaddl.c @@ -0,0 +1,113 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, 
free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +/* Reference test for vpaddl and vpaddlq (pairwise add long per the NEON intrinsics reference): one input vector, and a result with half as many lanes, each twice as wide. */ +#define INSN_NAME vpaddl +#define TEST_MSG "VPADDL/VPADDLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. The input has N lanes of W bits and the result N2 lanes of W2 bits; in every use below W2 is twice W and N2 is half of N. 
*/ +#define TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)) + +#define TEST_VPADDL(INSN, Q, T1, T2, W, N, W2, N2) \ + TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No 64-bit lanes are needed for the inputs; the results do use 64-bit lanes. */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, q, int, s, 8, 16); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 8, 16); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); 
+ + /* Apply a unary operator named INSN_NAME */ + TEST_VPADDL(INSN_NAME, , int, s, 8, 8, 16, 4); + TEST_VPADDL(INSN_NAME, , int, s, 16, 4, 32, 2); + TEST_VPADDL(INSN_NAME, , int, s, 32, 2, 64, 1); + TEST_VPADDL(INSN_NAME, , uint, u, 8, 8, 16, 4); + TEST_VPADDL(INSN_NAME, , uint, u, 16, 4, 32, 2); + TEST_VPADDL(INSN_NAME, , uint, u, 32, 2, 64, 1); + TEST_VPADDL(INSN_NAME, q, int, s, 8, 16, 16, 8); + TEST_VPADDL(INSN_NAME, q, int, s, 16, 8, 32, 4); + TEST_VPADDL(INSN_NAME, q, int, s, 32, 4, 64, 2); + TEST_VPADDL(INSN_NAME, q, uint, u, 8, 16, 16, 8); + TEST_VPADDL(INSN_NAME, q, uint, u, 16, 8, 32, 4); + TEST_VPADDL(INSN_NAME, q, uint, u, 32, 4, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpmax.c b/ref_vpmax.c new file mode 100644 index 0000000..bac8d20 --- /dev/null +++ b/ref_vpmax.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vpmax +#define TEST_MSG "VPMAX" + +#include "ref_vpadd.c" diff --git a/ref_vpmin.c b/ref_vpmin.c new file mode 100644 index 0000000..cf0a044 --- /dev/null +++ b/ref_vpmin.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vpmin +#define TEST_MSG "VPMIN" + +#include "ref_vpadd.c" diff --git a/ref_vqabs.c b/ref_vqabs.c new file mode 100644 index 0000000..e0ef53f --- /dev/null +++ b/ref_vqabs.c @@ -0,0 +1,73 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqabs +#define TEST_MSG "VQABS/VQABSQ" + +/* Extra tests for functions requiring corner cases tests */ +void vqabs_extra(void); +#define EXTRA_TESTS vqabs_extra + +#include "ref_v_unary_sat_op.c" + +void vqabs_extra() +{ + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" with max negative values to check + saturation */ + VDUP(vector, , int, s, 8, 8, 0x80); + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 8, 16, 0x80); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqadd.c b/ref_vqadd.c new file mode 100644 index 0000000..6bf70bd --- /dev/null +++ b/ref_vqadd.c @@ -0,0 +1,157 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without 
limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqadd +#define TEST_MSG "VQADD/VQADDQ" + +/* Extra tests for functions requiring types larger than 64 bits to + compute saturation */ +void vqadd_64(void); +#define EXTRA_TESTS vqadd_64 + +#include "ref_v_binary_sat_op.c" + +void vqadd_64(void) +{ + int i; + + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + /* Initialize input "vector1" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); + + VDUP(vector2, , int, s, 64, 1, 0x0); + VDUP(vector2, , uint, u, 64, 1, 0x0); + VDUP(vector2, q, int, s, 64, 2, 0x0); + VDUP(vector2, q, uint, u, 64, 2, 0x0); + + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + fprintf(ref_file, "\n64 bits saturation:\n"); + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, 
PRIx64); + + /* Another set of tests */ + VDUP(vector2, , int, s, 64, 1, 0x44); + VDUP(vector2, , uint, u, 64, 1, 0x88); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + VDUP(vector2, , int, s, 64, 1, 0x8000000000000003LL); + VDUP(vector2, , uint, u, 64, 1, 0x88); + + VDUP(vector1, q, int, s, 64, 2, 0x4000000000000000LL); + VDUP(vector2, q, int, s, 64, 2, 0x4000000000000000LL); + + VDUP(vector2, q, uint, u, 64, 2, 0x22); + + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* To improve coverage, check saturation with less than 64 bits too */ + VDUP(vector2, , int, s, 8, 8, 0x81); + VDUP(vector2, , int, s, 16, 4, 0x8001); + VDUP(vector2, , int, s, 32, 2, 0x80000001); + VDUP(vector2, q, int, s, 8, 16, 0x81); + VDUP(vector2, q, int, s, 16, 8, 0x8001); + VDUP(vector2, q, int, s, 32, 4, 0x80000001); + + fprintf(ref_file, "\nless than 64 bits saturation:\n"); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + 
TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + DUMP(TEST_MSG, int, 8, 8, PRIx8); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 8, 16, PRIx8); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + VDUP(vector1, , uint, u, 8, 8, 0xF0); + VDUP(vector1, , uint, u, 16, 4, 0xFFF0); + VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFF0); + VDUP(vector1, q, uint, u, 8, 16, 0xF0); + VDUP(vector1, q, uint, u, 16, 8, 0xFFF0); + VDUP(vector1, q, uint, u, 32, 4, 0xFFFFFFF0); + + VDUP(vector2, , uint, u, 8, 8, 0x20); + VDUP(vector2, , uint, u, 16, 4, 0x20); + VDUP(vector2, , uint, u, 32, 2, 0x20); + VDUP(vector2, q, uint, u, 8, 16, 0x20); + VDUP(vector2, q, uint, u, 16, 8, 0x20); + VDUP(vector2, q, uint, u, 32, 4, 0x20); + + fprintf(ref_file, + "\n%s less than 64 bits saturation cumulative saturation output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_vqdmlal.c b/ref_vqdmlal.c new file mode 100644 index 0000000..59c3672 --- /dev/null +++ b/ref_vqdmlal.c @@ -0,0 +1,98 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal +#define TEST_MSG "VQDMLAL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. 
*/ +#define TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W2), \ + xSTR(T1), W, N) + +#define TEST_VQDMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2); + dump_results_hex (TEST_MSG); + + + VDUP(vector3, , int, s, 16, 4, 0x8000); + VDUP(vector4, , int, s, 16, 4, 0x8000); + VDUP(vector3, , int, s, 32, 2, 0x80000000); + VDUP(vector4, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); +} diff --git a/ref_vqdmlal_lane.c b/ref_vqdmlal_lane.c new file mode 100644 index 0000000..e7d42f7 --- /dev/null +++ b/ref_vqdmlal_lane.c @@ -0,0 +1,105 @@ 
+/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal_lane +#define TEST_MSG "VQDMLAL_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vqdmlxl_lane(vector, vector3, vector4, lane), + then store the result. 
*/ +#define TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W2), \ + xSTR(T1), W, N) + +#define TEST_VQDMLXL_LANE(INSN, T1, T2, W, W2, N, V) \ + TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex (TEST_MSG); + + VDUP(vector3, , int, s, 16, 4, 0); + VDUP(vector3, , int, s, 32, 2, 0); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (mul with input=0)"); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex2 (TEST_MSG, " (mul with input=0)"); + + VDUP(vector3, , int, s, 16, 4, 0x8000); + VDUP(vector3, , int, s, 32, 2, 0x80000000); + VDUP(vector4, , int, s, 16, 4, 0x8000); + VDUP(vector4, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s cumulative saturation 
output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); +} diff --git a/ref_vqdmlal_n.c b/ref_vqdmlal_n.c new file mode 100644 index 0000000..86f1e30 --- /dev/null +++ b/ref_vqdmlal_n.c @@ -0,0 +1,92 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal_n +#define TEST_MSG "VQDMLAL_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vqdmlxl_n(vector, vector3, val), + then store the result. 
*/ +#define TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W2), \ + xSTR(T1), W, N) + +#define TEST_VQDMLXL_N(INSN, T1, T2, W, W2, N, V) \ + TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector3, , int, s, 32, 2, 0x55); + + /* Choose val arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x22); + TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x33); + + dump_results_hex (TEST_MSG); + + VDUP(vector3, , int, s, 16, 4, 0x8000); + VDUP(vector3, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x8000); + TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); +} diff --git a/ref_vqdmlsl.c b/ref_vqdmlsl.c new file mode 100644 index 0000000..ceb0b6b --- /dev/null +++ b/ref_vqdmlsl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated 
documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqdmlsl +#define TEST_MSG "VQDMLSL" + +#include "ref_vqdmlal.c" diff --git a/ref_vqdmlsl_lane.c b/ref_vqdmlsl_lane.c new file mode 100644 index 0000000..7724d98 --- /dev/null +++ b/ref_vqdmlsl_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqdmlsl_lane +#define TEST_MSG "VQDMLSL_LANE" + +#include "ref_vqdmlal_lane.c" diff --git a/ref_vqdmlsl_n.c b/ref_vqdmlsl_n.c new file mode 100644 index 0000000..c6f8818 --- /dev/null +++ b/ref_vqdmlsl_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqdmlsl_n +#define TEST_MSG "VQDMLSL_N" + +#include "ref_vqdmlal_n.c" diff --git a/ref_vqdmulh.c b/ref_vqdmulh.c new file mode 100644 index 0000000..59ff820 --- /dev/null +++ b/ref_vqdmulh.c @@ -0,0 +1,115 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqdmulh(vector,vector2), then store the result.
*/ +#define TEST_VQDMULH2(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary to expand INSN */ +#define TEST_VQDMULH1(INSN, Q, T1, T2, W, N) \ + TEST_VQDMULH2(INSN, Q, T1, T2, W, N) + +#define TEST_VQDMULH(Q, T1, T2, W, N) \ + TEST_VQDMULH1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0xBB); + VDUP(vector2, q, int, s, 16, 8, 0x33); + VDUP(vector2, q, int, s, 32, 4, 0x22); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMULH(, int, s, 16, 4); + TEST_VQDMULH(, int, s, 32, 2); + TEST_VQDMULH(q, int, s, 16, 8); + TEST_VQDMULH(q, int, s, 32, 4); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4, 0x8000); +
VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector2, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector2, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector2, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMULH(, int, s, 16, 4); + TEST_VQDMULH(, int, s, 32, 2); + TEST_VQDMULH(q, int, s, 16, 8); + TEST_VQDMULH(q, int, s, 32, 4); + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqdmulh_lane.c b/ref_vqdmulh_lane.c new file mode 100644 index 0000000..ae52667 --- /dev/null +++ b/ref_vqdmulh_lane.c @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH_LANE" +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result. */ +#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary to expand INSN */ +#define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) + +#define TEST_VQDMULH_LANE(Q, T1, T2, W, N, N2, L) \ + TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + /* vector2: vqdmulh_lane and vqdmulhq_lane have a 2nd argument with + the same number of elements, so we need only one variable of each + type.
*/ + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0xBB); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 3); + TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 2); + TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 1); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); +} diff --git a/ref_vqdmulh_n.c b/ref_vqdmulh_n.c new file mode 100644 index 0000000..376259e --- /dev/null +++ b/ref_vqdmulh_n.c @@ -0,0 +1,110 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to 
permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH_N" +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmulh_n(vector,val), then store the result. 
*/ +#define TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary to expand INSN */ +#define TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L) \ + TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L) + +#define TEST_VQDMULH_N(Q, T1, T2, W, N, L) \ + TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + + /* Initialize vector */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x100023); + VDUP(vector, q, int, s, 16, 8, 0x1000); + VDUP(vector, q, int, s, 32, 4, 0x100045); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMULH_N(, int, s, 16, 4, 0xCF); + TEST_VQDMULH_N(, int, s, 32, 2, 0x2344); + TEST_VQDMULH_N(q, int, s, 16, 8, 0x80); + TEST_VQDMULH_N(q, int, s, 32, 4, 0x5422); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); +
fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQDMULH_N(, int, s, 16, 4, 0x8000); + TEST_VQDMULH_N(, int, s, 32, 2, 0x80000000); + TEST_VQDMULH_N(q, int, s, 16, 8, 0x8000); + TEST_VQDMULH_N(q, int, s, 32, 4, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); +} diff --git a/ref_vqdmull.c b/ref_vqdmull.c new file mode 100644 index 0000000..478181d --- /dev/null +++ b/ref_vqdmull.c @@ -0,0 +1,93 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL" +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqdmull(x,x), then store the result. 
*/ +#define TEST_VQDMULL2(INSN, T1, T2, W, W2, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary to expand INSN */ +#define TEST_VQDMULL1(INSN, T1, T2, W, W2, N) \ + TEST_VQDMULL2(INSN, T1, T2, W, W2, N) + +#define TEST_VQDMULL(T1, T2, W, W2, N) \ + TEST_VQDMULL1(INSN, T1, T2, W, W2, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector2, buffer, , int, s, 16, 4); + VLOAD(vector2, buffer, , int, s, 32, 2); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMULL(int, s, 16, 32, 4); + TEST_VQDMULL(int, s, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector2, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQDMULL(int, s, 16, 32, 4); + TEST_VQDMULL(int, s, 32, 64, 2); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); +} diff --git a/ref_vqdmull_lane.c b/ref_vqdmull_lane.c new file mode 100644 index 0000000..bf92c6b --- /dev/null +++ b/ref_vqdmull_lane.c @@ -0,0 +1,109 @@ +/* +
+Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL_LANE" +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_lane_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary to expand INSN */ +#define TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L) \ + TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) + +#define TEST_VQDMULL_LANE(T1, T2, W, W2, N, L) \ + TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2 */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1); + + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + + + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector2, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2,
1); + + fprintf (ref_file, "\n%s output:\n", + TEST_MSG " (check mul cumulative saturation)"); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); +} diff --git a/ref_vqdmull_n.c b/ref_vqdmull_n.c new file mode 100644 index 0000000..7a482b2 --- /dev/null +++ b/ref_vqdmull_n.c @@ -0,0 +1,104 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL_N" +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmull_n(vector,val), then store the result. 
*/ +#define TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_n_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary to expand INSN */ +#define TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L) \ + TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) + +#define TEST_VQDMULL_N(T1, T2, W, W2, N, L) \ + TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2 */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQDMULL_N(int, s, 16, 32, 4, 0x22); + TEST_VQDMULL_N(int, s, 32, 64, 2, 0x55); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQDMULL_N(int, s, 16, 32, 4, 0x8000); + TEST_VQDMULL_N(int, s, 32, 64, 2, 0x80000000); + + fprintf(ref_file, "\n%s output:\n", + TEST_MSG " (check mul cumulative saturation)"); + DUMP(TEST_MSG, int, 32, 4, PRIx32);
+ DUMP(TEST_MSG, int, 64, 2, PRIx64); +} diff --git a/ref_vqmovn.c b/ref_vqmovn.c new file mode 100644 index 0000000..0f7c933 --- /dev/null +++ b/ref_vqmovn.c @@ -0,0 +1,114 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vqmovn +#define TEST_MSG "VQMOVN" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W2), \ + xSTR(T1), W, N) + +#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N) \ + TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with arbitrary values */ + VDUP(vector, q, int, s, 16, 8, 0x12); + VDUP(vector, q, int, s, 32, 4, 0x1278); + VDUP(vector, q, int, s, 64, 2, 0x12345678); + VDUP(vector, q, uint, u, 16, 8, 0x82); + VDUP(vector, q, uint, u, 32, 4, 0x8765); + VDUP(vector, q, uint, u, 64, 2, 0x87654321); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); + + + /* Fill input vector with arbitrary values which cause a cumulative + saturation.
*/ + VDUP(vector, q, int, s, 16, 8, 0x1234); + VDUP(vector, q, int, s, 32, 4, 0x12345678); + VDUP(vector, q, int, s, 64, 2, 0x1234567890ABLL); + VDUP(vector, q, uint, u, 16, 8, 0x8234); + VDUP(vector, q, uint, u, 32, 4, 0x87654321); + VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqmovun.c b/ref_vqmovun.c new file mode 100644 index 0000000..5582cea --- /dev/null +++ b/ref_vqmovun.c @@ -0,0 +1,95 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vqmovun +#define TEST_MSG "VQMOVUN" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_s##W2), \ + xSTR(T1), W, N) + +#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N) \ + TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with arbitrary values */ + VDUP(vector, q, int, s, 16, 8, 0x34); + VDUP(vector, q, int, s, 32, 4, 0x5678); + VDUP(vector, q, int, s, 64, 2, 0x12345678); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); + + /* Fill input vector with negative values */ 
+ VDUP(vector, q, int, s, 16, 8, 0x8234); + VDUP(vector, q, int, s, 32, 4, 0x87654321); + VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative input)"); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex2 (TEST_MSG, " (negative input)"); +} diff --git a/ref_vqneg.c b/ref_vqneg.c new file mode 100644 index 0000000..49a685b --- /dev/null +++ b/ref_vqneg.c @@ -0,0 +1,73 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqneg +#define TEST_MSG "VQNEG/VQNEGQ" + +/* Extra tests for functions requiring corner cases tests */ +void vqneg_extra(void); +#define EXTRA_TESTS vqneg_extra + +#include "ref_v_unary_sat_op.c" + +void vqneg_extra() +{ + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" with max negative values to check + saturation */ + VDUP(vector, , int, s, 8, 8, 0x80); + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 8, 16, 0x80); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqrdmulh.c b/ref_vqrdmulh.c new file mode 100644 index 0000000..f499b47 --- /dev/null +++ b/ref_vqrdmulh.c @@ -0,0 +1,135 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, 
including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqrdmulh(vector,vector2), then store the result. 
*/ +#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary to expand INSN */ +#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N) \ + TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) + +#define TEST_VQRDMULH(Q, T1, T2, W, N) \ + TEST_VQRDMULH1(INSN, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + VDUP(vector2, , int, s, 16, 4, 0x5555); + VDUP(vector2, , int, s, 32, 2, 0xBB); + VDUP(vector2, q, int, s, 16, 8, 0x33); + VDUP(vector2, q, int, s, 32, 4, 0x22); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2,
0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x80000000); + VDUP(vector2, q, int, s, 16, 8, 0x8000); + VDUP(vector2, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); + + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector2, , int, s, 16, 4, 0x8001); + VDUP(vector2, , int, s, 32, 2, 0x80000001); + VDUP(vector2, q, int, s, 16, 8, 0x8001); + VDUP(vector2, q, int, s, 32, 4, 0x80000001); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check rounding cumulative saturation)"); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + dump_results_hex2 (TEST_MSG, " (check rounding cumulative saturation)"); +} diff --git a/ref_vqrdmulh_lane.c b/ref_vqrdmulh_lane.c new file mode 100644 index 0000000..b2c37db --- /dev/null +++ b/ref_vqrdmulh_lane.c @@ -0,0 +1,134 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright 
notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH_LANE" + +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Reference test for the by-lane variants of INSN (vqrdmulh). One test step: call Set_Neon_Cumulative_Sat(0, ...) (presumably clearing the cumulative saturation flag - confirm in stm-arm-neon-ref.h), compute vector_res = vqrdmulh_lane(vector,vector2,lane), store the result with vst1 into the matching result buffer, then call dump_neon_cumulative_sat. */ +#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before it is pasted with ## */ +#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) + +#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L) \ + TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); +
DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + /* vector2: vqrdmulh_lane and vqrdmulhq_lane have a 2nd argument with + the same number of elements, so we need only one variable of each + type. The q tests below pass N2 = 4 / 2 and therefore reuse these same vector2 variables. */ + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0xBB); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + + /* Second run: every lane of vector and vector2 is set to 0x8000 (16-bit) or 0x80000000 (32-bit); reported as (check mul cumulative saturation). */ VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); + + + /* Third run: same vector contents, but vector2 lanes are 0x8001 / 0x80000001; reported as (check rounding cumulative saturation). */ VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector2, , int, s, 16, 4, 0x8001); + VDUP(vector2, , int, s, 32, 2, 0x80000001); + + fprintf(ref_file, "\n%s cumulative 
saturation output:\n", + TEST_MSG " (check rounding cumulative saturation)"); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + dump_results_hex2 (TEST_MSG, " (check rounding cumulative saturation)"); +} diff --git a/ref_vqrdmulh_n.c b/ref_vqrdmulh_n.c new file mode 100644 index 0000000..01f0e0b --- /dev/null +++ b/ref_vqrdmulh_n.c @@ -0,0 +1,124 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH_N" + +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; /* NOTE(review): not referenced directly in this function; presumably the loop index used inside the DUMP macro - confirm in stm-arm-neon-ref.h */ + + /* Reference test for the _n (scalar operand) variants of INSN (vqrdmulh). One test step: call Set_Neon_Cumulative_Sat(0, ...) (presumably clearing the cumulative saturation flag - confirm), compute vector_res = vqrdmulh_n(vector,val), store the result with vst1 into the matching result buffer, then call dump_neon_cumulative_sat. */ +#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before it is pasted with ## */ +#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L) \ + TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) + +#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L) \ + TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456); + + /* Unlike the later runs, this one writes the result header to both ref_file and gcc_tests_file and dumps only the four buffers actually written. */ fprintf(ref_file, "\n%s output:\n", TEST_MSG); + 
fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + /* Second run: every vector lane and the scalar multiplier are 0x8000 (16-bit) or 0x80000000 (32-bit); reported as (check mul cumulative saturation). */ VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check mul cumulative saturation)"); + TEST_VQRDMULH_N(, int, s, 16, 4, 0x8000); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul cumulative saturation)"); + + + /* Third run: same vector contents, but the scalar multiplier is 0x8001 / 0x80000001; reported as (check rounding cumulative saturation). */ VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check rounding cumulative saturation)"); + TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001); + dump_results_hex2 (TEST_MSG, " (check rounding cumulative saturation)"); +} diff --git a/ref_vqrshl.c b/ref_vqrshl.c new file mode 100644 index 0000000..e4a33e5 --- /dev/null +++ b/ref_vqrshl.c @@ -0,0 +1,303 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above 
copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshl +#define TEST_MSG "VQRSHL/VQRSHLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Reference test for INSN (vqrshl). Basic test step: call Set_Neon_Cumulative_Sat(0, ...) (presumably clearing the cumulative saturation flag - confirm), compute v3=vqrshl(v1,v2) where v2 is the vector_shift variable of type T3, store the result with vst1, then call dump_neon_cumulative_sat. */ +#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before it is pasted with ## */ +#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N) \ + TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N) + +#define TEST_VQRSHL(T3, Q, T1, T2, W, N) \ + TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check saturation on limits */ + VDUP(vector, , int, s, 8, 8, 0); + VDUP(vector, , int, s, 16, 4, 0); + 
VDUP(vector, , int, s, 32, 2, 0); + VDUP(vector, , int, s, 64, 1, 0); + VDUP(vector, , uint, u, 8, 8, 0); + VDUP(vector, , uint, u, 16, 4, 0); + VDUP(vector, , uint, u, 32, 2, 0); + VDUP(vector, , uint, u, 64, 1, 0); + VDUP(vector, q, int, s, 8, 16, 0); + VDUP(vector, q, int, s, 16, 8, 0); + VDUP(vector, q, int, s, 32, 4, 0); + VDUP(vector, q, int, s, 64, 2, 0); + VDUP(vector, q, uint, u, 8, 16, 0); + VDUP(vector, q, uint, u, 16, 8, 0); + VDUP(vector, q, uint, u, 32, 4, 0); + VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift amount */ + /* Use values equal or one-less-than the type width to check + behaviour on limits: width-1 for the non-q vectors, width for the q vectors */ + VDUP(vector_shift, , int, s, 8, 8, 7); + VDUP(vector_shift, , int, s, 16, 4, 15); + VDUP(vector_shift, , int, s, 32, 2, 31); + VDUP(vector_shift, , int, s, 64, 1, 63); + VDUP(vector_shift, q, int, s, 8, 16, 8); + VDUP(vector_shift, q, int, s, 16, 8, 16); + VDUP(vector_shift, q, int, s, 32, 4, 32); + VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (with input = 0)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Use negative shift amounts */ + VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -2); + VDUP(vector_shift, , int, s, 32, 2, -3); + VDUP(vector_shift, , int, s, 64, 1, -4); + VDUP(vector_shift, q, int, s, 8, 16, -7); + VDUP(vector_shift, q, int, s, 16, 8, -11); + VDUP(vector_shift, q, int, s, 32, 4, -13); + VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (input 0 and negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as 
shift amount */ + VDUP(vector_shift, , int, s, 8, 8, 1); + VDUP(vector_shift, , int, s, 16, 4, 3); + VDUP(vector_shift, , int, s, 32, 2, 8); + VDUP(vector_shift, , int, s, 64, 1, 3); + VDUP(vector_shift, q, int, s, 8, 16, 10); + VDUP(vector_shift, q, int, s, 16, 8, 12); + VDUP(vector_shift, q, int, s, 32, 4, 31); + VDUP(vector_shift, q, int, s, 64, 2, 63); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex (TEST_MSG); + + /* Use negative shift amounts */ + VDUP(vector_shift, , int, s, 8, 8, -2); + VDUP(vector_shift, , int, s, 16, 4, -2); + VDUP(vector_shift, , int, s, 32, 2, -3); + VDUP(vector_shift, , int, s, 64, 1, -4); + VDUP(vector_shift, q, int, s, 8, 16, -7); + VDUP(vector_shift, q, int, s, 16, 8, -11); + VDUP(vector_shift, q, int, s, 32, 4, -13); + VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + + /* Fill input vector with max value, to check saturation on limits */ + VDUP(vector, , int, s, 8, 8, 0x7F); + VDUP(vector, , int, s, 16, 4, 0x7FFF); + VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, , uint, u, 8, 8, 0xFF); + VDUP(vector, , uint, u, 16, 4, 0xFFFF); + VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + VDUP(vector, q, int, s, 8, 16, 0x7F); + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, q, uint, u, 8, 16, 0xFF); + VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use -1 shift amount to check cumulative saturation with round_const */ 
+ VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -1); + VDUP(vector_shift, , int, s, 32, 2, -1); + VDUP(vector_shift, , int, s, 64, 1, -1); + VDUP(vector_shift, q, int, s, 8, 16, -1); + VDUP(vector_shift, q, int, s, 16, 8, -1); + VDUP(vector_shift, q, int, s, 32, 4, -1); + VDUP(vector_shift, q, int, s, 64, 2, -1); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: shift by -1)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: shift by -1)"); + + + /* Use -3 shift amount to check cumulative saturation with round_const */ + VDUP(vector_shift, , int, s, 8, 8, -3); + VDUP(vector_shift, , int, s, 16, 4, -3); + VDUP(vector_shift, , int, s, 32, 2, -3); + VDUP(vector_shift, , int, s, 64, 1, -3); + VDUP(vector_shift, q, int, s, 8, 16, -3); + VDUP(vector_shift, q, int, s, 16, 8, -3); + VDUP(vector_shift, q, int, s, 32, 4, -3); + VDUP(vector_shift, q, int, s, 64, 2, -3); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: shift by -3)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: shift by -3)"); + + + /* Use large shift amount */ + VDUP(vector_shift, , int, s, 8, 8, 10); + VDUP(vector_shift, , int, s, 16, 4, 20); + VDUP(vector_shift, , int, s, 32, 2, 40); + VDUP(vector_shift, , int, s, 64, 1, 70); + VDUP(vector_shift, q, int, s, 8, 16, 10); + VDUP(vector_shift, q, int, s, 16, 8, 20); + VDUP(vector_shift, q, int, s, 32, 4, 40); + VDUP(vector_shift, q, int, s, 64, 2, 70); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: large shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: large shift amount)"); + + + /* Fill input vector with negative values, to check 
saturation on limits */ + VDUP(vector, , int, s, 8, 8, 0x80); + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); + VDUP(vector, q, int, s, 8, 16, 0x80); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); + + /* Use large shift amount */ + VDUP(vector_shift, , int, s, 8, 8, 10); + VDUP(vector_shift, , int, s, 16, 4, 20); + VDUP(vector_shift, , int, s, 32, 2, 40); + VDUP(vector_shift, , int, s, 64, 1, 70); + VDUP(vector_shift, q, int, s, 8, 16, 10); + VDUP(vector_shift, q, int, s, 16, 8, 20); + VDUP(vector_shift, q, int, s, 32, 4, 40); + VDUP(vector_shift, q, int, s, 64, 2, 70); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: large shift amount with negative input)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: large shift amount with negative input)"); + + + /* Fill input vector with negative and positive values (0x7F... in the non-q signed vectors, 0x80... in the q signed vectors), to check + * saturation on limits */ + VDUP(vector, , int, s, 8, 8, 0x7F); + VDUP(vector, , int, s, 16, 4, 0x7FFF); + VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, q, int, s, 8, 16, 0x80); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); + + /* Use large negative shift amount */ + VDUP(vector_shift, , int, s, 8, 8, -10); + VDUP(vector_shift, , int, s, 16, 4, -20); + VDUP(vector_shift, , int, s, 32, 2, -40); + VDUP(vector_shift, , int, s, 64, 1, -70); + VDUP(vector_shift, q, int, s, 8, 16, -10); + VDUP(vector_shift, q, int, s, 16, 8, -20); + VDUP(vector_shift, q, int, s, 32, 4, -40); + VDUP(vector_shift, q, int, s, 64, 2, -70); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", 
+ TEST_MSG " (checking cumulative saturation: large negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: large negative shift amount)"); + + + /* Fill input vector with 0, to check saturation in case of large + * shift amount */ + VDUP(vector, , int, s, 8, 8, 0); + VDUP(vector, , int, s, 16, 4, 0); + VDUP(vector, , int, s, 32, 2, 0); + VDUP(vector, , int, s, 64, 1, 0); + VDUP(vector, q, int, s, 8, 16, 0); + VDUP(vector, q, int, s, 16, 8, 0); + VDUP(vector, q, int, s, 32, 4, 0); + VDUP(vector, q, int, s, 64, 2, 0); + + /* Use large negative shift amounts (the same values as in the previous test), although the output label below only says large shift amount */ + VDUP(vector_shift, , int, s, 8, 8, -10); + VDUP(vector_shift, , int, s, 16, 4, -20); + VDUP(vector_shift, , int, s, 32, 2, -40); + VDUP(vector_shift, , int, s, 64, 1, -70); + VDUP(vector_shift, q, int, s, 8, 16, -10); + VDUP(vector_shift, q, int, s, 16, 8, -20); + VDUP(vector_shift, q, int, s, 32, 4, -40); + VDUP(vector_shift, q, int, s, 64, 2, -70); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (checking cumulative saturation: large shift amount with 0 input)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, + " (checking cumulative saturation: large shift amount with 0 input)"); +} diff --git a/ref_vqrshrn_n.c b/ref_vqrshrn_n.c new file mode 100644 index 0000000..34bf082 --- /dev/null +++ b/ref_vqrshrn_n.c @@ -0,0 +1,134 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice 
and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshrn_n +#define TEST_MSG "VQRSHRN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Reference test for INSN (vqrshrn_n). Basic test step: call Set_Neon_Cumulative_Sat(0, ...) (presumably clearing the cumulative saturation flag - confirm), compute y=vqrshrn_n(x,v) where x has W-bit elements and y has W2-bit elements, store the result with vst1, then call dump_neon_cumulative_sat with the input width W. */ +#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before it is pasted with ## */ +#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V) \ + TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + 
DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 1); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 2); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Another set of tests */ + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* shift by 3 to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check saturation: shift by 3)"); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 3); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 3); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 3); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3); + TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by 3)"); + + + /* shift by max (here the narrowed element width W2: 8, 16, 32) to exercise saturation code in the lib; the inputs are still the values set for the previous test */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check saturation: shift by 
max)"); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 32); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8); + TEST_VQRSHRN_N(uint, u, 32, 16, 4, 16); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by max)"); +} diff --git a/ref_vqrshrun_n.c b/ref_vqrshrun_n.c new file mode 100644 index 0000000..53d11f5 --- /dev/null +++ b/ref_vqrshrun_n.c @@ -0,0 +1,142 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshrun_n +#define TEST_MSG "VQRSHRUN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Reference test for INSN (vqrshrun_n). Basic test step: call Set_Neon_Cumulative_Sat(0, ...) (presumably clearing the cumulative saturation flag - confirm), compute y=vqrshrun_n(x,v) where x is a T1 vector with W-bit elements and y is always a uint vector with W2-bit elements, store the result with vst1_u, then call dump_neon_cumulative_sat with the input type and width. */ +#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before it is pasted with ## */ +#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V) \ + TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + VDUP(vector, q, int, s, 16, 8, -2); + VDUP(vector, q, int, s, 32, 4, -3); + VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); 
+ + /* Fill input vector with max value, to check saturation on limits */ + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by 1)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check cumulative saturation: shift by 1)"); + + /* shift by max (here the narrowed element width W2: 8, 16, 32), still with the max-value inputs set above */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by max, positive input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32); + + dump_results_hex2 (TEST_MSG, + " (check cumulative saturation: shift by max, positive input)"); + + + /* Fill input vector with min value, to check saturation on limits */ + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); + + /* shift by max */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by max, negative input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32); + + dump_results_hex2 (TEST_MSG, + " (check cumulative saturation: shift by max, negative input)"); + + /* Fill input vector with positive values, to check normal case. NOTE(review): 0x87654321 has its top bit set, so in the signed 32-bit lanes it is presumably a negative value rather than a positive one - confirm the intent. */ + VDUP(vector, q, int, s, 16, 8, 0x1234); + VDUP(vector, q, int, s, 32, 4, 0x87654321); + VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8); + + dump_results_hex 
(TEST_MSG); +} diff --git a/ref_vqshl.c b/ref_vqshl.c new file mode 100644 index 0000000..a9d29d7 --- /dev/null +++ b/ref_vqshl.c @@ -0,0 +1,241 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshl +#define TEST_MSG "VQSHL/VQSHLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v3=vqshl(v1,v2), then store the result. 
*/ +#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before TEST_VQSHL2 pastes it with ##; T3 is the type used for the vector_shift operand */ +#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N) \ + TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) + +#define TEST_VQSHL(T3, Q, T1, T2, W, N) \ + TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check saturation on limits */ + VDUP(vector, , int, s, 8, 8, 0); + VDUP(vector, , int, s, 16, 4, 0); + VDUP(vector, , int, s, 32, 2, 0); + VDUP(vector, , int, s, 64, 1, 0); + VDUP(vector, , uint, u, 8, 8, 0); + VDUP(vector, , uint, u, 16, 4, 0); + VDUP(vector, , uint, u, 32, 2, 0); + VDUP(vector, , uint, u, 64, 1, 0); + VDUP(vector, q, int, s, 8, 16, 0); + VDUP(vector, q, int, s, 16, 8, 0); + VDUP(vector, q, int, s, 32, 4, 0); + VDUP(vector, q, int, s, 64, 2, 0); + VDUP(vector, q, uint, u, 8, 16, 0); + VDUP(vector, q, uint, u, 16, 8, 0); + VDUP(vector, q, uint, u, 32, 4, 0); + VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift amount */ + /* Use values equal or one-less-than the type width to check + behaviour on limits */ + /* Shift all lanes by 7 ... 
*/ + VDUP(vector_shift, , int, s, 8, 8, 7); + /* except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9) */ + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6); + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8); + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9); + + VDUP(vector_shift, , int, s, 16, 4, 15); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17); + + VDUP(vector_shift, , int, s, 32, 2, 31); + TEST_VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30); + + VDUP(vector_shift, , int, s, 64, 1, 63); + + VDUP(vector_shift, q, int, s, 8, 16, 8); + VDUP(vector_shift, q, int, s, 16, 8, 16); + VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33); + + VDUP(vector_shift, q, int, s, 64, 2, 64); + TEST_VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (with input = 0)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + + /* Use negative shift amounts */ + VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -2); + VDUP(vector_shift, , int, s, 32, 2, -3); + VDUP(vector_shift, , int, s, 64, 1, -4); + VDUP(vector_shift, q, int, s, 8, 16, -7); + VDUP(vector_shift, q, int, s, 16, 8, -11); + VDUP(vector_shift, q, int, s, 32, 4, -13); + VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (input 0 and negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + VDUP(vector_shift, , int, s, 8, 8, 1); + VDUP(vector_shift, , int, s, 16, 4, 3); + 
VDUP(vector_shift, , int, s, 32, 2, 8); + VDUP(vector_shift, , int, s, 64, 1, -3); + VDUP(vector_shift, q, int, s, 8, 16, 10); + VDUP(vector_shift, q, int, s, 16, 8, 12); + VDUP(vector_shift, q, int, s, 32, 4, 32); + VDUP(vector_shift, q, int, s, 64, 2, 63); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex (TEST_MSG); + + /* Use negative shift amounts */ + VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -2); + VDUP(vector_shift, , int, s, 32, 2, -3); + VDUP(vector_shift, , int, s, 64, 1, -4); + VDUP(vector_shift, q, int, s, 8, 16, -7); + VDUP(vector_shift, q, int, s, 16, 8, -11); + VDUP(vector_shift, q, int, s, 32, 4, -13); + VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + /* Use large shift amounts */ + VDUP(vector_shift, , int, s, 8, 8, 8); + VDUP(vector_shift, , int, s, 16, 4, 16); + VDUP(vector_shift, , int, s, 32, 2, 32); + VDUP(vector_shift, , int, s, 64, 1, 64); + VDUP(vector_shift, q, int, s, 8, 16, 8); + VDUP(vector_shift, q, int, s, 16, 8, 16); + VDUP(vector_shift, q, int, s, 32, 4, 32); + VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (large shift amount, negative input)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (large shift amount, negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + VDUP(vector, , int, s, 8, 8, 0x7F); + VDUP(vector, , int, s, 16, 4, 0x7FFF); + VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, , uint, u, 8, 8, 0xFF); + VDUP(vector, , uint, u, 16, 4, 0xFFFF); + VDUP(vector, , uint, u, 32, 2, 
0xFFFFFFFF); + VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + VDUP(vector, q, int, s, 8, 16, 0x7F); + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, q, uint, u, 8, 16, 0xFF); + VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Shift by -1 */ + VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -1); + VDUP(vector_shift, , int, s, 32, 2, -1); + VDUP(vector_shift, , int, s, 64, 1, -1); + VDUP(vector_shift, q, int, s, 8, 16, -1); + VDUP(vector_shift, q, int, s, 16, 8, -1); + VDUP(vector_shift, q, int, s, 32, 4, -1); + VDUP(vector_shift, q, int, s, 64, 2, -1); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (check cumulative saturation)"); + + /* Use large shift amounts */ + VDUP(vector_shift, , int, s, 8, 8, 8); + VDUP(vector_shift, , int, s, 16, 4, 16); + VDUP(vector_shift, , int, s, 32, 2, 32); + VDUP(vector_shift, , int, s, 64, 1, 64); + VDUP(vector_shift, q, int, s, 8, 16, 8); + VDUP(vector_shift, q, int, s, 16, 8, 16); + VDUP(vector_shift, q, int, s, 32, 4, 32); + VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (large shift amount, positive input)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (large shift amount, positive input)"); + + /* Check 64 bits saturation */ + VDUP(vector, , int, s, 64, 1, -10); + VDUP(vector_shift, , int, s, 64, 1, 64); + VDUP(vector, q, int, s, 64, 2, 10); + VDUP(vector_shift, q, int, s, 64, 2, 64); + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check saturation on 64 bits)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, 
int); + dump_results_hex2 (TEST_MSG, " (check saturation on 64 bits)"); +} diff --git a/ref_vqshl_n.c b/ref_vqshl_n.c new file mode 100644 index 0000000..3ee26b5 --- /dev/null +++ b/ref_vqshl_n.c @@ -0,0 +1,132 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshl +#define TEST_MSG "VQSHL_N/VQSHLQ_N" + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v2=vqshl_n(v1,v), then store the result. 
*/ +#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before TEST_VQSHL_N2 pastes it with ##; their parameter names mirror TEST_VQSHL_N2 (V is the shift amount passed to the _n intrinsic) */ +#define TEST_VQSHL_N1(INSN, Q, T1, T2, W, N, V) \ + TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V) + +#define TEST_VQSHL_N(Q, T1, T2, W, N, V) \ + TEST_VQSHL_N1(INSN, Q, T1, T2, W, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQSHL_N(, int, s, 8, 8, 2); + TEST_VQSHL_N(, int, s, 16, 4, 1); + TEST_VQSHL_N(, int, s, 32, 2, 1); + TEST_VQSHL_N(, int, s, 64, 1, 2); + TEST_VQSHL_N(, uint, u, 8, 8, 3); + TEST_VQSHL_N(, uint, u, 16, 4, 2); + TEST_VQSHL_N(, uint, u, 32, 2, 3); + TEST_VQSHL_N(, uint, u, 64, 1, 3); + + TEST_VQSHL_N(q, int, s, 8, 16, 2); + TEST_VQSHL_N(q, int, s, 16, 8, 1); + TEST_VQSHL_N(q, int, s, 32, 4, 1); + TEST_VQSHL_N(q, int, s, 64, 2, 2); + TEST_VQSHL_N(q, uint, u, 8, 16, 3); + TEST_VQSHL_N(q, uint, u, 16, 8, 2); + TEST_VQSHL_N(q, uint, u, 32, 4, 3); + TEST_VQSHL_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + /* Fill input vector with max value, to check saturation on limits */ + VDUP(vector, , int, s, 8, 8, 0x7F); + VDUP(vector, , int, s, 16, 4, 0x7FFF); + VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, , uint, u, 8, 8, 0xFF); + VDUP(vector, 
, uint, u, 16, 4, 0xFFFF); + VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + VDUP(vector, q, int, s, 8, 16, 0x7F); + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, q, uint, u, 8, 16, 0xFF); + VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check saturation with large positive input)"); + TEST_VQSHL_N(, int, s, 8, 8, 2); + TEST_VQSHL_N(, int, s, 16, 4, 1); + TEST_VQSHL_N(, int, s, 32, 2, 1); + TEST_VQSHL_N(, int, s, 64, 1, 2); + TEST_VQSHL_N(, uint, u, 8, 8, 3); + TEST_VQSHL_N(, uint, u, 16, 4, 2); + TEST_VQSHL_N(, uint, u, 32, 2, 3); + TEST_VQSHL_N(, uint, u, 64, 1, 3); + + TEST_VQSHL_N(q, int, s, 8, 16, 2); + TEST_VQSHL_N(q, int, s, 16, 8, 1); + TEST_VQSHL_N(q, int, s, 32, 4, 1); + TEST_VQSHL_N(q, int, s, 64, 2, 2); + TEST_VQSHL_N(q, uint, u, 8, 16, 3); + TEST_VQSHL_N(q, uint, u, 16, 8, 2); + TEST_VQSHL_N(q, uint, u, 32, 4, 3); + TEST_VQSHL_N(q, uint, u, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (check saturation with large positive input)"); +} diff --git a/ref_vqshlu_n.c b/ref_vqshlu_n.c new file mode 100644 index 0000000..27d53de --- /dev/null +++ b/ref_vqshlu_n.c @@ -0,0 +1,157 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above 
copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshlu +#define TEST_MSG "VQSHLU_N/VQSHLUQ_N" + +#define FNNAME1(NAME) void exec_ ## NAME ## _n(void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v2=vqshlu_n(v1,v), then store the result. T1/T2 name the input vector type, T3/T4 the result vector type. */ +#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ + VECT_VAR(vector_res, T3, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before TEST_VQSHLU_N2 pastes it with ## */ +#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) \ + TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) + +#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V) \ + TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + VDUP(vector, , int, s, 8, 8, -1); + VDUP(vector, , int, s, 16, 4, -2); + 
VDUP(vector, , int, s, 32, 2, -3); + VDUP(vector, , int, s, 64, 1, -4); + VDUP(vector, q, int, s, 8, 16, -1); + VDUP(vector, q, int, s, 16, 8, -2); + VDUP(vector, q, int, s, 32, 4, -3); + VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative input)"); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + VDUP(vector, , int, s, 8, 8, 0x7F); + VDUP(vector, , int, s, 16, 4, 0x7FFF); + VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, q, int, s, 8, 16, 0x7F); + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation: shift by 1)"); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check cumulative saturation: shift by 1)"); + + /* shift by 2 to force saturation */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + 
TEST_MSG " (check cumulative saturation: shift by 2)"); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2); + + dump_results_hex2 (TEST_MSG, " (check cumulative saturation: shift by 2)"); + + /* Fill input vector with positive values, to check normal case */ + VDUP(vector, , int, s, 8, 8, 1); + VDUP(vector, , int, s, 16, 4, 2); + VDUP(vector, , int, s, 32, 2, 3); + VDUP(vector, , int, s, 64, 1, 4); + VDUP(vector, q, int, s, 8, 16, 5); + VDUP(vector, q, int, s, 16, 8, 6); + VDUP(vector, q, int, s, 32, 4, 7); + VDUP(vector, q, int, s, 64, 2, 8); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqshrn_n.c b/ref_vqshrn_n.c new file mode 100644 index 0000000..96b8d61 --- /dev/null +++ b/ref_vqshrn_n.c @@ -0,0 +1,135 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is 
+furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshrn_n +#define TEST_MSG "VQSHRN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqshrn_n(x,v), then store the result. 
*/ +#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before TEST_VQSHRN_N2 pastes it with ##; W is the input element width, W2 the result element width */ +#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQSHRN_N(T1, T2, W, W2, N, V) \ + TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQSHRN_N(int, s, 16, 8, 8, 1); + TEST_VQSHRN_N(int, s, 32, 16, 4, 1); + TEST_VQSHRN_N(int, s, 64, 32, 2, 2); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + /* Another 
set of tests */ + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* shift by 3 to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check saturation: shift by 3)"); + TEST_VQSHRN_N(int, s, 16, 8, 8, 3); + TEST_VQSHRN_N(int, s, 32, 16, 4, 3); + TEST_VQSHRN_N(int, s, 64, 32, 2, 3); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 3); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by 3)"); + + + /* shift by max to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check saturation: shift by max)"); + TEST_VQSHRN_N(int, s, 16, 8, 8, 8); + TEST_VQSHRN_N(int, s, 32, 16, 4, 16); + TEST_VQSHRN_N(int, s, 64, 32, 2, 32); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 8); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 16); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 32); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by max)"); +} diff --git a/ref_vqshrun_n.c b/ref_vqshrun_n.c new file mode 100644 index 0000000..871da96 --- /dev/null +++ b/ref_vqshrun_n.c @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to 
permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshrun_n +#define TEST_MSG "VQSHRUN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqshrun_n(x,v), then store the result. 
*/ +#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) + + /* Two auxiliary macros are necessary so that INSN is macro-expanded before TEST_VQSHRUN_N2 pastes it with ##; the result vector is always the uint variant of width W2 */ +#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V) \ + TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + VDUP(vector, q, int, s, 16, 8, -2); + VDUP(vector, q, int, s, 32, 4, -3); + VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (negative input)"); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 3); + TEST_VQSHRUN_N(int, s, 32, 16, 4, 4); + TEST_VQSHRUN_N(int, s, 64, 32, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", + TEST_MSG " (check cumulative saturation)"); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 1); 
+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 1); + TEST_VQSHRUN_N(int, s, 64, 32, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check cumulative saturation)"); + + /* Fill input vector with arbitrary values, to check normal case. NOTE(review): 0x87654321 has bit 31 set, so the 32-bit lanes are presumably negative rather than positive - confirm this is intended */ + VDUP(vector, q, int, s, 16, 8, 0x1234); + VDUP(vector, q, int, s, 32, 4, 0x87654321); + VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s cumulative saturation output:\n", TEST_MSG); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 6); + TEST_VQSHRUN_N(int, s, 32, 16, 4, 7); + TEST_VQSHRUN_N(int, s, 64, 32, 2, 8); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqsub.c b/ref_vqsub.c new file mode 100644 index 0000000..dc44505 --- /dev/null +++ b/ref_vqsub.c @@ -0,0 +1,163 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqsub +#define TEST_MSG "VQSUB/VQSUBQ" + +/* Extra tests for functions requiring types larger than 64 bits to + compute saturation */ +void vqsub_64(void); +#define EXTRA_TESTS vqsub_64 + +#include "ref_v_binary_sat_op.c" + +void vqsub_64(void) +{ + int i; /* not referenced directly here; presumably the loop index used inside the DUMP macro - TODO confirm */ + + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + /* Initialize input "vector1" from "buffer"; the first round below uses 0 as second operand */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); + + VDUP(vector2, , int, s, 64, 1, 0x0); + VDUP(vector2, , uint, u, 64, 1, 0x0); + VDUP(vector2, q, int, s, 64, 2, 0x0); + VDUP(vector2, q, uint, u, 64, 2, 0x0); + + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + fprintf(ref_file, "\n64 bits saturation:\n"); + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests: small positive second operand (0x44 signed, 0x88 unsigned) */ + VDUP(vector2, , int, s, 64, 1, 0x44); + VDUP(vector2, , uint, u, 64, 1, 0x88); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests: second operand set to the largest signed / unsigned 64-bit value */ + VDUP(vector2, , int, s, 64, 1, 0x7fffffffffffffffLL); + VDUP(vector2, , uint, u, 64, 1, 0xffffffffffffffffULL); + + /* To check positive 
saturation, we need to write a positive value + in vector1 */ + VDUP(vector1, q, int, s, 64, 2, 0x3fffffffffffffffLL); + VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000LL); + + VDUP(vector2, q, uint, u, 64, 2, 0xffffffffffffffffULL); + + fprintf(ref_file, + "\n%s 64 bits saturation cumulative saturation output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* To improve coverage, check saturation with less than 64 bits too */ + fprintf(ref_file, "\nless than 64 bits saturation:\n"); + VDUP(vector2, , int, s, 8, 8, 0x7F); + VDUP(vector2, , int, s, 16, 4, 0x7FFF); + VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector2, q, int, s, 8, 16, 0x7F); + VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + DUMP(TEST_MSG, int, 8, 8, PRIx8); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 8, 16, PRIx8); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + VDUP(vector1, , uint, u, 8, 8, 0x10); + VDUP(vector1, , uint, u, 16, 4, 0x10); + VDUP(vector1, , uint, u, 32, 2, 0x10); + VDUP(vector1, q, uint, u, 8, 16, 0x10); + VDUP(vector1, q, uint, u, 16, 8, 0x10); + VDUP(vector1, q, uint, u, 32, 4, 0x10); + + VDUP(vector2, , uint, u, 8, 8, 0x20); + VDUP(vector2, , uint, u, 16, 4, 0x20); + VDUP(vector2, , uint, u, 32, 2, 0x20); + 
VDUP(vector2, q, uint, u, 8, 16, 0x20); + VDUP(vector2, q, uint, u, 16, 8, 0x20); + VDUP(vector2, q, uint, u, 32, 4, 0x20); + + fprintf(ref_file, + "\n%s less than 64 bits saturation cumulative saturation output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_vraddhn.c b/ref_vraddhn.c new file mode 100644 index 0000000..c6aac33 --- /dev/null +++ b/ref_vraddhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vraddhn +#define TEST_MSG "VRADDHN" + +#include "ref_vaddhn.c" diff --git a/ref_vrecpe.c b/ref_vrecpe.c new file mode 100644 index 0000000..5bbbf7c --- /dev/null +++ b/ref_vrecpe.c @@ -0,0 +1,143 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +#include <math.h> + /* exec_vrecpe: reference-output generator for vrecpe/vrecpeq on the u32 and f32 variants. Each round fills vector with constants (VDUP), applies TEST_VRECPE, prints a header to ref_file and dumps with DUMP/DUMP_FP. Rounds: positive inputs, negative inputs, then FP special values (NaN, +/-infinity, +/-0, +/-9.0e37). */ +#define TEST_MSG "VRECPE/VRECPEQ" +void exec_vrecpe(void) +{ + int i; /* not referenced directly in this function; presumably used inside the helper macros (TODO confirm in stm-arm-neon-ref.h) */ + + /* Basic test: y=vrecpe(x), then store the result. 
*/ +#define TEST_VRECPE(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrecpe##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants: only the 32-bit uint and float variants are declared and tested */ + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + VDUP(vector, , uint, u, 32, 2, 0x12345678); + VDUP(vector, , float, f, 32, 2, 1.9f); + VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); + VDUP(vector, q, float, f, 32, 4, 125.0f); + + /* Apply the operator */ + TEST_VRECPE(, uint, u, 32, 2); + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, uint, u, 32, 4); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " (positive input)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Choose init value arbitrarily (negative FP inputs this time) */ + VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + VDUP(vector, , float, f, 32, 2, -10.0f); + VDUP(vector, q, uint, u, 32, 4, 0x89081234); + VDUP(vector, q, float, f, 32, 4, -125.0f); + + /* Apply the operator */ + TEST_VRECPE(, uint, u, 32, 2); + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, uint, u, 32, 4); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " (negative input)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Test FP variants with special input values (NaN, 
infinity) */ + VDUP(vector, , float, f, 32, 2, NAN); + VDUP(vector, q, float, f, 32, 4, HUGE_VALF); + + /* Apply the operator */ + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " FP special (NaN, infinity)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Test FP variants with special input values (zero, large value) */ + VDUP(vector, , float, f, 32, 2, 0.0f); + VDUP(vector, q, float, f, 32, 4, 9.0e37f); + + /* Apply the operator */ + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " FP special (zero, large value)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Test FP variants with special input values (-0, -infinity) */ + VDUP(vector, , float, f, 32, 2, -0.0f); + VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); + + /* Apply the operator */ + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " FP special (-0, -infinity)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Test FP variants with special input values (large negative value); only the 2-lane (non-q) variant is run and dumped */ + VDUP(vector, , float, f, 32, 2, -9.0e37f); + + /* Apply the operator */ + TEST_VRECPE(, float, f, 32, 2); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " FP special (large negative value)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); +} diff --git a/ref_vrecps.c b/ref_vrecps.c new file mode 100644 index 0000000..15f841e --- /dev/null +++ b/ref_vrecps.c @@ -0,0 +1,120 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without 
limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +#include <math.h> + /* exec_vrecps: reference-output generator for vrecps/vrecpsq (f32 only). Fills vector and vector2 with constants (VDUP), applies TEST_VRECPS, prints a header to ref_file and dumps with DUMP_FP. Rounds: normal values, NaN mixed with normal values, infinity/0 mixed with normal values, then infinity/0 on both operands. */ +#define TEST_MSG "VRECPS/VRECPSQ" +void exec_vrecps(void) +{ + int i; /* not referenced directly in this function; presumably used inside the helper macros (TODO confirm in stm-arm-neon-ref.h) */ + + /* Basic test: y=vrecps(x1,x2), then store the result. 
*/ +#define TEST_VRECPS(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrecps##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for integer variants */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + VDUP(vector, , float, f, 32, 2, 12.9f); + VDUP(vector, q, float, f, 32, 4, 9.2f); + + VDUP(vector2, , float, f, 32, 2, 8.9f); + VDUP(vector2, q, float, f, 32, 4, 3.2f); + + /* Apply the operator */ + TEST_VRECPS(, float, f, 32, 2); + TEST_VRECPS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + fprintf (gcc_tests_file, "\n%s output:\n", TEST_MSG); /* NOTE(review): only this first header is also written to gcc_tests_file; the later headers in this function go to ref_file only -- confirm intended */ + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + + /* Test FP variants with special input values (NaN): NaN goes into vector for the non-q variant and into vector2 for the q variant; the other operands keep their previous normal values */ + VDUP(vector, , float, f, 32, 2, NAN); + VDUP(vector2, q, float, f, 32, 4, NAN); + + /* Apply the operator */ + TEST_VRECPS(, float, f, 32, 2); + TEST_VRECPS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (NAN) and normal values"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + + /* Test FP variants with special input values (infinity, 0) */ + VDUP(vector, , float, f, 32, 2, HUGE_VALF); + VDUP(vector, q, float, f, 32, 4, 0.0f); + VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Restore a normal value */ + + /* Apply the operator */ + TEST_VRECPS(, float, f, 32, 2); + TEST_VRECPS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (infinity, 0) and normal values"); + 
DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + + /* Test FP variants with only special input values (infinity, 0) */ + VDUP(vector, , float, f, 32, 2, HUGE_VALF); + VDUP(vector, q, float, f, 32, 4, 0.0f); + VDUP(vector2, , float, f, 32, 2, 0.0f); + VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); + + /* Apply the operator */ + TEST_VRECPS(, float, f, 32, 2); + TEST_VRECPS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (infinity, 0)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vreinterpret.c b/ref_vreinterpret.c new file mode 100644 index 0000000..1b3241c --- /dev/null +++ b/ref_vreinterpret.c @@ -0,0 +1,398 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VREINTERPRET/VREINTERPRETQ" + /* exec_vreinterpret: reference-output generator for vreinterpret/vreinterpretq. Loads every input variant from buffer, then for each listed destination/source type pair reinterprets, stores into result and dumps at once (the DUMP call is part of each TEST_VREINTERPRET* macro). The f16 cases are compiled only when the __ARM_FP16_FORMAT_IEEE condition below holds. */ +void exec_vreinterpret (void) +{ + int i; /* not referenced directly in this function; presumably used inside the helper macros (TODO confirm in stm-arm-neon-ref.h) */ + + /* Basic test: y=vreinterpret(x), then store the result and dump it. Macro arguments: (Q, T1, T2, W, N) describe the destination type, (TS1, TS2, WS, NS) the source type. */ +#define TEST_VREINTERPRET(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ + VECT_VAR(vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VREINTERPRET_POLY(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ + VECT_VAR(vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_POLY(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ + VECT_VAR(vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_VREINTERPRET_FP16(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ + VECT_VAR(vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP16(TEST_MSG, T1, W, N, PRIx##W); +#endif + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector, float, 16, 4); + 
DECL_VARIABLE(vector_res, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); + DECL_VARIABLE(vector_res, float, 16, 8); +#endif + + clean_results (); + + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif + + /* The same result buffers are used multiple times, so output them + before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + fprintf(gcc_tests_file, "\n%s output:\n", TEST_MSG); + + /* vreinterpret_s8_xx */ + TEST_VREINTERPRET(, int, s, 8, 8, int, s, 16, 4); + TEST_VREINTERPRET(, int, s, 8, 8, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 8, 8, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 64, 1); + TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 8, 8); + TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 16, 4); + + /* vreinterpret_s16_xx */ + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 64, 1); + TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 8, 8); + TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 16, 4); + + /* vreinterpret_s32_xx */ + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 32, 2, 
uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 64, 1); + TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 8, 8); + TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 16, 4); + + /* vreinterpret_s64_xx */ + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 16, 4); + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 64, 1); + TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 8, 8); + TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 16, 4); + + /* vreinterpret_u8_xx */ + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 64, 1); + TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 8, 8); + TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 16, 4); + + /* vreinterpret_u16_xx */ + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 64, 1); + TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 8, 8); + TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 16, 4); + + /* vreinterpret_u32_xx */ + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 32, 2, 
int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 64, 1); + TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 8, 8); + TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 16, 4); + + /* vreinterpret_u64_xx */ + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 32, 2); + TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 8, 8); + TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 16, 4); + + /* vreinterpret_p8_xx */ + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 8, 8); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 16, 4); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 32, 2); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 64, 1); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 8, 8); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 16, 4); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 32, 2); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 64, 1); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, poly, p, 16, 4); + + /* vreinterpret_p16_xx */ + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 8, 8); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 16, 4); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 32, 2); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 64, 1); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 8, 8); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 16, 4); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 32, 2); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 64, 1); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, poly, p, 8, 8); + + /* 
vreinterpretq_s8_xx */ + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 64, 2); + TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 8, 16); + TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 16, 8); + + /* vreinterpretq_s16_xx */ + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 64, 2); + TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 8, 16); + TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 16, 8); + + /* vreinterpretq_s32_xx */ + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 64, 2); + TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 8, 16); + TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 16, 8); + + /* vreinterpretq_s64_xx */ + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 64, 2); + TEST_VREINTERPRET(q, 
int, s, 64, 2, poly, p, 8, 16); + TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 16, 8); + + /* vreinterpretq_u16_xx */ + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 64, 2); + TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 8, 16); + TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 16, 8); + + /* vreinterpretq_u32_xx */ + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 64, 2); + TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 8, 16); + TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 16, 8); + + /* vreinterpretq_u64_xx */ + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 16, 8); + TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 32, 4); + TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 8, 16); + TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 16, 8); + + /* vreinterpretq_u8_xx */ + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 16, 8); + 
TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 64, 2); + TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 8, 16); + TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 16, 8); + /* NOTE(review): unlike the non-q p8/p16 sections above, no vreinterpretq_p8_xx / vreinterpretq_p16_xx section with integer or poly sources appears in this function; q poly destinations are only reached from f32 (and f16) sources below -- confirm whether intentional */ + /* vreinterpret_f32_xx */ + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 32, 2); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 8, 8); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 32, 2); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 64, 1); + TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 8, 8); + TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 16, 4); + + /* vreinterpretq_f32_xx */ + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 32, 4); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 32, 4); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 64, 2); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 8, 16); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 16, 8); + + /* vreinterpret_xx_f32 */ + TEST_VREINTERPRET(, int, s, 8, 8, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 32, 2, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 32, 2); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, float, f, 32, 2); + 
TEST_VREINTERPRET_POLY(, poly, p, 16, 4, float, f, 32, 2); + + /* vreinterpretq_xx_f32 */ + TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 32, 4, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 32, 4); + TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 32, 4); + TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 32, 4); + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + /* vreinterpret_f16_xx */ + TEST_VREINTERPRET_FP16(, float, f, 16, 4, int, s, 8, 8); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, int, s, 16, 4); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, int, s, 32, 2); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, int, s, 64, 1); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, uint, u, 8, 8); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, uint, u, 16, 4); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, uint, u, 32, 2); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, uint, u, 64, 1); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, poly, p, 8, 8); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, poly, p, 16, 4); + TEST_VREINTERPRET_FP16(, float, f, 16, 4, float, f, 32, 2); + + /* vreinterpretq_f16_xx */ + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, int, s, 8, 16); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, int, s, 16, 8); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, int, s, 32, 4); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, int, s, 64, 2); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, uint, u, 8, 16); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, uint, u, 16, 8); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, uint, u, 32, 4); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, uint, u, 64, 2); + 
TEST_VREINTERPRET_FP16(q, float, f, 16, 8, poly, p, 8, 16); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, poly, p, 16, 8); + TEST_VREINTERPRET_FP16(q, float, f, 16, 8, float, f, 32, 4); + + /* vreinterpret_xx_f16 */ + TEST_VREINTERPRET(, int, s, 8, 8, float, f, 16, 4); + TEST_VREINTERPRET(, int, s, 16, 4, float, f, 16, 4); + TEST_VREINTERPRET(, int, s, 32, 2, float, f, 16, 4); + TEST_VREINTERPRET(, int, s, 64, 1, float, f, 16, 4); + TEST_VREINTERPRET(, uint, u, 8, 8, float, f, 16, 4); + TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 16, 4); + TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 16, 4); + TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 16, 4); + TEST_VREINTERPRET_POLY(, poly, p, 8, 8, float, f, 16, 4); + TEST_VREINTERPRET_POLY(, poly, p, 16, 4, float, f, 16, 4); + TEST_VREINTERPRET_FP(, float, f, 32, 2, float, f, 16, 4); + + /* vreinterpretq_xx_f16 */ + TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 16, 8); + TEST_VREINTERPRET(q, int, s, 16, 8, float, f, 16, 8); + TEST_VREINTERPRET(q, int, s, 32, 4, float, f, 16, 8); + TEST_VREINTERPRET(q, int, s, 64, 2, float, f, 16, 8); + TEST_VREINTERPRET(q, uint, u, 8, 16, float, f, 16, 8); + TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 16, 8); + TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 16, 8); + TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 16, 8); + TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 16, 8); + TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 16, 8); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, float, f, 16, 8); +#endif +} diff --git a/ref_vrev.c b/ref_vrev.c new file mode 100644 index 0000000..f639182 --- /dev/null +++ b/ref_vrev.c @@ -0,0 +1,106 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary operator validation. NOTE(review): this file defines exec_vrev directly and no INSN_NAME is used, so the template wording looks inherited from another file -- confirm */ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + /* exec_vrev: reference-output generator for vrev16/vrev32/vrev64 and their q forms. Loads every input variant from buffer, applies TEST_VREV group by group, and calls dump_results_hex after each group with TEST_MSG redefined to the group name. */ +void exec_vrev (void) +{ + /* Basic test: y=vrev(x), then store the result. 
*/ +#define TEST_VREV(Q, T1, T2, W, N, W2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrev##W2##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Check vrev in each of the existing combinations; TEST_MSG is redefined before each group so that dump_results_hex receives the matching name */ +#define TEST_MSG "VREV16" + TEST_VREV(, int, s, 8, 8, 16); + TEST_VREV(, uint, u, 8, 8, 16); + TEST_VREV(, poly, p, 8, 8, 16); + TEST_VREV(q, int, s, 8, 16, 16); + TEST_VREV(q, uint, u, 8, 16, 16); + TEST_VREV(q, poly, p, 8, 16, 16); + dump_results_hex (TEST_MSG); + +#undef TEST_MSG +#define TEST_MSG "VREV32" + TEST_VREV(, int, s, 8, 8, 32); + TEST_VREV(, int, s, 16, 4, 32); + TEST_VREV(, uint, u, 8, 8, 32); + TEST_VREV(, uint, u, 16, 4, 32); + TEST_VREV(, poly, p, 8, 8, 32); + TEST_VREV(, poly, p, 16, 4, 32); + TEST_VREV(q, int, s, 8, 16, 32); + TEST_VREV(q, int, s, 16, 8, 32); + TEST_VREV(q, uint, u, 8, 16, 32); + TEST_VREV(q, uint, u, 16, 8, 32); + TEST_VREV(q, poly, p, 8, 16, 32); + TEST_VREV(q, poly, p, 16, 8, 32); + dump_results_hex (TEST_MSG); + +#undef TEST_MSG +#define TEST_MSG "VREV64" + TEST_VREV(, int, s, 8, 8, 64); + TEST_VREV(, int, s, 16, 4, 64); + TEST_VREV(, int, s, 32, 2, 64); + TEST_VREV(, uint, u, 8, 8, 64); + TEST_VREV(, uint, u, 16, 4, 64); + TEST_VREV(, uint, u, 32, 2, 64); + TEST_VREV(, poly, p, 8, 8, 64); + TEST_VREV(, poly, p, 16, 4, 64); + TEST_VREV(q, int, s, 8, 16, 64); + TEST_VREV(q, int, s, 16, 8, 64); + TEST_VREV(q, int, s, 32, 4, 64); + TEST_VREV(q, uint, u, 8, 16, 64); + TEST_VREV(q, uint, u, 16, 8, 64); + TEST_VREV(q, uint, u, 32, 4, 64); + TEST_VREV(q, poly, p, 8, 16, 64); + 
TEST_VREV(q, poly, p, 16, 8, 64); + + TEST_VREV(, float, f, 32, 2, 64); + TEST_VREV(q, float, f, 32, 4, 64); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vrhadd.c b/ref_vrhadd.c new file mode 100644 index 0000000..20872a1 --- /dev/null +++ b/ref_vrhadd.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vrhadd +#define TEST_MSG "VRHADD/VRHADDQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vrshl.c b/ref_vrshl.c new file mode 100644 index 0000000..8790619 --- /dev/null +++ b/ref_vrshl.c @@ -0,0 +1,220 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHL/VRSHLQ" +void exec_vrshl (void) +{ + /* Basic test: v3=vrshl(v1,v2), then store the result. T3 is the element type of the shift vector (every invocation below passes int, matching the signed-only vector_shift declarations); Q, T1, T2, W and N select the variant of the input and result vectors. 
*/ +#define TEST_VRSHL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check behavior on limits */ + VDUP(vector, , int, s, 8, 8, 0); + VDUP(vector, , int, s, 16, 4, 0); + VDUP(vector, , int, s, 32, 2, 0); + VDUP(vector, , int, s, 64, 1, 0); + VDUP(vector, , uint, u, 8, 8, 0); + VDUP(vector, , uint, u, 16, 4, 0); + VDUP(vector, , uint, u, 32, 2, 0); + VDUP(vector, , uint, u, 64, 1, 0); + VDUP(vector, q, int, s, 8, 16, 0); + VDUP(vector, q, int, s, 16, 8, 0); + VDUP(vector, q, int, s, 32, 4, 0); + VDUP(vector, q, int, s, 64, 2, 0); + VDUP(vector, q, uint, u, 8, 16, 0); + VDUP(vector, q, uint, u, 16, 8, 0); + VDUP(vector, q, uint, u, 32, 4, 0); + VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift amount */ + /* Use values equal to one-less-than the type width to check + behaviour on limits */ + VDUP(vector_shift, , int, s, 8, 8, 7); + VDUP(vector_shift, , int, s, 16, 4, 15); + VDUP(vector_shift, , int, s, 32, 2, 31); + VDUP(vector_shift, , int, s, 64, 1, 63); + VDUP(vector_shift, q, int, s, 8, 16, 7); + VDUP(vector_shift, q, int, s, 16, 8, 15); + VDUP(vector_shift, q, int, s, 32, 4, 31); + VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Use negative shift amounts (the input vectors are still all zeros) */ + VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -2); + VDUP(vector_shift, , int, s, 32, 2, -3); + VDUP(vector_shift, , int, s, 64, 1, -4); + VDUP(vector_shift, q, int, s, 8, 
16, -7); + VDUP(vector_shift, q, int, s, 16, 8, -11); + VDUP(vector_shift, q, int, s, 32, 4, -13); + VDUP(vector_shift, q, int, s, 64, 2, -20); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount; the set mixes small positive amounts, one negative amount (-3) and amounts at or above the element width (10 for 8-bit and 32 for 32-bit lanes) */ + VDUP(vector_shift, , int, s, 8, 8, 1); + VDUP(vector_shift, , int, s, 16, 4, 3); + VDUP(vector_shift, , int, s, 32, 2, 8); + VDUP(vector_shift, , int, s, 64, 1, -3); + VDUP(vector_shift, q, int, s, 8, 16, 10); + VDUP(vector_shift, q, int, s, 16, 8, 12); + VDUP(vector_shift, q, int, s, 32, 4, 32); + VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex (TEST_MSG); + + + /* Use negative shift amounts (applied to the predefined input values loaded above) */ + VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -2); + VDUP(vector_shift, , int, s, 32, 2, -3); + VDUP(vector_shift, , int, s, 64, 1, -4); + VDUP(vector_shift, q, int, s, 8, 16, -7); + VDUP(vector_shift, q, int, s, 16, 8, -11); + VDUP(vector_shift, q, int, s, 32, 4, -13); + VDUP(vector_shift, q, int, s, 64, 2, -20); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + /* Fill input vector with max value, to check behavior on limits */ + VDUP(vector, , int, s, 8, 8, 0x7F); + VDUP(vector, , int, s, 16, 4, 0x7FFF); + VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, , uint, u, 8, 8, 0xFF); + VDUP(vector, , uint, u, 16, 4, 0xFFFF); + VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + VDUP(vector, q, int, s, 8, 16, 0x7F); + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + 
VDUP(vector, q, uint, u, 8, 16, 0xFF); + VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use -1 shift amount to check overflow with round_const */ + VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -1); + VDUP(vector_shift, , int, s, 32, 2, -1); + VDUP(vector_shift, , int, s, 64, 1, -1); + VDUP(vector_shift, q, int, s, 8, 16, -1); + VDUP(vector_shift, q, int, s, 16, 8, -1); + VDUP(vector_shift, q, int, s, 32, 4, -1); + VDUP(vector_shift, q, int, s, 64, 2, -1); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (checking round_const overflow: shift by -1)"); + + /* Use -3 shift amount to check overflow with round_const */ + VDUP(vector_shift, , int, s, 8, 8, -3); + VDUP(vector_shift, , int, s, 16, 4, -3); + VDUP(vector_shift, , int, s, 32, 2, -3); + VDUP(vector_shift, , int, s, 64, 1, -3); + VDUP(vector_shift, q, int, s, 8, 16, -3); + VDUP(vector_shift, q, int, s, 16, 8, -3); + VDUP(vector_shift, q, int, s, 32, 4, -3); + VDUP(vector_shift, q, int, s, 64, 2, -3); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (checking round_const overflow: shift by -3)"); + + /* Use negative shift amount as large as input vector width, i.e. minus the element width in bits of each variant */ + VDUP(vector_shift, , int, s, 8, 8, -8); + VDUP(vector_shift, , int, s, 16, 4, -16); + VDUP(vector_shift, , int, s, 32, 2, -32); + VDUP(vector_shift, , int, s, 64, 1, -64); + VDUP(vector_shift, q, int, s, 8, 16, -8); + VDUP(vector_shift, q, int, s, 16, 8, -16); + VDUP(vector_shift, q, int, s, 32, 4, -32); + VDUP(vector_shift, q, int, s, 64, 2, -64); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (checking negative shift amount as large as input vector width)"); + + /* Test large shift amount: at or above the element width of each variant */ + VDUP(vector_shift, , int, s, 8, 8, 10); + VDUP(vector_shift, , int, s, 16, 4, 20); + VDUP(vector_shift, , int, s, 
32, 2, 33); + VDUP(vector_shift, , int, s, 64, 1, 65); + VDUP(vector_shift, q, int, s, 8, 16, 9); + VDUP(vector_shift, q, int, s, 16, 8, 16); + VDUP(vector_shift, q, int, s, 32, 4, 32); + VDUP(vector_shift, q, int, s, 64, 2, 64); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (large shift amount)"); + + /* Test large negative shift amount: magnitude at or above the element width of each variant */ + VDUP(vector_shift, , int, s, 8, 8, -10); + VDUP(vector_shift, , int, s, 16, 4, -20); + VDUP(vector_shift, , int, s, 32, 2, -33); + VDUP(vector_shift, , int, s, 64, 1, -65); + VDUP(vector_shift, q, int, s, 8, 16, -9); + VDUP(vector_shift, q, int, s, 16, 8, -16); + VDUP(vector_shift, q, int, s, 32, 4, -32); + VDUP(vector_shift, q, int, s, 64, 2, -64); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (large negative shift amount)"); +} diff --git a/ref_vrshr_n.c b/ref_vrshr_n.c new file mode 100644 index 0000000..c19c7a4 --- /dev/null +++ b/ref_vrshr_n.c @@ -0,0 +1,217 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHR_N" +void exec_vrshr_n (void) +{ + /* Basic test: y=vrshr_n(x,v), then store the result. V is the shift amount passed to the intrinsic; Q, T1, T2, W and N select the vector variant. */ +#define TEST_VRSHR_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 12); + TEST_VRSHR_N(, int, s, 32, 2, 2); + TEST_VRSHR_N(, int, s, 64, 1, 32); + TEST_VRSHR_N(, uint, u, 8, 8, 2); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 5); + TEST_VRSHR_N(, uint, u, 64, 1, 33); + + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 12); + TEST_VRSHR_N(q, int, s, 32, 4, 2); + TEST_VRSHR_N(q, int, s, 64, 2, 32); + TEST_VRSHR_N(q, uint, u, 8, 16, 2); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 5); + TEST_VRSHR_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Another set of tests: fill the input with the maximum value of each element type */ + VDUP(vector, , int, s, 8, 8, 0x7F); + VDUP(vector, , int, s, 16, 4, 0x7FFF); + VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector, , int, s, 64, 1, 
0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, , uint, u, 8, 8, 0xFF); + VDUP(vector, , uint, u, 16, 4, 0xFFFF); + VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + VDUP(vector, q, int, s, 8, 16, 0x7F); + VDUP(vector, q, int, s, 16, 8, 0x7FFF); + VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector, q, uint, u, 8, 16, 0xFF); + VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use max shift amount, to exercise saturation code in the lib (the shift amount equals the element width of each variant) */ + TEST_VRSHR_N(, int, s, 8, 8, 8); + TEST_VRSHR_N(, int, s, 16, 4, 16); + TEST_VRSHR_N(, int, s, 32, 2, 32); + TEST_VRSHR_N(, int, s, 64, 1, 64); + TEST_VRSHR_N(, uint, u, 8, 8, 8); + TEST_VRSHR_N(, uint, u, 16, 4, 16); + TEST_VRSHR_N(, uint, u, 32, 2, 32); + TEST_VRSHR_N(, uint, u, 64, 1, 64); + TEST_VRSHR_N(q, int, s, 8, 16, 8); + TEST_VRSHR_N(q, int, s, 16, 8, 16); + TEST_VRSHR_N(q, int, s, 32, 4, 32); + TEST_VRSHR_N(q, int, s, 64, 2, 64); + TEST_VRSHR_N(q, uint, u, 8, 16, 8); + TEST_VRSHR_N(q, uint, u, 16, 8, 16); + TEST_VRSHR_N(q, uint, u, 32, 4, 32); + TEST_VRSHR_N(q, uint, u, 64, 2, 64); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: max shift amount, positive input)"); + + /* Use 1 as shift amount, to exercise saturation code in the lib. NOTE(review): the input still holds the positive maxima set above, although the message printed after this group says negative input; confirm against the recorded reference output before changing that string */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 1); + TEST_VRSHR_N(, int, s, 32, 2, 1); + TEST_VRSHR_N(, int, s, 64, 1, 1); + TEST_VRSHR_N(, uint, u, 8, 8, 1); + TEST_VRSHR_N(, uint, u, 16, 4, 1); + TEST_VRSHR_N(, uint, u, 32, 2, 1); + TEST_VRSHR_N(, uint, u, 64, 1, 1); + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 1); + TEST_VRSHR_N(q, int, s, 32, 4, 1); + TEST_VRSHR_N(q, int, s, 64, 2, 1); + TEST_VRSHR_N(q, uint, u, 8, 16, 1); + TEST_VRSHR_N(q, uint, u, 16, 8, 
1); + TEST_VRSHR_N(q, uint, u, 32, 4, 1); + TEST_VRSHR_N(q, uint, u, 64, 2, 1); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 1, with negative input)"); + + /* Use 3 as shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 3); + TEST_VRSHR_N(, int, s, 16, 4, 3); + TEST_VRSHR_N(, int, s, 32, 2, 3); + TEST_VRSHR_N(, int, s, 64, 1, 3); + TEST_VRSHR_N(, uint, u, 8, 8, 3); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 3); + TEST_VRSHR_N(, uint, u, 64, 1, 3); + TEST_VRSHR_N(q, int, s, 8, 16, 3); + TEST_VRSHR_N(q, int, s, 16, 8, 3); + TEST_VRSHR_N(q, int, s, 32, 4, 3); + TEST_VRSHR_N(q, int, s, 64, 2, 3); + TEST_VRSHR_N(q, uint, u, 8, 16, 3); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 3); + TEST_VRSHR_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 3, positive input)"); + + VDUP(vector, , int, s, 8, 8, 0x80); + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x80000000); + VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); + VDUP(vector, , uint, u, 8, 8, 0xFF); + VDUP(vector, , uint, u, 16, 4, 0xFFFF); + VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + VDUP(vector, q, int, s, 8, 16, 0x80); + VDUP(vector, q, int, s, 16, 8, 0x8000); + VDUP(vector, q, int, s, 32, 4, 0x80000000); + VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); + VDUP(vector, q, uint, u, 8, 16, 0xFF); + VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + + /* Use 1 as shift amount, to exercise saturation code in the lib; the signed inputs now hold the sign-bit-only pattern (0x80, 0x8000, ...) set just above, the unsigned inputs still hold their maximum */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 1); + TEST_VRSHR_N(, int, s, 32, 2, 1); + 
TEST_VRSHR_N(, int, s, 64, 1, 1); + TEST_VRSHR_N(, uint, u, 8, 8, 1); + TEST_VRSHR_N(, uint, u, 16, 4, 1); + TEST_VRSHR_N(, uint, u, 32, 2, 1); + TEST_VRSHR_N(, uint, u, 64, 1, 1); + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 1); + TEST_VRSHR_N(q, int, s, 32, 4, 1); + TEST_VRSHR_N(q, int, s, 64, 2, 1); + TEST_VRSHR_N(q, uint, u, 8, 16, 1); + TEST_VRSHR_N(q, uint, u, 16, 8, 1); + TEST_VRSHR_N(q, uint, u, 32, 4, 1); + TEST_VRSHR_N(q, uint, u, 64, 2, 1); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 1, with negative input)"); + + /* Use 3 as shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 3); + TEST_VRSHR_N(, int, s, 16, 4, 3); + TEST_VRSHR_N(, int, s, 32, 2, 3); + TEST_VRSHR_N(, int, s, 64, 1, 3); + TEST_VRSHR_N(, uint, u, 8, 8, 3); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 3); + TEST_VRSHR_N(, uint, u, 64, 1, 3); + TEST_VRSHR_N(q, int, s, 8, 16, 3); + TEST_VRSHR_N(q, int, s, 16, 8, 3); + TEST_VRSHR_N(q, int, s, 32, 4, 3); + TEST_VRSHR_N(q, int, s, 64, 2, 3); + TEST_VRSHR_N(q, uint, u, 8, 16, 3); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 3); + TEST_VRSHR_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 3, with negative input)"); +} diff --git a/ref_vrshrn_n.c b/ref_vrshrn_n.c new file mode 100644 index 0000000..94f42b0 --- /dev/null +++ b/ref_vrshrn_n.c @@ -0,0 +1,119 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHRN_N" +void exec_vrshrn_n (void) +{ + /* Basic test: v2=vrshrn_n(v1,v), then store the result. W and N describe the input vector, W2 is the element width of the narrowed result (half of W in every invocation below) and V the shift amount; the result is stored with the non-q vst1 of width W2. 
*/ +#define TEST_VRSHRN_N(T1, T2, W, N, W2, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vrshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with 0, to check behavior on limits */ + VDUP(vector, q, int, s, 16, 8, 0); + VDUP(vector, q, int, s, 32, 4, 0); + VDUP(vector, q, int, s, 64, 2, 0); + VDUP(vector, q, uint, u, 16, 8, 0); + VDUP(vector, q, uint, u, 32, 4, 0); + VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose shift amount arbitrarily */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VRSHRN_N(int, s, 32, 4, 16, 1); + TEST_VRSHRN_N(int, s, 64, 2, 32, 2); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Test again, with predefined input values */ + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VRSHRN_N(int, s, 32, 4, 16, 1); + TEST_VRSHRN_N(int, s, 64, 2, 32, 2); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); + 
TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Fill input with arbitrary values */ + VDUP(vector, q, int, s, 16, 8, 30); + VDUP(vector, q, int, s, 32, 4, 0); + VDUP(vector, q, int, s, 64, 2, 0); + VDUP(vector, q, uint, u, 16, 8, 0xFFF0); + VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFF0); + VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose shift amount arbitrarily; most are large, up to the element width of the narrowed result (16 for the uint 32-bit input) */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 7); + TEST_VRSHRN_N(int, s, 32, 4, 16, 14); + TEST_VRSHRN_N(int, s, 64, 2, 32, 31); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 7); + TEST_VRSHRN_N(uint, u, 32, 4, 16, 16); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (with large shift amount)"); +} diff --git a/ref_vrsqrte.c b/ref_vrsqrte.c new file mode 100644 index 0000000..d96f9d2 --- /dev/null +++ b/ref_vrsqrte.c @@ -0,0 +1,143 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +#include <math.h> + +#define TEST_MSG "VRSQRTE/VRSQRTEQ" +void exec_vrsqrte(void) +{ + int i; + + /* Basic test: y=vrsqrte(x), then store the result. Q, T1, T2, W and N select the vector variant; the result is stored with vst1 into the matching result buffer. */ +#define TEST_VRSQRTE(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsqrte##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants: only the 32-bit uint and float vectors are declared */ + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + VDUP(vector, , uint, u, 32, 2, 0x12345678); + VDUP(vector, , float, f, 32, 2, 25.799999f); + VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); + VDUP(vector, q, float, f, 32, 4, 18.2f); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(, float, f, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + TEST_VRSQRTE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + fprintf (gcc_tests_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Don't test FP variants with negative inputs: the result depends + on 
the platform. The next two steps therefore re-run only the uint variants, and from here on the section headers are written to ref_file only. NOTE(review): a negative FP input (-1.0f) is nevertheless exercised in a later step of this function; confirm which statement is intended. */ + /* Choose init value arbitrarily */ + VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + VDUP(vector, q, uint, u, 32, 4, 0x89081234); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " (2)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + /* Choose init value arbitrarily */ + VDUP(vector, , uint, u, 32, 2, 0x80000000); + VDUP(vector, q, uint, u, 32, 4, 0x4ABCDEF0); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " (3)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + /* Test FP variants with special input values (NaNs, ...): NaN for the 2-lane vector, 0 for the 4-lane one */ + VDUP(vector, , float, f, 32, 2, NAN); + VDUP(vector, q, float, f, 32, 4, 0.0f); + + /* Apply the operator */ + TEST_VRSQRTE(, float, f, 32, 2); + TEST_VRSQRTE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (NaN, 0)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Test FP variants with special input values (negative, infinity) */ + VDUP(vector, , float, f, 32, 2, -1.0f); + VDUP(vector, q, float, f, 32, 4, HUGE_VALF); + + /* Apply the operator */ + TEST_VRSQRTE(, float, f, 32, 2); + TEST_VRSQRTE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (negative, infinity)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Test FP variants with special input values (-0, -infinity) */ + VDUP(vector, , float, f, 32, 2, -0.0f); + VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); + + /* Apply the operator */ + TEST_VRSQRTE(, float, f, 32, 2); + TEST_VRSQRTE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (-0, -infinity)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, 
float, 32, 4, PRIx32); +} diff --git a/ref_vrsqrts.c b/ref_vrsqrts.c new file mode 100644 index 0000000..4e355b9 --- /dev/null +++ b/ref_vrsqrts.c @@ -0,0 +1,120 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" +#include <math.h> + +#define TEST_MSG "VRSQRTS/VRSQRTSQ" +void exec_vrsqrts(void) +{ + int i; + + /* Basic test: y=vrsqrts(x), then store the result. The intrinsic takes two operands: the first comes from vector and the second from vector2. 
*/ +#define TEST_VRSQRTS(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsqrts##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for integer variants: only 32-bit float vectors are declared */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + VDUP(vector, , float, f, 32, 2, 12.9f); + VDUP(vector, q, float, f, 32, 4, 9.1f); + + VDUP(vector2, , float, f, 32, 2, 9.9f); + VDUP(vector2, q, float, f, 32, 4, 1.9f); + + /* Apply the operator */ + TEST_VRSQRTS(, float, f, 32, 2); + TEST_VRSQRTS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + fprintf (gcc_tests_file, "\n%s output:\n", TEST_MSG); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + + /* Test FP variants with special input values (NaN): NaN goes into the first operand of the 2-lane variant and into the second operand of the 4-lane variant; the other operands keep the normal values set above */ + VDUP(vector, , float, f, 32, 2, NAN); + VDUP(vector2, q, float, f, 32, 4, NAN); + + /* Apply the operator */ + TEST_VRSQRTS(, float, f, 32, 2); + TEST_VRSQRTS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (NAN) and normal values"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + + /* Test FP variants with special input values (infinity, 0) */ + VDUP(vector, , float, f, 32, 2, HUGE_VALF); + VDUP(vector, q, float, f, 32, 4, 0.0f); + VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Restore a normal value */ + + /* Apply the operator */ + TEST_VRSQRTS(, float, f, 32, 2); + TEST_VRSQRTS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (infinity, 0) and normal values"); 
+ DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + + /* Test FP variants with only special input values (infinity, 0) */ + VDUP(vector, , float, f, 32, 2, HUGE_VALF); + VDUP(vector, q, float, f, 32, 4, 0.0f); + VDUP(vector2, , float, f, 32, 2, -0.0f); + VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); + + /* Apply the operator */ + TEST_VRSQRTS(, float, f, 32, 2); + TEST_VRSQRTS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " FP special (infinity, 0)"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vrsra_n.c b/ref_vrsra_n.c new file mode 100644 index 0000000..1546ff2 --- /dev/null +++ b/ref_vrsra_n.c @@ -0,0 +1,238 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSRA_N" +void exec_vrsra_n (void) +{ + /* Basic test: y=vrsra_n(x,v), then store the result. */ +#define TEST_VRSRA_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose arbitrary initialization values */ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , int, s, 64, 1, 0x44); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , uint, u, 64, 1, 0x88); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + + /* Choose shift amount arbitrarily */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 12); + TEST_VRSRA_N(, int, s, 32, 2, 2); + TEST_VRSRA_N(, int, s, 64, 1, 32); + TEST_VRSRA_N(, uint, u, 8, 8, 2); + TEST_VRSRA_N(, uint, u, 16, 4, 3); + TEST_VRSRA_N(, uint, u, 32, 2, 5); + TEST_VRSRA_N(, uint, u, 64, 1, 33); + + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 12); + TEST_VRSRA_N(q, int, 
s, 32, 4, 2); + TEST_VRSRA_N(q, int, s, 64, 2, 32); + TEST_VRSRA_N(q, uint, u, 8, 16, 2); + TEST_VRSRA_N(q, uint, u, 16, 8, 3); + TEST_VRSRA_N(q, uint, u, 32, 4, 5); + TEST_VRSRA_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Initialize the accumulator with 0 */ + VDUP(vector, , int, s, 8, 8, 0); + VDUP(vector, , int, s, 16, 4, 0); + VDUP(vector, , int, s, 32, 2, 0); + VDUP(vector, , int, s, 64, 1, 0); + VDUP(vector, , uint, u, 8, 8, 0); + VDUP(vector, , uint, u, 16, 4, 0); + VDUP(vector, , uint, u, 32, 2, 0); + VDUP(vector, , uint, u, 64, 1, 0); + VDUP(vector, q, int, s, 8, 16, 0); + VDUP(vector, q, int, s, 16, 8, 0); + VDUP(vector, q, int, s, 32, 4, 0); + VDUP(vector, q, int, s, 64, 2, 0); + VDUP(vector, q, uint, u, 8, 16, 0); + VDUP(vector, q, uint, u, 16, 8, 0); + VDUP(vector, q, uint, u, 32, 4, 0); + VDUP(vector, q, uint, u, 64, 2, 0); + + /* Initialize with max values to check overflow */ + VDUP(vector2, , int, s, 8, 8, 0x7F); + VDUP(vector2, , int, s, 16, 4, 0x7FFF); + VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF); + VDUP(vector2, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector2, , uint, u, 8, 8, 0xFF); + VDUP(vector2, , uint, u, 16, 4, 0xFFFF); + VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFFF); + VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + VDUP(vector2, q, int, s, 8, 16, 0x7F); + VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector2, q, uint, u, 8, 16, 0xFF); + VDUP(vector2, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Shift by 1 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 1); + TEST_VRSRA_N(, int, s, 32, 2, 1); + TEST_VRSRA_N(, int, s, 64, 1, 1); + TEST_VRSRA_N(, uint, u, 8, 8, 1); + 
TEST_VRSRA_N(, uint, u, 16, 4, 1); + TEST_VRSRA_N(, uint, u, 32, 2, 1); + TEST_VRSRA_N(, uint, u, 64, 1, 1); + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 1); + TEST_VRSRA_N(q, int, s, 32, 4, 1); + TEST_VRSRA_N(q, int, s, 64, 2, 1); + TEST_VRSRA_N(q, uint, u, 8, 16, 1); + TEST_VRSRA_N(q, uint, u, 16, 8, 1); + TEST_VRSRA_N(q, uint, u, 32, 4, 1); + TEST_VRSRA_N(q, uint, u, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 1, positive input)"); + + /* Shift by 3 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 3); + TEST_VRSRA_N(, int, s, 16, 4, 3); + TEST_VRSRA_N(, int, s, 32, 2, 3); + TEST_VRSRA_N(, int, s, 64, 1, 3); + TEST_VRSRA_N(, uint, u, 8, 8, 3); + TEST_VRSRA_N(, uint, u, 16, 4, 3); + TEST_VRSRA_N(, uint, u, 32, 2, 3); + TEST_VRSRA_N(, uint, u, 64, 1, 3); + TEST_VRSRA_N(q, int, s, 8, 16, 3); + TEST_VRSRA_N(q, int, s, 16, 8, 3); + TEST_VRSRA_N(q, int, s, 32, 4, 3); + TEST_VRSRA_N(q, int, s, 64, 2, 3); + TEST_VRSRA_N(q, uint, u, 8, 16, 3); + TEST_VRSRA_N(q, uint, u, 16, 8, 3); + TEST_VRSRA_N(q, uint, u, 32, 4, 3); + TEST_VRSRA_N(q, uint, u, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 3, positive input)"); + + /* Shift by max to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 8); + TEST_VRSRA_N(, int, s, 16, 4, 16); + TEST_VRSRA_N(, int, s, 32, 2, 32); + TEST_VRSRA_N(, int, s, 64, 1, 64); + TEST_VRSRA_N(, uint, u, 8, 8, 8); + TEST_VRSRA_N(, uint, u, 16, 4, 16); + TEST_VRSRA_N(, uint, u, 32, 2, 32); + TEST_VRSRA_N(, uint, u, 64, 1, 64); + TEST_VRSRA_N(q, int, s, 8, 16, 8); + TEST_VRSRA_N(q, int, s, 16, 8, 16); + TEST_VRSRA_N(q, int, s, 32, 4, 32); + TEST_VRSRA_N(q, int, s, 64, 2, 64); + TEST_VRSRA_N(q, uint, u, 8, 16, 8); + TEST_VRSRA_N(q, uint, u, 16, 8, 16); + TEST_VRSRA_N(q, uint, u, 32, 4, 32); + TEST_VRSRA_N(q, uint, u, 64, 2, 64); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by max, positive input)"); + /* 
Initialize with min values to check overflow */ + VDUP(vector2, , int, s, 8, 8, 0x80); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x80000000); + VDUP(vector2, , int, s, 64, 1, 0x8000000000000000LL); + VDUP(vector2, q, int, s, 8, 16, 0x80); + VDUP(vector2, q, int, s, 16, 8, 0x8000); + VDUP(vector2, q, int, s, 32, 4, 0x80000000); + VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000ULL); + + /* Shift by 1 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 1); + TEST_VRSRA_N(, int, s, 32, 2, 1); + TEST_VRSRA_N(, int, s, 64, 1, 1); + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 1); + TEST_VRSRA_N(q, int, s, 32, 4, 1); + TEST_VRSRA_N(q, int, s, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 1, negative input)"); + + /* Shift by 3 to check overflow with rounding constant. The dump label must name this shift so the section is distinguishable from the "shift by max" section that follows; stored reference outputs embedding the label need regenerating if it changes. */ + TEST_VRSRA_N(, int, s, 8, 8, 3); + TEST_VRSRA_N(, int, s, 16, 4, 3); + TEST_VRSRA_N(, int, s, 32, 2, 3); + TEST_VRSRA_N(, int, s, 64, 1, 3); + TEST_VRSRA_N(q, int, s, 8, 16, 3); + TEST_VRSRA_N(q, int, s, 16, 8, 3); + TEST_VRSRA_N(q, int, s, 32, 4, 3); + TEST_VRSRA_N(q, int, s, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 3, negative input)"); + + /* Shift by max to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 8); + TEST_VRSRA_N(, int, s, 16, 4, 16); + TEST_VRSRA_N(, int, s, 32, 2, 32); + TEST_VRSRA_N(, int, s, 64, 1, 64); + TEST_VRSRA_N(q, int, s, 8, 16, 8); + TEST_VRSRA_N(q, int, s, 16, 8, 16); + TEST_VRSRA_N(q, int, s, 32, 4, 32); + TEST_VRSRA_N(q, int, s, 64, 2, 64); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by max, negative input)"); +} diff --git a/ref_vrsubhn.c b/ref_vrsubhn.c new file mode 100644 index 0000000..e972d82 --- /dev/null +++ b/ref_vrsubhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby 
granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vrsubhn +#define TEST_MSG "VRSUBHN" +/* This file only sets INSN_NAME and TEST_MSG before including ref_vaddhn.c; presumably that file holds the shared test body parameterized by these two macros -- TODO confirm against ref_vaddhn.c. */ +#include "ref_vaddhn.c" diff --git a/ref_vsXi_n.c b/ref_vsXi_n.c new file mode 100644 index 0000000..f710a67 --- /dev/null +++ b/ref_vsXi_n.c @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vsli +#define TEST_MSG "VSLI_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vsxi_n(vector, vector2, val), + then store the result. */ +#define TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VSXI_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + VDUP(vector2, , int, s, 8, 8, 2); + VDUP(vector2, , int, s, 16, 4, -4); + VDUP(vector2, , int, s, 32, 2, 3); + VDUP(vector2, , int, s, 64, 1, 100); + VDUP(vector2, , uint, u, 8, 8, 20); + VDUP(vector2, , uint, u, 16, 4, 30); + VDUP(vector2, , uint, u, 32, 2, 40); + VDUP(vector2, , uint, u, 64, 1, 2); + VDUP(vector2, , poly, p, 8, 8, 20); + VDUP(vector2, , poly, p, 
16, 4, 30); + VDUP(vector2, q, int, s, 8, 16, -10); + VDUP(vector2, q, int, s, 16, 8, -20); + VDUP(vector2, q, int, s, 32, 4, -30); + VDUP(vector2, q, int, s, 64, 2, 24); + VDUP(vector2, q, uint, u, 8, 16, 12); + VDUP(vector2, q, uint, u, 16, 8, 3); + VDUP(vector2, q, uint, u, 32, 4, 55); + VDUP(vector2, q, uint, u, 64, 2, 3); + VDUP(vector2, q, poly, p, 8, 16, 12); + VDUP(vector2, q, poly, p, 16, 8, 3); + + /* Choose shift amount arbitrarily */ + TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 4); + TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 3); + TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 1); + TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 32); + TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 2); + TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 10); + TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 30); + TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 3); + TEST_VSXI_N(INSN_NAME, , poly, p, 8, 8, 2); + TEST_VSXI_N(INSN_NAME, , poly, p, 16, 4, 10); + TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 5); + TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 3); + TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 20); + TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 16); + TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 3); + TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 12); + TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 23); + TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 53); + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 3); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 12); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_vset_lane.c b/ref_vset_lane.c new file mode 100644 index 0000000..cd04bf8 --- /dev/null +++ b/ref_vset_lane.c @@ -0,0 +1,82 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSET_LANE/VSET_LANEQ" +void exec_vset_lane (void) +{ + /* vec=vset_lane(val, vec, lane), then store the result. 
*/ +#define TEST_VSET_LANE_HERE(Q, T1, T2, W, N, V, L) \ + VECT_VAR(vector, T1, W, N) = \ + vset##Q##_lane_##T2##W(V, \ + VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose value and lane arbitrarily */ + TEST_VSET_LANE_HERE(, int, s, 8, 8, 0x11, 7); + TEST_VSET_LANE_HERE(, int, s, 16, 4, 0x22, 3); + TEST_VSET_LANE_HERE(, int, s, 32, 2, 0x33, 1); + TEST_VSET_LANE_HERE(, int, s, 64, 1, 0x44, 0); + TEST_VSET_LANE_HERE(, uint, u, 8, 8, 0x55, 6); + TEST_VSET_LANE_HERE(, uint, u, 16, 4, 0x66, 2); + TEST_VSET_LANE_HERE(, uint, u, 32, 2, 0x77, 1); + TEST_VSET_LANE_HERE(, uint, u, 64, 1, 0x88, 0); + TEST_VSET_LANE_HERE(, poly, p, 8, 8, 0x55, 6); + TEST_VSET_LANE_HERE(, poly, p, 16, 4, 0x66, 2); + TEST_VSET_LANE_HERE(, float, f, 32, 2, 33.2f, 1); + + TEST_VSET_LANE_HERE(q, int, s, 8, 16, 0x99, 15); + TEST_VSET_LANE_HERE(q, int, s, 16, 8, 0xAA, 5); + TEST_VSET_LANE_HERE(q, int, s, 32, 4, 0xBB, 3); + TEST_VSET_LANE_HERE(q, int, s, 64, 2, 0xCC, 1); + TEST_VSET_LANE_HERE(q, uint, u, 8, 16, 0xDD, 14); + TEST_VSET_LANE_HERE(q, uint, u, 16, 8, 0xEE, 6); + TEST_VSET_LANE_HERE(q, uint, u, 32, 4, 0xFF, 2); + TEST_VSET_LANE_HERE(q, uint, u, 64, 2, 0x11, 1); + TEST_VSET_LANE_HERE(q, poly, p, 8, 16, 0xDD, 14); + TEST_VSET_LANE_HERE(q, poly, p, 16, 8, 0xEE, 6); + TEST_VSET_LANE_HERE(q, float, f, 32, 4, 11.2f, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshl.c b/ref_vshl.c new file mode 100644 index 0000000..048fecd --- /dev/null +++ b/ref_vshl.c @@ -0,0 +1,98 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is 
hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHL/VSHLQ" +void exec_vshl (void) +{ + /* Basic test: v3=vshl(v1,v2), then store the result. 
*/ +#define TEST_VSHL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + VDUP(vector_shift, , int, s, 8, 8, 1); + VDUP(vector_shift, , int, s, 16, 4, 3); + VDUP(vector_shift, , int, s, 32, 2, 8); + VDUP(vector_shift, , int, s, 64, 1, 3); + VDUP(vector_shift, q, int, s, 8, 16, 5); + VDUP(vector_shift, q, int, s, 16, 8, 12); + VDUP(vector_shift, q, int, s, 32, 4, 30); + VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex (TEST_MSG); + + /* Test large shift amount */ + VDUP(vector_shift, , int, s, 8, 8, 8); + VDUP(vector_shift, , int, s, 16, 4, 16); + VDUP(vector_shift, , int, s, 32, 2, 32); + VDUP(vector_shift, , int, s, 64, 1, 64); + VDUP(vector_shift, q, int, s, 8, 16, 8); + VDUP(vector_shift, q, int, s, 16, 8, 17); + VDUP(vector_shift, q, int, s, 32, 4, 33); + VDUP(vector_shift, q, int, s, 64, 2, 65); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex2 (TEST_MSG, " (large shift amount)"); + + + /* Test negative shift amount */ + VDUP(vector_shift, , int, s, 8, 8, -1); + VDUP(vector_shift, , int, s, 16, 4, -1); + VDUP(vector_shift, , int, s, 32, 2, -2); + VDUP(vector_shift, , int, s, 64, 1, -4); + VDUP(vector_shift, q, int, s, 8, 16, -2); + VDUP(vector_shift, q, int, s, 16, 8, -5); + VDUP(vector_shift, q, int, s, 32, 4, -3); + VDUP(vector_shift, q, int, s, 64, 2, -5); + + 
TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); +} diff --git a/ref_vshl_n.c b/ref_vshl_n.c new file mode 100644 index 0000000..87c914b --- /dev/null +++ b/ref_vshl_n.c @@ -0,0 +1,75 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHL_N" +void exec_vshl_n (void) +{ + /* Basic test: v2=vshl_n(v1,v), then store the result. 
*/ +#define TEST_VSHL_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshl##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHL_N(, int, s, 8, 8, 1); + TEST_VSHL_N(, int, s, 16, 4, 1); + TEST_VSHL_N(, int, s, 32, 2, 3); + TEST_VSHL_N(, int, s, 64, 1, 2); + TEST_VSHL_N(, uint, u, 8, 8, 2); + TEST_VSHL_N(, uint, u, 16, 4, 4); + TEST_VSHL_N(, uint, u, 32, 2, 3); + TEST_VSHL_N(, uint, u, 64, 1, 1); + + TEST_VSHL_N(q, int, s, 8, 16, 5); + TEST_VSHL_N(q, int, s, 16, 8, 1); + TEST_VSHL_N(q, int, s, 32, 4, 2); + TEST_VSHL_N(q, int, s, 64, 2, 2); + TEST_VSHL_N(q, uint, u, 8, 16, 2); + TEST_VSHL_N(q, uint, u, 16, 8, 3); + TEST_VSHL_N(q, uint, u, 32, 4, 2); + TEST_VSHL_N(q, uint, u, 64, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshll_n.c b/ref_vshll_n.c new file mode 100644 index 0000000..341f321 --- /dev/null +++ b/ref_vshll_n.c @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHLL_N" +void exec_vshll_n (void) +{ + /* Basic test: v2=vshll_n(v1,v), then store the result. */ +#define TEST_VSHLL_N(T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vshll##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1q##_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHLL_N(int, s, 8, 16, 8, 1); + TEST_VSHLL_N(int, s, 16, 32, 4, 1); + TEST_VSHLL_N(int, s, 32, 64, 2, 3); + TEST_VSHLL_N(uint, u, 8, 16, 8, 2); + TEST_VSHLL_N(uint, u, 16, 32, 4, 4); + TEST_VSHLL_N(uint, u, 32, 64, 2, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshr_n.c b/ref_vshr_n.c new file mode 100644 index 0000000..bff0f7b --- /dev/null +++ b/ref_vshr_n.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including 
without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHR_N" +void exec_vshr_n (void) +{ + /* Basic test: y=vshr_n(x,v), then store the result. 
*/ +#define TEST_VSHR_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHR_N(, int, s, 8, 8, 1); + TEST_VSHR_N(, int, s, 16, 4, 12); + TEST_VSHR_N(, int, s, 32, 2, 2); + TEST_VSHR_N(, int, s, 64, 1, 32); + TEST_VSHR_N(, uint, u, 8, 8, 2); + TEST_VSHR_N(, uint, u, 16, 4, 3); + TEST_VSHR_N(, uint, u, 32, 2, 5); + TEST_VSHR_N(, uint, u, 64, 1, 33); + + TEST_VSHR_N(q, int, s, 8, 16, 1); + TEST_VSHR_N(q, int, s, 16, 8, 12); + TEST_VSHR_N(q, int, s, 32, 4, 2); + TEST_VSHR_N(q, int, s, 64, 2, 32); + TEST_VSHR_N(q, uint, u, 8, 16, 2); + TEST_VSHR_N(q, uint, u, 16, 8, 3); + TEST_VSHR_N(q, uint, u, 32, 4, 5); + TEST_VSHR_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshrn_n.c b/ref_vshrn_n.c new file mode 100644 index 0000000..f9b8041 --- /dev/null +++ b/ref_vshrn_n.c @@ -0,0 +1,81 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in 
+all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHRN_N" +void exec_vshrn_n (void) +{ + /* Basic test: y=vshrn_n(x,v), then store the result. */ +#define TEST_VSHRN_N(T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + TEST_VSHRN_N(int, s, 16, 8, 8, 1); + 
TEST_VSHRN_N(int, s, 32, 16, 4, 1); + TEST_VSHRN_N(int, s, 64, 32, 2, 2); + TEST_VSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vsli_n.c b/ref_vsli_n.c new file mode 100644 index 0000000..2616d10 --- /dev/null +++ b/ref_vsli_n.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vsli +#define TEST_MSG "VSLI_N" + +/* Extra tests for functions requiring corner cases tests */ +void vsli_extra(void); +#define EXTRA_TESTS vsli_extra + +#include "ref_vsXi_n.c" + +void vsli_extra(void) +{ + /* Test cases with maximum shift amount (this amount is different + from vsri). 
*/ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + VDUP(vector2, , int, s, 8, 8, 2); + VDUP(vector2, , int, s, 16, 4, -4); + VDUP(vector2, , int, s, 32, 2, 3); + VDUP(vector2, , int, s, 64, 1, 100); + VDUP(vector2, , uint, u, 8, 8, 20); + VDUP(vector2, , uint, u, 16, 4, 30); + VDUP(vector2, , uint, u, 32, 2, 40); + VDUP(vector2, , uint, u, 64, 1, 2); + VDUP(vector2, , poly, p, 8, 8, 20); + VDUP(vector2, , poly, p, 16, 4, 30); + VDUP(vector2, q, int, s, 8, 16, -10); + VDUP(vector2, q, int, s, 16, 8, -20); + VDUP(vector2, q, int, s, 32, 4, -30); + VDUP(vector2, q, int, s, 64, 2, 24); + VDUP(vector2, q, uint, u, 8, 16, 12); + VDUP(vector2, q, uint, u, 16, 8, 3); + VDUP(vector2, q, uint, u, 32, 4, 55); + VDUP(vector2, q, uint, u, 64, 2, 3); + VDUP(vector2, q, poly, p, 8, 16, 12); + VDUP(vector2, q, poly, p, 16, 8, 3); + + /* Use maximum allowed shift amount */ + TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 7); + TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 15); + TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 31); + TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 63); + TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 7); + TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 15); + TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 31); + TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 63); + TEST_VSXI_N(INSN_NAME, , poly, p, 8, 8, 7); + TEST_VSXI_N(INSN_NAME, , poly, p, 16, 4, 15); + TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 7); + TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 15); + TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 31); + TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 63); + TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 7); + TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 15); + TEST_VSXI_N(INSN_NAME, q, uint, 
u, 32, 4, 31); + TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 63); + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 7); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 15); + + dump_results_hex2 (TEST_MSG, "max shift amount"); +} diff --git a/ref_vsra_n.c b/ref_vsra_n.c new file mode 100644 index 0000000..bbbed14 --- /dev/null +++ b/ref_vsra_n.c @@ -0,0 +1,97 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSRA_N" +void exec_vsra_n (void) +{ + /* Basic test: y=vsra_n(x,v), then store the result. 
*/ +#define TEST_VSRA_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose arbitrary initialization values */ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , int, s, 64, 1, 0x44); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , uint, u, 64, 1, 0x88); + + VDUP(vector2, q, int, s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, int, s, 64, 2, 0x44); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, uint, u, 64, 2, 0x88); + + /* Choose shift amount arbitrarily */ + TEST_VSRA_N(, int, s, 8, 8, 1); + TEST_VSRA_N(, int, s, 16, 4, 12); + TEST_VSRA_N(, int, s, 32, 2, 2); + TEST_VSRA_N(, int, s, 64, 1, 32); + TEST_VSRA_N(, uint, u, 8, 8, 2); + TEST_VSRA_N(, uint, u, 16, 4, 3); + TEST_VSRA_N(, uint, u, 32, 2, 5); + TEST_VSRA_N(, uint, u, 64, 1, 33); + + TEST_VSRA_N(q, int, s, 8, 16, 1); + TEST_VSRA_N(q, int, s, 16, 8, 12); + TEST_VSRA_N(q, int, s, 32, 4, 2); + TEST_VSRA_N(q, int, s, 64, 2, 32); + TEST_VSRA_N(q, uint, u, 8, 16, 2); + TEST_VSRA_N(q, uint, u, 16, 8, 3); + TEST_VSRA_N(q, uint, u, 32, 4, 5); + TEST_VSRA_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ 
+ dump_results_hex (TEST_MSG); +} diff --git a/ref_vsri_n.c b/ref_vsri_n.c new file mode 100644 index 0000000..7414b56 --- /dev/null +++ b/ref_vsri_n.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vsri +#define TEST_MSG "VSRI_N" + +/* Extra tests for functions requiring corner cases tests */ +void vsri_extra(void); +#define EXTRA_TESTS vsri_extra + +#include "ref_vsXi_n.c" + +void vsri_extra(void) +{ + /* Test cases with maximum shift amount (this amount is different + from vsli). 
*/ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + VDUP(vector2, , int, s, 8, 8, 2); + VDUP(vector2, , int, s, 16, 4, -4); + VDUP(vector2, , int, s, 32, 2, 3); + VDUP(vector2, , int, s, 64, 1, 100); + VDUP(vector2, , uint, u, 8, 8, 20); + VDUP(vector2, , uint, u, 16, 4, 30); + VDUP(vector2, , uint, u, 32, 2, 40); + VDUP(vector2, , uint, u, 64, 1, 2); + VDUP(vector2, , poly, p, 8, 8, 20); + VDUP(vector2, , poly, p, 16, 4, 30); + VDUP(vector2, q, int, s, 8, 16, -10); + VDUP(vector2, q, int, s, 16, 8, -20); + VDUP(vector2, q, int, s, 32, 4, -30); + VDUP(vector2, q, int, s, 64, 2, 24); + VDUP(vector2, q, uint, u, 8, 16, 12); + VDUP(vector2, q, uint, u, 16, 8, 3); + VDUP(vector2, q, uint, u, 32, 4, 55); + VDUP(vector2, q, uint, u, 64, 2, 3); + VDUP(vector2, q, poly, p, 8, 16, 12); + VDUP(vector2, q, poly, p, 16, 8, 3); + + /* Use maximum allowed shift amount */ + TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 8); + TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 16); + TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 32); + TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 64); + TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 8); + TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 16); + TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 32); + TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 64); + TEST_VSXI_N(INSN_NAME, , poly, p, 8, 8, 8); + TEST_VSXI_N(INSN_NAME, , poly, p, 16, 4, 16); + TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 8); + TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 16); + TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 32); + TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 64); + TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 8); + TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 16); + TEST_VSXI_N(INSN_NAME, q, uint, 
u, 32, 4, 32); + TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 64); + TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 8); + TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 16); + + dump_results_hex2 (TEST_MSG, " max shift amount"); +} diff --git a/ref_vst1_lane.c b/ref_vst1_lane.c new file mode 100644 index 0000000..2225749 --- /dev/null +++ b/ref_vst1_lane.c @@ -0,0 +1,85 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VST1_LANE/VST1_LANEQ" +void exec_vst1_lane (void) +{ +#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \ + VECT_VAR(vector, T1, W, N) = \ + vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ + vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector, T1, W, N), L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_VARIABLE(vector, float, 16, 4); + DECL_VARIABLE(vector, float, 16, 8); +#endif + + clean_results (); + + /* Choose lane arbitrarily */ + TEST_VST1_LANE(, int, s, 8, 8, 7); + TEST_VST1_LANE(, int, s, 16, 4, 3); + TEST_VST1_LANE(, int, s, 32, 2, 1); + TEST_VST1_LANE(, int, s, 64, 1, 0); + TEST_VST1_LANE(, uint, u, 8, 8, 6); + TEST_VST1_LANE(, uint, u, 16, 4, 2); + TEST_VST1_LANE(, uint, u, 32, 2, 0); + TEST_VST1_LANE(, uint, u, 64, 1, 0); + TEST_VST1_LANE(, poly, p, 8, 8, 6); + TEST_VST1_LANE(, poly, p, 16, 4, 2); + TEST_VST1_LANE(, float, f, 32, 2, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VST1_LANE(, float, f, 16, 4, 2); +#endif + + TEST_VST1_LANE(q, int, s, 8, 16, 15); + TEST_VST1_LANE(q, int, s, 16, 8, 5); + TEST_VST1_LANE(q, int, s, 32, 4, 1); + TEST_VST1_LANE(q, int, s, 64, 2, 1); + TEST_VST1_LANE(q, uint, u, 8, 16, 10); + TEST_VST1_LANE(q, uint, u, 16, 8, 4); + TEST_VST1_LANE(q, uint, u, 32, 4, 3); + TEST_VST1_LANE(q, uint, u, 64, 2, 0); + TEST_VST1_LANE(q, poly, p, 8, 16, 10); + TEST_VST1_LANE(q, poly, p, 16, 8, 4); + TEST_VST1_LANE(q, float, f, 32, 4, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_VST1_LANE(q, float, f, 16, 8, 5); 
+#endif + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vstX_lane.c b/ref_vstX_lane.c new file mode 100644 index 0000000..50dd045 --- /dev/null +++ b/ref_vstX_lane.c @@ -0,0 +1,243 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vstX_lane (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VSTX_LANE(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. 
A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro makes it possible to move another chunk of data from result_bis to + result. */ +#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ + sizeof(VECT_VAR(buffer_src, T1, W, N))); \ + memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \ + sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \ + \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ + vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + /* Use dedicated init buffer, of size X */ \ + vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ + L); \ + vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X), \ + L); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant */ +#define DECL_ALL_VSTX_LANE(X) \ + DECL_VSTX_LANE(int, 8, 8, X); \ + DECL_VSTX_LANE(int, 16, 4, X); \ + DECL_VSTX_LANE(int, 32, 2, X); \ + DECL_VSTX_LANE(uint, 8, 8, X); \ + DECL_VSTX_LANE(uint, 16, 4, X); \ + DECL_VSTX_LANE(uint, 32, 2, X); \ + DECL_VSTX_LANE(poly, 8, 8, X); \ + DECL_VSTX_LANE(poly, 16, 4, X); \ + DECL_VSTX_LANE(float, 32, 2, X); \ + DECL_VSTX_LANE(int, 16, 8, X); \ + DECL_VSTX_LANE(int, 32, 4, X); \ + DECL_VSTX_LANE(uint, 16, 8, X); \ + DECL_VSTX_LANE(uint, 32, 4, X); \ + DECL_VSTX_LANE(poly, 16, 8, X); \ + DECL_VSTX_LANE(float, 32, 4, X) + +#if 
defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define DECL_ALL_VSTX_LANE_FP16(X) \ + DECL_VSTX_LANE(float, 16, 4, X); \ + DECL_VSTX_LANE(float, 16, 8, X) +#endif + +#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] + + /* Use the same lanes regardless of the size of the array (X), for + simplicity */ +#define TEST_ALL_VSTX_LANE(X) \ + TEST_VSTX_LANE(, int, s, 8, 8, X, 7); \ + TEST_VSTX_LANE(, int, s, 16, 4, X, 2); \ + TEST_VSTX_LANE(, int, s, 32, 2, X, 0); \ + TEST_VSTX_LANE(, float, f, 32, 2, X, 0); \ + TEST_VSTX_LANE(, uint, u, 8, 8, X, 4); \ + TEST_VSTX_LANE(, uint, u, 16, 4, X, 3); \ + TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \ + TEST_VSTX_LANE(, poly, p, 8, 8, X, 4); \ + TEST_VSTX_LANE(, poly, p, 16, 4, X, 3); \ + TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \ + TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \ + TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \ + TEST_VSTX_LANE(q, uint, u, 32, 4, X, 0); \ + TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5); \ + TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_ALL_VSTX_LANE_FP16(X) \ + TEST_VSTX_LANE(, float, f, 16, 4, X, 3); \ + TEST_VSTX_LANE(q, float, f, 16, 8, X, 6) +#endif + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + +#if defined(__ARM_FP16_FORMAT_IEEE) 
&& ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ + TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 16, 8, X, Y) +#endif + + /* Declare the temporary buffers / variables */ + DECL_ALL_VSTX_LANE(2); + DECL_ALL_VSTX_LANE(3); + DECL_ALL_VSTX_LANE(4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DECL_ALL_VSTX_LANE_FP16(2); + DECL_ALL_VSTX_LANE_FP16(3); + DECL_ALL_VSTX_LANE_FP16(4); +#endif + + /* Define dummy input arrays, large enough for x4 vectors */ + DUMMY_ARRAY(buffer_src, int, 8, 8, 4); + DUMMY_ARRAY(buffer_src, int, 16, 4, 4); + DUMMY_ARRAY(buffer_src, int, 32, 2, 4); + DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); + DUMMY_ARRAY(buffer_src, poly, 8, 8, 4); + DUMMY_ARRAY(buffer_src, poly, 16, 4, 4); + DUMMY_ARRAY(buffer_src, float, 32, 2, 4); + DUMMY_ARRAY(buffer_src, int, 16, 8, 4); + DUMMY_ARRAY(buffer_src, int, 32, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); + DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); + DUMMY_ARRAY(buffer_src, float, 32, 4, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DUMMY_ARRAY(buffer_src, float, 16, 4, 4); + DUMMY_ARRAY(buffer_src, float, 16, 8, 4); +#endif + + /* Check vst2_lane/vst2q_lane */ + clean_results (); +#define TEST_MSG "VST2_LANE/VST2Q_LANE" + TEST_ALL_VSTX_LANE(2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VSTX_LANE_FP16(2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 
1"); + + /* Check vst3_lane/vst3q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VST3_LANE/VST3Q_LANE" + TEST_ALL_VSTX_LANE(3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VSTX_LANE_FP16(3); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vst4_lane/vst4q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VST4_LANE/VST4Q_LANE" + TEST_ALL_VSTX_LANE(4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_VSTX_LANE_FP16(4); +#endif + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); +#endif + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); +#endif + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); +#endif + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vsub.c b/ref_vsub.c new file mode 100644 index 0000000..b83c8c6 --- /dev/null +++ b/ref_vsub.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 
STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsub +#define TEST_MSG "VSUB/VSUBQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vsub_f32(void); +#define EXTRA_TESTS exec_vsub_f32 + +#include "ref_v_binary_op.c" + +void exec_vsub_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + VDUP(vector, , float, f, 32, 2, 2.3f); + VDUP(vector, q, float, f, 32, 4, 3.4f); + + VDUP(vector2, , float, f, 32, 2, 4.5f); + VDUP(vector2, q, float, f, 32, 4, 5.6f); + + TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vsubhn.c b/ref_vsubhn.c new file mode 100644 index 0000000..ef68d60 --- /dev/null +++ b/ref_vsubhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vsubhn +#define TEST_MSG "VSUBHN" + +#include "ref_vaddhn.c" diff --git a/ref_vsubl.c b/ref_vsubl.c new file mode 100644 index 0000000..093ab53 --- /dev/null +++ b/ref_vsubl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsubl +#define TEST_MSG "VSUBL" + +#include "ref_vaddl.c" diff --git a/ref_vsubw.c b/ref_vsubw.c new file mode 100644 index 0000000..1df07d1 --- /dev/null +++ b/ref_vsubw.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsubw +#define TEST_MSG "VSUBW" + +#include "ref_vaddw.c" diff --git a/ref_vtbX.c b/ref_vtbX.c new file mode 100644 index 0000000..6f87d1a --- /dev/null +++ b/ref_vtbX.c @@ -0,0 +1,227 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vtbX (void) +{ + int i; + + /* In this case, input variables are arrays of vectors */ +#define DECL_VTBX(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(table_vector, T1, W, N, X) + + /* The vtbl1 variant is different from vtbl{2,3,4} because it takes a + vector as 1st param, instead of an array of vectors */ +#define TEST_VTBL1(T1, T2, T3, W, N) \ + VECT_VAR(table_vector, T1, W, N) = \ + vld1##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbl1_##T2##W(VECT_VAR(table_vector, T1, W, N), \ + VECT_VAR(vector, T3, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_VTBLX(T1, T2, T3, W, N, X) \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ + vld##X##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbl##X##_##T2##W(VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ + VECT_VAR(vector, T3, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need to define a lookup table */ + uint8_t lookup_table[32]; + + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, poly, 8, 8); + + /* For vtbl1 */ + DECL_VARIABLE(table_vector, int, 8, 8); + DECL_VARIABLE(table_vector, uint, 8, 8); + DECL_VARIABLE(table_vector, poly, 8, 8); + + /* For vtbx* */ + DECL_VARIABLE(default_vector, int, 8, 8); + DECL_VARIABLE(default_vector, uint, 8, 8); + DECL_VARIABLE(default_vector, poly, 8, 8); + + /* We need only 8 bits variants */ +#define DECL_ALL_VTBLX(X) \ + DECL_VTBX(int, 8, 8, X); \ + DECL_VTBX(uint, 8, 8, 
X); \ + DECL_VTBX(poly, 8, 8, X) + +#define TEST_ALL_VTBL1() \ + TEST_VTBL1(int, s, int, 8, 8); \ + TEST_VTBL1(uint, u, uint, 8, 8); \ + TEST_VTBL1(poly, p, uint, 8, 8) + +#define TEST_ALL_VTBLX(X) \ + TEST_VTBLX(int, s, int, 8, 8, X); \ + TEST_VTBLX(uint, u, uint, 8, 8, X); \ + TEST_VTBLX(poly, p, uint, 8, 8, X) + + /* Declare the temporary buffers / variables */ + DECL_ALL_VTBLX(2); + DECL_ALL_VTBLX(3); + DECL_ALL_VTBLX(4); + + /* Fill the lookup table */ + for (i=0; i<32; i++) { + lookup_table[i] = i-15; + } + + /* Choose init value arbitrarily, will be used as table index */ + VDUP(vector, , int, s, 8, 8, 1); + VDUP(vector, , uint, u, 8, 8, 2); + VDUP(vector, , poly, p, 8, 8, 2); + + /* To ensure code coverage of lib, add some indexes larger than 8,16 and 32 */ + /* except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9) */ + TEST_VSET_LANE(vector, , int, s, 8, 8, 0, 10); + TEST_VSET_LANE(vector, , int, s, 8, 8, 4, 20); + TEST_VSET_LANE(vector, , int, s, 8, 8, 5, 40); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 0, 10); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 4, 20); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 5, 40); + TEST_VSET_LANE(vector, , poly, p, 8, 8, 0, 10); + TEST_VSET_LANE(vector, , poly, p, 8, 8, 4, 20); + TEST_VSET_LANE(vector, , poly, p, 8, 8, 5, 40); + + + /* Check vtbl1 */ + clean_results (); +#define TEST_MSG "VTBL1" + TEST_ALL_VTBL1(); + dump_results_hex (TEST_MSG); + + /* Check vtbl2 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL2" + TEST_ALL_VTBLX(2); + dump_results_hex (TEST_MSG); + + /* Check vtbl3 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL3" + TEST_ALL_VTBLX(3); + dump_results_hex (TEST_MSG); + + /* Check vtbl4 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL4" + TEST_ALL_VTBLX(4); + dump_results_hex (TEST_MSG); + + + /* Now test VTBX */ + + /* The vtbx1 variant is different from vtbx{2,3,4} because it takes a + vector as 1st param, instead of an array of vectors */ +#define TEST_VTBX1(T1, 
T2, T3, W, N) \ + VECT_VAR(table_vector, T1, W, N) = \ + vld1##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbx1_##T2##W(VECT_VAR(default_vector, T1, W, N), \ + VECT_VAR(table_vector, T1, W, N), \ + VECT_VAR(vector, T3, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_VTBXX(T1, T2, T3, W, N, X) \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ + vld##X##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbx##X##_##T2##W(VECT_VAR(default_vector, T1, W, N), \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ + VECT_VAR(vector, T3, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_ALL_VTBX1() \ + TEST_VTBX1(int, s, int, 8, 8); \ + TEST_VTBX1(uint, u, uint, 8, 8); \ + TEST_VTBX1(poly, p, uint, 8, 8) + +#define TEST_ALL_VTBXX(X) \ + TEST_VTBXX(int, s, int, 8, 8, X); \ + TEST_VTBXX(uint, u, uint, 8, 8, X); \ + TEST_VTBXX(poly, p, uint, 8, 8, X) + + /* Choose init value arbitrarily, will be used as default value */ + VDUP(default_vector, , int, s, 8, 8, 0x33); + VDUP(default_vector, , uint, u, 8, 8, 0xCC); + VDUP(default_vector, , poly, p, 8, 8, 0xCC); + + /* Check vtbx1 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX1" + TEST_ALL_VTBX1(); + dump_results_hex (TEST_MSG); + + /* Check vtbx2 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX2" + TEST_ALL_VTBXX(2); + dump_results_hex (TEST_MSG); + + /* Check vtbx3 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX3" + TEST_ALL_VTBXX(3); + dump_results_hex (TEST_MSG); + + /* Check vtbx4 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX4" + TEST_ALL_VTBXX(4); + dump_results_hex (TEST_MSG); +} diff --git a/ref_vtrn.c b/ref_vtrn.c new file mode 100644 index 0000000..4ac3e84 --- /dev/null +++ b/ref_vtrn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by 
Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vtrn +#define TEST_MSG "VTRN/VTRNQ" + +#include "ref_vuzp.c" diff --git a/ref_vtst.c b/ref_vtst.c new file mode 100644 index 0000000..a273a3a --- /dev/null +++ b/ref_vtst.c @@ -0,0 +1,99 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vtst +#define TEST_MSG "VTST/VTSTQ" +#endif + +/* Can't use the standard ref_v_binary_op.c template because vtst has + no 64 bits variant, and outputs are always of uint type */ +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. */ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, uint, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \ + VECT_VAR(vector_res, uint, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_UNSIGNED_VARIANTS(vector_res); + + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as comparison value */ + VDUP(vector2, , int, s, 8, 8, 15); + VDUP(vector2, , int, s, 16, 4, 5); + VDUP(vector2, , int, s, 32, 2, 1); + VDUP(vector2, , uint, u, 8, 8, 15); + VDUP(vector2, , uint, u, 16, 4, 5); + VDUP(vector2, , uint, u, 32, 2, 1); + 
VDUP(vector2, q, int, s, 8, 16, 15); + VDUP(vector2, q, int, s, 16, 8, 5); + VDUP(vector2, q, int, s, 32, 4, 1); + VDUP(vector2, q, uint, u, 8, 16, 15); + VDUP(vector2, q, uint, u, 16, 8, 5); + VDUP(vector2, q, uint, u, 32, 4, 1); + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR, T1, T2) \ + MACRO(VAR, , T1, T2, 8, 8); \ + MACRO(VAR, , T1, T2, 16, 4); \ + MACRO(VAR, , T1, T2, 32, 2); \ + MACRO(VAR, q, T1, T2, 8, 16); \ + MACRO(VAR, q, T1, T2, 16, 8); \ + MACRO(VAR, q, T1, T2, 32, 4) + + /* Split the test, as both signed and unsigned variants output their + result in an unsigned form (thus the same output variable is used + in these tests) */ + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, int, s); + dump_results_hex2 (TEST_MSG, " (signed input)"); + + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, uint, u); + dump_results_hex2 (TEST_MSG, " (unsigned input)"); +} diff --git a/ref_vuzp.c b/ref_vuzp.c new file mode 100644 index 0000000..0e6f1d8 --- /dev/null +++ b/ref_vuzp.c @@ -0,0 +1,171 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#if defined(__arm__) || defined(__aarch64__) +#include <arm_neon.h> +#else +#include "stm-arm-neon.h" +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vuzp +#define TEST_MSG "VUZP/VUZPQ" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* In this case, output variables are arrays of vectors */ +#define DECL_VUZP(T1, W, N) \ + VECT_ARRAY_TYPE(T1, W, N, 2) VECT_ARRAY_VAR(result_vec, T1, W, N, 2); \ + VECT_VAR_DECL(result_bis, T1, W, N)[2 * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. 
*/ +#define TEST_VUZP(INSN, Q, T1, T2, W, N) \ + VECT_ARRAY_VAR(result_vec, T1, W, N, 2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst2##Q##_##T2##W(VECT_VAR(result_bis, T1, W, N), \ + VECT_ARRAY_VAR(result_vec, T1, W, N, 2)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[X] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X) \ + memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + + /* We don't need 64 bits variants */ +#define DECL_ALL_VUZP() \ + DECL_VUZP(int, 8, 8); \ + DECL_VUZP(int, 16, 4); \ + DECL_VUZP(int, 32, 2); \ + DECL_VUZP(uint, 8, 8); \ + DECL_VUZP(uint, 16, 4); \ + DECL_VUZP(uint, 32, 2); \ + DECL_VUZP(poly, 8, 8); \ + DECL_VUZP(poly, 16, 4); \ + DECL_VUZP(float, 32, 2); \ + DECL_VUZP(int, 8, 16); \ + DECL_VUZP(int, 16, 8); \ + DECL_VUZP(int, 32, 4); \ + DECL_VUZP(uint, 8, 16); \ + DECL_VUZP(uint, 16, 8); \ + DECL_VUZP(uint, 32, 4); \ + DECL_VUZP(poly, 8, 16); \ + DECL_VUZP(poly, 16, 8); \ + DECL_VUZP(float, 32, 4) + + DECL_ALL_VUZP(); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); + VLOAD(vector1, buffer, , float, f, 32, 2); + VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose arbitrary initialization values */ + VDUP(vector2, , int, s, 8, 8, 0x11); + VDUP(vector2, , int, s, 16, 4, 0x22); + VDUP(vector2, , int, s, 32, 2, 0x33); + VDUP(vector2, , uint, u, 8, 8, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x66); + VDUP(vector2, , uint, u, 32, 2, 0x77); + VDUP(vector2, , poly, p, 8, 8, 0x55); + VDUP(vector2, , poly, p, 16, 4, 0x66); + VDUP(vector2, , float, f, 32, 2, 33.6f); + + VDUP(vector2, q, int, 
s, 8, 16, 0x11); + VDUP(vector2, q, int, s, 16, 8, 0x22); + VDUP(vector2, q, int, s, 32, 4, 0x33); + VDUP(vector2, q, uint, u, 8, 16, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x66); + VDUP(vector2, q, uint, u, 32, 4, 0x77); + VDUP(vector2, q, poly, p, 8, 16, 0x55); + VDUP(vector2, q, poly, p, 16, 8, 0x66); + VDUP(vector2, q, float, f, 32, 4, 33.8f); + +#define TEST_ALL_VUZP(INSN) \ + TEST_VUZP(INSN, , int, s, 8, 8); \ + TEST_VUZP(INSN, , int, s, 16, 4); \ + TEST_VUZP(INSN, , int, s, 32, 2); \ + TEST_VUZP(INSN, , uint, u, 8, 8); \ + TEST_VUZP(INSN, , uint, u, 16, 4); \ + TEST_VUZP(INSN, , uint, u, 32, 2); \ + TEST_VUZP(INSN, , poly, p, 8, 8); \ + TEST_VUZP(INSN, , poly, p, 16, 4); \ + TEST_VUZP(INSN, , float, f, 32, 2); \ + TEST_VUZP(INSN, q, int, s, 8, 16); \ + TEST_VUZP(INSN, q, int, s, 16, 8); \ + TEST_VUZP(INSN, q, int, s, 32, 4); \ + TEST_VUZP(INSN, q, uint, u, 8, 16); \ + TEST_VUZP(INSN, q, uint, u, 16, 8); \ + TEST_VUZP(INSN, q, uint, u, 32, 4); \ + TEST_VUZP(INSN, q, poly, p, 8, 16); \ + TEST_VUZP(INSN, q, poly, p, 16, 8); \ + TEST_VUZP(INSN, q, float, f, 32, 4) + +#define TEST_ALL_EXTRA_CHUNKS() \ + TEST_EXTRA_CHUNK(int, 8, 8, 1); \ + TEST_EXTRA_CHUNK(int, 16, 4, 1); \ + TEST_EXTRA_CHUNK(int, 32, 2, 1); \ + TEST_EXTRA_CHUNK(uint, 8, 8, 1); \ + TEST_EXTRA_CHUNK(uint, 16, 4, 1); \ + TEST_EXTRA_CHUNK(uint, 32, 2, 1); \ + TEST_EXTRA_CHUNK(poly, 8, 8, 1); \ + TEST_EXTRA_CHUNK(poly, 16, 4, 1); \ + TEST_EXTRA_CHUNK(float, 32, 2, 1); \ + TEST_EXTRA_CHUNK(int, 8, 16, 1); \ + TEST_EXTRA_CHUNK(int, 16, 8, 1); \ + TEST_EXTRA_CHUNK(int, 32, 4, 1); \ + TEST_EXTRA_CHUNK(uint, 8, 16, 1); \ + TEST_EXTRA_CHUNK(uint, 16, 8, 1); \ + TEST_EXTRA_CHUNK(uint, 32, 4, 1); \ + TEST_EXTRA_CHUNK(poly, 8, 16, 1); \ + TEST_EXTRA_CHUNK(poly, 16, 8, 1); \ + TEST_EXTRA_CHUNK(float, 32, 4, 1) + + /* Check vuzp/vuzpq */ + clean_results (); + TEST_ALL_VUZP(INSN_NAME); + + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(); + dump_results_hex2 (TEST_MSG, " chunk 1"); +} diff 
--git a/ref_vzip.c b/ref_vzip.c new file mode 100644 index 0000000..8c0b363 --- /dev/null +++ b/ref_vzip.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vzip +#define TEST_MSG "VZIP/VZIPQ" + +#include "ref_vuzp.c" diff --git a/retarget.c b/retarget.c new file mode 100644 index 0000000..3f01076 --- /dev/null +++ b/retarget.c @@ -0,0 +1,46 @@ +/* +** Copyright (C) ARM Limited, 2005. All rights reserved. +*/ + +#if defined(__cplusplus) +#include <cstdio> +#else +#include <stdio.h> +#endif +#include <rt_misc.h> + +// Heap base from scatter file +extern int Image$$HEAP$$ZI$$Base; +//#pragma import(__use_two_region_memory) + +extern void core_init(void); + +/* +The functions below are patched onto main. 
+*/ + +extern void $Super$$main(void); + +void $Sub$$main(void) +{ + core_init(); // does some extra setup work + + $Super$$main(); // calls the original function +} + + +/* +This function re-implements the C Library semihosted function. The stack pointer +has already been set and is passed back to the function. The base of the heap is +set from the scatter file. +*/ +__value_in_regs struct __initial_stackheap __user_initial_stackheap( + unsigned R0, unsigned SP, unsigned R2, unsigned SL) +{ + struct __initial_stackheap config; + + config.heap_base = (unsigned int)&Image$$HEAP$$ZI$$Base; // placed by scatterfile + config.stack_base = SP; // inherit SP from the execution environment + + return config; +} diff --git a/scatter.scat b/scatter.scat new file mode 100644 index 0000000..c6c8b46 --- /dev/null +++ b/scatter.scat @@ -0,0 +1,29 @@ +;; Copyright ARM Ltd 2005. All rights reserved. + +ROM_LOAD 0x2000 +{ + + ROM_EXEC 0x2000 + { + init.o (CortexA8, +First) ; Create Translation Table + * (InRoot$$Sections) ; this section must be in a root region + } + + I-TCM 0x30000 FIXED ; 0x1E000 ; built at 0x100 to avoid vector space + { ; assumes 32K I-TCM + + * (+RO) ; any remaining code inc C lib. + } + + D-TCM 0x230000 FIXED ; 0x40000 ; 8 Kb of D-TCM used for RW/ZI + { + * (+RW,+ZI) + } + + HEAP 0x4E0000 EMPTY 0x100000 {} ; 8Kb Heap follows directly after RW/ZI + + STACK 0x300000 EMPTY -0x8000 {} ; 32KB Stack, starts after DTCM block. 
+ + TTB 0x20000 EMPTY 0x4000 {} ; place translation table at 0x28000, 16Kb required + +} diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h new file mode 100644 index 0000000..2f2d255 --- /dev/null +++ b/stm-arm-neon-ref.h @@ -0,0 +1,815 @@ +/* + +Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifndef _STM_ARM_NEON_REF_H_ +#define _STM_ARM_NEON_REF_H_ + +#if defined(__cplusplus) +#include <cstdio> +#include <cinttypes> +#include <cstring> +#else +#include <stdio.h> +#if defined(_MSC_VER) +#include "msinttypes.h" +#include <float.h> /* for isnan() ... 
*/ +static int32_t _ptrNan[]={0x7fc00000L}; +#define NAN (*(float*)_ptrNan) +static int32_t _ptrInf[]={0x7f800000L}; +#define INFINITY (*(float*)_ptrInf) +#define HUGE_VALF INFINITY +#else +#include <inttypes.h> +#endif +#include <string.h> +#endif + +#define xSTR(X) #X +#define STR(X) xSTR(X) + +#define xNAME1(V,T) V ## _ ## T +#define xNAME(V,T) xNAME1(V,T) + +#define VAR(V,T,W) xNAME(V,T##W) +#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W) + +#define VECT_NAME(T, W, N) T##W##x##N +#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L +#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t) +#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t) + +#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N)) +#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N) + +/* This one is used for padding between input buffers. */ +#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42; + +/* Array declarations. */ +#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N] +#define ARRAY4(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[4] + +/* Arrays of vectors. 
*/ +#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L)) +#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L] + +static int result_idx = 0; +#define DUMP(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \ + } \ + fprintf(ref_file, " }\n"); \ + DUMP4GCC(MSG,T,W,N,FMT); + +/* Use casts for remove sign bits */ +#define DUMP_POLY(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + fprintf(ref_file, "%" FMT ", ", \ + (uint##W##_t)VECT_VAR(result, T, W, N)[i]); \ + } \ + fprintf(ref_file, " }\n"); \ + DUMP4GCC(MSG,T,W,N,FMT); + +#define DUMP_FP(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + union fp_operand { \ + uint##W##_t i; \ + float##W##_t f; \ + } tmp; \ + tmp.f = VECT_VAR(result, T, W, N)[i]; \ + fprintf(ref_file, "%" FMT ", ", tmp.i); \ + } \ + fprintf(ref_file, " }\n"); \ + DUMP4GCC_FP(MSG,T,W,N,FMT); + +#define DUMP4GCC(MSG,T,W,N,FMT) \ + fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \ + STR(T), W, N); \ + for(i=0; i<(N-1) ; i++) \ + { \ + if (W < 32) { \ + uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \ + fprintf(gcc_tests_file, "0x%" FMT ", ", tmp); \ + } else { \ + fprintf(gcc_tests_file, "0x%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \ + } \ + } \ + if (W < 32) { \ + uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \ + fprintf(gcc_tests_file, "0x%" FMT, tmp); \ + } else { \ + fprintf(gcc_tests_file, "0x%" FMT, VECT_VAR(result, T, W, N)[i]); \ + } \ + fprintf(gcc_tests_file, " };\n"); + +#define DUMP4GCC_FP(MSG,T,W,N,FMT) \ + { \ + union fp_operand { \ + uint##W##_t i; \ + float##W##_t f; \ + } tmp; \ + fprintf(gcc_tests_file, 
"VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \ + "hfloat", W, N); \ + for(i=0; i<(N-1) ; i++) \ + { \ + tmp.f = VECT_VAR(result, T, W, N)[i]; \ + fprintf(gcc_tests_file, "0x%" FMT ", ", tmp.i); \ + } \ + tmp.f = VECT_VAR(result, T, W, N)[i]; \ + fprintf(gcc_tests_file, "0x%" FMT, tmp.i); \ + fprintf(gcc_tests_file, " };\n"); \ + } + +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +#define float16_t __fp16 + +#define DUMP_FP16(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + uint##W##_t tmp; \ + tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \ + fprintf(ref_file, "%" FMT ", ", tmp); \ + } \ + fprintf(ref_file, " }\n"); \ + DUMP4GCC_FP16(MSG,T,W,N,FMT); + +#define DUMP4GCC_FP16(MSG,T,W,N,FMT) \ + { \ + uint##W##_t tmp; \ + fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \ + "hfloat", W, N); \ + for(i=0; i<(N-1) ; i++) \ + { \ + tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \ + fprintf(gcc_tests_file, "0x%" FMT ", ", tmp); \ + } \ + tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \ + fprintf(gcc_tests_file, "0x%" FMT, tmp); \ + fprintf(gcc_tests_file, " };\n"); \ + } +#endif + +#define CLEAN_PATTERN_8 0x33 +#define CLEAN_PATTERN_16 0x3333 +#define CLEAN_PATTERN_32 0x33333333 +#define CLEAN_PATTERN_64 0x3333333333333333 + +#define CLEAN(VAR,T,W,N) \ + memset(VECT_VAR(VAR, T, W, N), \ + CLEAN_PATTERN_8, \ + sizeof(VECT_VAR(VAR, T, W, N))); + +#define CHECK_INIT(VAR,Q,T1,T2,W,N) \ + { \ + ARRAY(check_result, T1, W, N); \ + int i; \ + \ + vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N), \ + VECT_VAR(VAR, T1, W, N)); \ + for(i=0; i<N ; i++) \ + { \ + /*if (VECT_VAR(check_result, T1, W, N)[i] == CLEAN_PATTERN_##W)*/ { \ + fprintf(stdout, "%s:%d: %s[%d] unintialized! 
%#x\n", \ + __FUNCTION__, __LINE__, \ + STR(VECT_VAR(VAR, T1, W, N)), i, \ + VECT_VAR(check_result, T1, W, N)[i]); \ + } \ + } \ + } + +/* Generic declarations: */ +extern FILE* log_file; +extern FILE* ref_file; +extern FILE* gcc_tests_file; + +/* Input buffers, one of each size */ +extern ARRAY(buffer, int, 8, 8); +extern ARRAY(buffer, int, 16, 4); +extern ARRAY(buffer, int, 32, 2); +extern ARRAY(buffer, int, 64, 1); +extern ARRAY(buffer, uint, 8, 8); +extern ARRAY(buffer, uint, 16, 4); +extern ARRAY(buffer, uint, 32, 2); +extern ARRAY(buffer, uint, 64, 1); +extern ARRAY(buffer, poly, 8, 8); +extern ARRAY(buffer, poly, 16, 4); +extern ARRAY(buffer, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern ARRAY(buffer, float, 16, 4); +#endif +extern ARRAY(buffer, int, 8, 16); +extern ARRAY(buffer, int, 16, 8); +extern ARRAY(buffer, int, 32, 4); +extern ARRAY(buffer, int, 64, 2); +extern ARRAY(buffer, uint, 8, 16); +extern ARRAY(buffer, uint, 16, 8); +extern ARRAY(buffer, uint, 32, 4); +extern ARRAY(buffer, uint, 64, 2); +extern ARRAY(buffer, poly, 8, 16); +extern ARRAY(buffer, poly, 16, 8); +extern ARRAY(buffer, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern ARRAY(buffer, float, 16, 8); +#endif + +/* The tests for vld1_dup and vdup expect at least 4 entries in the + input buffer, so force 1- and 2-elements initializers to have 4 + entries. 
*/ +extern ARRAY(buffer_dup, int, 8, 8); +extern ARRAY(buffer_dup, int, 16, 4); +extern ARRAY4(buffer_dup, int, 32, 2); +extern ARRAY4(buffer_dup, int, 64, 1); +extern ARRAY(buffer_dup, uint, 8, 8); +extern ARRAY(buffer_dup, uint, 16, 4); +extern ARRAY4(buffer_dup, uint, 32, 2); +extern ARRAY4(buffer_dup, uint, 64, 1); +extern ARRAY(buffer_dup, poly, 8, 8); +extern ARRAY(buffer_dup, poly, 16, 4); +extern ARRAY4(buffer_dup, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern ARRAY4(buffer_dup, float, 16, 4); +#endif +extern ARRAY(buffer_dup, int, 8, 16); +extern ARRAY(buffer_dup, int, 16, 8); +extern ARRAY(buffer_dup, int, 32, 4); +extern ARRAY4(buffer_dup, int, 64, 2); +extern ARRAY(buffer_dup, uint, 8, 16); +extern ARRAY(buffer_dup, uint, 16, 8); +extern ARRAY(buffer_dup, uint, 32, 4); +extern ARRAY4(buffer_dup, uint, 64, 2); +extern ARRAY(buffer_dup, poly, 8, 16); +extern ARRAY(buffer_dup, poly, 16, 8); +extern ARRAY(buffer_dup, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern ARRAY(buffer_dup, float, 16, 8); +#endif + +/* Input buffers for vld2, one of each size */ +extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2); +extern VECT_ARRAY(buffer_vld2, int, 16, 4, 2); +extern VECT_ARRAY(buffer_vld2, int, 32, 2, 2); +extern VECT_ARRAY(buffer_vld2, int, 64, 1, 2); +extern VECT_ARRAY(buffer_vld2, uint, 8, 8, 2); +extern VECT_ARRAY(buffer_vld2, uint, 16, 4, 2); +extern VECT_ARRAY(buffer_vld2, uint, 32, 2, 2); +extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2); +extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2); +extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2); +extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2); +#endif +extern VECT_ARRAY(buffer_vld2, int, 8, 16, 
2); +extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2); +extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2); +extern VECT_ARRAY(buffer_vld2, int, 64, 2, 2); +extern VECT_ARRAY(buffer_vld2, uint, 8, 16, 2); +extern VECT_ARRAY(buffer_vld2, uint, 16, 8, 2); +extern VECT_ARRAY(buffer_vld2, uint, 32, 4, 2); +extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2); +extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2); +extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2); +extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2); +#endif + +/* Input buffers for vld3, one of each size */ +extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3); +extern VECT_ARRAY(buffer_vld3, int, 16, 4, 3); +extern VECT_ARRAY(buffer_vld3, int, 32, 2, 3); +extern VECT_ARRAY(buffer_vld3, int, 64, 1, 3); +extern VECT_ARRAY(buffer_vld3, uint, 8, 8, 3); +extern VECT_ARRAY(buffer_vld3, uint, 16, 4, 3); +extern VECT_ARRAY(buffer_vld3, uint, 32, 2, 3); +extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3); +extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3); +extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3); +extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3); +#endif +extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3); +extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3); +extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3); +extern VECT_ARRAY(buffer_vld3, int, 64, 2, 3); +extern VECT_ARRAY(buffer_vld3, uint, 8, 16, 3); +extern VECT_ARRAY(buffer_vld3, uint, 16, 8, 3); +extern VECT_ARRAY(buffer_vld3, uint, 32, 4, 3); +extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3); +extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3); +extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3); +extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( 
((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3); +#endif + +/* Input buffers for vld4, one of each size */ +extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4); +extern VECT_ARRAY(buffer_vld4, int, 16, 4, 4); +extern VECT_ARRAY(buffer_vld4, int, 32, 2, 4); +extern VECT_ARRAY(buffer_vld4, int, 64, 1, 4); +extern VECT_ARRAY(buffer_vld4, uint, 8, 8, 4); +extern VECT_ARRAY(buffer_vld4, uint, 16, 4, 4); +extern VECT_ARRAY(buffer_vld4, uint, 32, 2, 4); +extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4); +extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4); +extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4); +extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4); +#endif +extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4); +extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4); +extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4); +extern VECT_ARRAY(buffer_vld4, int, 64, 2, 4); +extern VECT_ARRAY(buffer_vld4, uint, 8, 16, 4); +extern VECT_ARRAY(buffer_vld4, uint, 16, 8, 4); +extern VECT_ARRAY(buffer_vld4, uint, 32, 4, 4); +extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4); +extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4); +extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4); +extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4); +#endif + +/* Input buffers for vld2_lane */ +extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, int, 16, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, int, 32, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, int, 64, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, uint, 8, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, uint, 16, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, 
uint, 32, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2]; +extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2]; +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2]; +#endif + +/* Input buffers for vld3_lane */ +extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, int, 16, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, int, 32, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, int, 64, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, uint, 8, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, uint, 16, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, uint, 32, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3]; +extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3]; +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3]; +#endif + +/* Input buffers for vld4_lane */ +extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, int, 16, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, int, 32, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, int, 64, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, uint, 8, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, uint, 16, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, uint, 32, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4]; +extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4]; +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || 
((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4]; +#endif + +/* Output buffers, one of each size */ +static ARRAY(result, int, 8, 8); +static ARRAY(result, int, 16, 4); +static ARRAY(result, int, 32, 2); +static ARRAY(result, int, 64, 1); +static ARRAY(result, uint, 8, 8); +static ARRAY(result, uint, 16, 4); +static ARRAY(result, uint, 32, 2); +static ARRAY(result, uint, 64, 1); +static ARRAY(result, poly, 8, 8); +static ARRAY(result, poly, 16, 4); +static ARRAY(result, float, 32, 2); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +static ARRAY(result, float, 16, 4); +#endif +static ARRAY(result, int, 8, 16); +static ARRAY(result, int, 16, 8); +static ARRAY(result, int, 32, 4); +static ARRAY(result, int, 64, 2); +static ARRAY(result, uint, 8, 16); +static ARRAY(result, uint, 16, 8); +static ARRAY(result, uint, 32, 4); +static ARRAY(result, uint, 64, 2); +static ARRAY(result, poly, 8, 16); +static ARRAY(result, poly, 16, 8); +static ARRAY(result, float, 32, 4); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) +static ARRAY(result, float, 16, 8); +#endif + +/* Dump results (generic function) */ +static void dump_results (char *test_name) +{ + int i; + + fprintf(ref_file, "\n%s output:\n", test_name); + fprintf(gcc_tests_file, "\n%s output:\n", test_name); + + DUMP(test_name, int, 8, 8, PRId8); + DUMP(test_name, int, 16, 4, PRId16); + DUMP(test_name, int, 32, 2, PRId32); + DUMP(test_name, int, 64, 1, PRId64); + DUMP(test_name, uint, 8, 8, PRIu8); + DUMP(test_name, uint, 16, 4, PRIu16); + DUMP(test_name, uint, 32, 2, PRIu32); + DUMP(test_name, uint, 64, 1, PRIu64); + DUMP_POLY(test_name, poly, 8, 8, PRIu8); + DUMP_POLY(test_name, poly, 16, 4, PRIu16); + DUMP_FP(test_name, float, 32, 2, PRIx32); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) 
!= 0) ) + DUMP_FP16(test_name, float, 16, 4, PRIu16); +#endif + + DUMP(test_name, int, 8, 16, PRId8); + DUMP(test_name, int, 16, 8, PRId16); + DUMP(test_name, int, 32, 4, PRId32); + DUMP(test_name, int, 64, 2, PRId64); + DUMP(test_name, uint, 8, 16, PRIu8); + DUMP(test_name, uint, 16, 8, PRIu16); + DUMP(test_name, uint, 32, 4, PRIu32); + DUMP(test_name, uint, 64, 2, PRIu64); + DUMP_POLY(test_name, poly, 8, 16, PRIu8); + DUMP_POLY(test_name, poly, 16, 8, PRIu16); + DUMP_FP(test_name, float, 32, 4, PRIx32); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DUMP_FP16(test_name, float, 16, 8, PRIu16); +#endif +} + +/* Dump results in hex (generic function) */ +static void dump_results_hex2 (const char *test_name, const char* comment) +{ + int i; + + fprintf(ref_file, "\n%s%s output:\n", test_name, comment); + fprintf(gcc_tests_file, "\n%s%s output:\n", test_name, comment); + + DUMP(test_name, int, 8, 8, PRIx8); + DUMP(test_name, int, 16, 4, PRIx16); + DUMP(test_name, int, 32, 2, PRIx32); + DUMP(test_name, int, 64, 1, PRIx64); + DUMP(test_name, uint, 8, 8, PRIx8); + DUMP(test_name, uint, 16, 4, PRIx16); + DUMP(test_name, uint, 32, 2, PRIx32); + DUMP(test_name, uint, 64, 1, PRIx64); + DUMP_POLY(test_name, poly, 8, 8, PRIx8); + DUMP_POLY(test_name, poly, 16, 4, PRIx16); + DUMP_FP(test_name, float, 32, 2, PRIx32); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DUMP_FP16(test_name, float, 16, 4, PRIx16); +#endif + + DUMP(test_name, int, 8, 16, PRIx8); + DUMP(test_name, int, 16, 8, PRIx16); + DUMP(test_name, int, 32, 4, PRIx32); + DUMP(test_name, int, 64, 2, PRIx64); + DUMP(test_name, uint, 8, 16, PRIx8); + DUMP(test_name, uint, 16, 8, PRIx16); + DUMP(test_name, uint, 32, 4, PRIx32); + DUMP(test_name, uint, 64, 2, PRIx64); + DUMP_POLY(test_name, poly, 8, 16, PRIx8); + DUMP_POLY(test_name, poly, 16, 8, PRIx16); + DUMP_FP(test_name, float, 32, 4, 
PRIx32); +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) + DUMP_FP16(test_name, float, 16, 8, PRIx16); +#endif +} + +static void dump_results_hex (const char *test_name) +{ + dump_results_hex2(test_name, ""); +} + +#ifndef STM_ARM_NEON_MODELS + +/* This hack is to cope with various compilers/libc which may not + provide endian.h or cross-compilers such as llvm which includes the + host's endian.h. */ +#ifndef __arm__ +#include <endian.h> +#define THIS_ENDIAN __BYTE_ORDER +#else /* __arm__ */ +#ifdef __ARMEL__ +#define THIS_ENDIAN __LITTLE_ENDIAN +#else /* __ARMEL__ */ +#define THIS_ENDIAN __BIG_ENDIAN +#endif +#endif /* __arm__ */ + +#if THIS_ENDIAN == __LITTLE_ENDIAN + +typedef union { + struct { + int _xxx:27; + unsigned int QC:1; + int V:1; + int C:1; + int Z:1; + int N:1; + } b; + unsigned int word; +} _ARM_FPSCR; + +#else /* __BIG_ENDIAN */ + +typedef union { + struct { + int N:1; + int Z:1; + int C:1; + int V:1; + unsigned int QC:1; + int _dnm:27; + } b; + unsigned int word; +} _ARM_FPSCR; + +#endif /* __BIG_ENDIAN */ + +#ifdef __ARMCC_VERSION +register _ARM_FPSCR _afpscr_for_qc __asm("fpscr"); +# define Neon_Cumulative_Sat _afpscr_for_qc.b.QC +# define Set_Neon_Cumulative_Sat(x, depend) {Neon_Cumulative_Sat = (x);} +#else +/* GCC/ARM does not know this register */ +# define Neon_Cumulative_Sat __read_neon_cumulative_sat() +/* We need a fake dependency to ensure correct ordering of asm + statements to preset the QC flag value, and Neon operators writing + to QC. 
*/ +#define Set_Neon_Cumulative_Sat(x, depend) \ + __set_neon_cumulative_sat((x), (depend)) + +# if defined(__aarch64__) +static volatile int __read_neon_cumulative_sat (void) { + _ARM_FPSCR _afpscr_for_qc; + asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); + return _afpscr_for_qc.b.QC; +} + +#define __set_neon_cumulative_sat(x, depend) { \ + _ARM_FPSCR _afpscr_for_qc; \ + asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); \ + _afpscr_for_qc.b.QC = x; \ + asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \ + } + +# else +static volatile int __read_neon_cumulative_sat (void) { + _ARM_FPSCR _afpscr_for_qc; + asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); + return _afpscr_for_qc.b.QC; +} + +#define __set_neon_cumulative_sat(x, depend) { \ + _ARM_FPSCR _afpscr_for_qc; \ + asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); \ + _afpscr_for_qc.b.QC = x; \ + asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \ + } + +# endif +#endif + +#endif /* STM_ARM_NEON_MODELS */ + +static void dump_neon_cumulative_sat(const char* msg, const char *name, + const char* t1, int w, int n) +{ + fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++, + name, Neon_Cumulative_Sat); + fprintf(gcc_tests_file, + "int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n", + t1, w, n, Neon_Cumulative_Sat); +} + +/* Clean output buffers before execution */ +static void clean_results (void) +{ + result_idx = 0; + CLEAN(result, int, 8, 8); + CLEAN(result, int, 16, 4); + CLEAN(result, int, 32, 2); + CLEAN(result, int, 64, 1); + CLEAN(result, uint, 8, 8); + CLEAN(result, uint, 16, 4); + CLEAN(result, uint, 32, 2); + CLEAN(result, uint, 64, 1); + CLEAN(result, poly, 8, 8); + CLEAN(result, poly, 16, 4); + CLEAN(result, float, 32, 2); + + CLEAN(result, int, 8, 16); + CLEAN(result, int, 16, 8); + CLEAN(result, int, 32, 4); + CLEAN(result, int, 64, 2); + CLEAN(result, uint, 8, 16); + CLEAN(result, uint, 16, 8); + 
CLEAN(result, uint, 32, 4); + CLEAN(result, uint, 64, 2); + CLEAN(result, poly, 8, 16); + CLEAN(result, poly, 16, 8); + CLEAN(result, float, 32, 4); +} + + +/* Helpers to declare variables of various types */ +#define DECL_VARIABLE(VAR, T1, W, N) \ + VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N) + +#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, int, 64, 1) + +#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, uint, 64, 1) + +#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, int, 8, 16); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, int, 64, 2) + +#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, uint, 8, 16); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, uint, 64, 2) + +#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ + DECL_VARIABLE(VAR, poly, 8, 8); \ + DECL_VARIABLE(VAR, poly, 16, 4); \ + DECL_VARIABLE(VAR, float, 32, 2) + +#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \ + DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \ + DECL_VARIABLE(VAR, poly, 8, 16); \ + DECL_VARIABLE(VAR, poly, 16, 8); \ + DECL_VARIABLE(VAR, float, 32, 4) + +#define DECL_VARIABLE_ALL_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_VARIANTS(VAR) + +#define DECL_VARIABLE_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) + +#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ + 
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) + +/* Helpers to initialize vectors */ +#define VDUP(VAR, Q, T1, T2, W, N, V) \ + VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V) + +#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \ + VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \ + VECT_VAR(VAR, T1, W, N), \ + L) + +/* We need to load initial values first, so rely on VLD1 */ +#define VLOAD(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)) + +/* Helpers for macros with 1 constant and 5 variable arguments */ +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, , int, s, 8, 8); \ + MACRO(VAR, , int, s, 16, 4); \ + MACRO(VAR, , int, s, 32, 2); \ + MACRO(VAR, , int, s, 64, 1) + +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, , uint, u, 8, 8); \ + MACRO(VAR, , uint, u, 16, 4); \ + MACRO(VAR, , uint, u, 32, 2); \ + MACRO(VAR, , uint, u, 64, 1) + +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, q, int, s, 8, 16); \ + MACRO(VAR, q, int, s, 16, 8); \ + MACRO(VAR, q, int, s, 32, 4); \ + MACRO(VAR, q, int, s, 64, 2) + +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR) \ + MACRO(VAR, q, uint, u, 8, 16); \ + MACRO(VAR, q, uint, u, 16, 8); \ + MACRO(VAR, q, uint, u, 32, 4); \ + MACRO(VAR, q, uint, u, 64, 2) + +#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) + 
+/* Helpers for macros with 2 constant and 5 variable arguments */ +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, , int, s, 8, 8); \ + MACRO(VAR1, VAR2, , int, s, 16, 4); \ + MACRO(VAR1, VAR2, , int, s, 32, 2); \ + MACRO(VAR1, VAR2 , , int, s, 64, 1) + +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, , uint, u, 8, 8); \ + MACRO(VAR1, VAR2, , uint, u, 16, 4); \ + MACRO(VAR1, VAR2, , uint, u, 32, 2); \ + MACRO(VAR1, VAR2, , uint, u, 64, 1) + +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, q, int, s, 8, 16); \ + MACRO(VAR1, VAR2, q, int, s, 16, 8); \ + MACRO(VAR1, VAR2, q, int, s, 32, 4); \ + MACRO(VAR1, VAR2, q, int, s, 64, 2) + +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, q, uint, u, 8, 16); \ + MACRO(VAR1, VAR2, q, uint, u, 16, 8); \ + MACRO(VAR1, VAR2, q, uint, u, 32, 4); \ + MACRO(VAR1, VAR2, q, uint, u, 64, 2) + +#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + MACRO(VAR1, VAR2, , poly, p, 8, 8); \ + MACRO(VAR1, VAR2, , poly, p, 16, 4) + +#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + MACRO(VAR1, VAR2, q, poly, p, 8, 16); \ + MACRO(VAR1, VAR2, q, poly, p, 16, 8) + +#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#endif /* _STM_ARM_NEON_REF_H_ */ |