diff options
author | Christophe Lyon <christophe.lyon@st.com> | 2011-01-24 17:37:40 +0100 |
---|---|---|
committer | Christophe Lyon <christophe.lyon@st.com> | 2011-01-24 17:37:40 +0100 |
commit | 073831adf9442c019e8d34b18b0c04b1d780a19b (patch) | |
tree | 391f6efd9ceabde2554e9d2c637c9fdafdc9617a | |
download | platform_external_arm-neon-tests-073831adf9442c019e8d34b18b0c04b1d780a19b.tar.gz platform_external_arm-neon-tests-073831adf9442c019e8d34b18b0c04b1d780a19b.tar.bz2 platform_external_arm-neon-tests-073831adf9442c019e8d34b18b0c04b1d780a19b.zip |
First public release of the ARM/Neon tests.
-rw-r--r-- | Init.s | 259 | ||||
-rw-r--r-- | InitCache.s | 52 | ||||
-rw-r--r-- | Makefile | 133 | ||||
-rw-r--r-- | README | 56 | ||||
-rw-r--r-- | armscript.inc | 14 | ||||
-rw-r--r-- | compute_ref.axf | bin | 0 -> 1742664 bytes | |||
-rw-r--r-- | compute_ref.c | 345 | ||||
-rw-r--r-- | ref-rvct.txt | 6849 | ||||
-rw-r--r-- | ref_dsp.c | 411 | ||||
-rw-r--r-- | ref_dspfns.c | 1493 | ||||
-rw-r--r-- | ref_integer.c | 279 | ||||
-rw-r--r-- | ref_v_binary_op.c | 88 | ||||
-rw-r--r-- | ref_v_binary_sat_op.c | 108 | ||||
-rw-r--r-- | ref_v_comp_f_op.c | 87 | ||||
-rw-r--r-- | ref_v_comp_op.c | 178 | ||||
-rw-r--r-- | ref_v_unary_op.c | 91 | ||||
-rw-r--r-- | ref_v_unary_sat_op.c | 95 | ||||
-rw-r--r-- | ref_vaba.c | 125 | ||||
-rw-r--r-- | ref_vabal.c | 103 | ||||
-rw-r--r-- | ref_vabd.c | 116 | ||||
-rw-r--r-- | ref_vabdl.c | 93 | ||||
-rw-r--r-- | ref_vabs.c | 54 | ||||
-rw-r--r-- | ref_vadd.c | 60 | ||||
-rw-r--r-- | ref_vaddhn.c | 86 | ||||
-rw-r--r-- | ref_vaddl.c | 104 | ||||
-rw-r--r-- | ref_vaddw.c | 104 | ||||
-rw-r--r-- | ref_vand.c | 29 | ||||
-rw-r--r-- | ref_vbic.c | 29 | ||||
-rw-r--r-- | ref_vbsl.c | 96 | ||||
-rw-r--r-- | ref_vcage.c | 29 | ||||
-rw-r--r-- | ref_vcagt.c | 29 | ||||
-rw-r--r-- | ref_vcale.c | 29 | ||||
-rw-r--r-- | ref_vcalt.c | 29 | ||||
-rw-r--r-- | ref_vceq.c | 29 | ||||
-rw-r--r-- | ref_vcge.c | 29 | ||||
-rw-r--r-- | ref_vcgt.c | 29 | ||||
-rw-r--r-- | ref_vcle.c | 29 | ||||
-rw-r--r-- | ref_vcls.c | 107 | ||||
-rw-r--r-- | ref_vclt.c | 29 | ||||
-rw-r--r-- | ref_vclz.c | 112 | ||||
-rw-r--r-- | ref_vcnt.c | 80 | ||||
-rw-r--r-- | ref_vcombine.c | 77 | ||||
-rw-r--r-- | ref_vcreate.c | 99 | ||||
-rw-r--r-- | ref_vcvt.c | 160 | ||||
-rw-r--r-- | ref_vdup.c | 109 | ||||
-rw-r--r-- | ref_vdup_lane.c | 77 | ||||
-rw-r--r-- | ref_veor.c | 29 | ||||
-rw-r--r-- | ref_vext.c | 100 | ||||
-rw-r--r-- | ref_vget_high.c | 64 | ||||
-rw-r--r-- | ref_vget_lane.c | 93 | ||||
-rw-r--r-- | ref_vget_low.c | 64 | ||||
-rw-r--r-- | ref_vhadd.c | 31 | ||||
-rw-r--r-- | ref_vhsub.c | 31 | ||||
-rw-r--r-- | ref_vld1.c | 55 | ||||
-rw-r--r-- | ref_vld1_dup.c | 60 | ||||
-rw-r--r-- | ref_vld1_lane.c | 102 | ||||
-rw-r--r-- | ref_vldX.c | 157 | ||||
-rw-r--r-- | ref_vldX_dup.c | 136 | ||||
-rw-r--r-- | ref_vldX_lane.c | 170 | ||||
-rw-r--r-- | ref_vmax.c | 116 | ||||
-rw-r--r-- | ref_vmin.c | 29 | ||||
-rw-r--r-- | ref_vmla.c | 144 | ||||
-rw-r--r-- | ref_vmla_lane.c | 125 | ||||
-rw-r--r-- | ref_vmla_n.c | 112 | ||||
-rw-r--r-- | ref_vmlal.c | 119 | ||||
-rw-r--r-- | ref_vmlal_lane.c | 101 | ||||
-rw-r--r-- | ref_vmlal_n.c | 92 | ||||
-rw-r--r-- | ref_vmls.c | 29 | ||||
-rw-r--r-- | ref_vmls_lane.c | 29 | ||||
-rw-r--r-- | ref_vmls_n.c | 29 | ||||
-rw-r--r-- | ref_vmlsl.c | 29 | ||||
-rw-r--r-- | ref_vmlsl_lane.c | 29 | ||||
-rw-r--r-- | ref_vmlsl_n.c | 29 | ||||
-rw-r--r-- | ref_vmovl.c | 60 | ||||
-rw-r--r-- | ref_vmovn.c | 60 | ||||
-rw-r--r-- | ref_vmul.c | 127 | ||||
-rw-r--r-- | ref_vmul_lane.c | 105 | ||||
-rw-r--r-- | ref_vmul_n.c | 91 | ||||
-rw-r--r-- | ref_vmull.c | 77 | ||||
-rw-r--r-- | ref_vmull_lane.c | 84 | ||||
-rw-r--r-- | ref_vmull_n.c | 81 | ||||
-rw-r--r-- | ref_vmvn.c | 112 | ||||
-rw-r--r-- | ref_vneg.c | 54 | ||||
-rw-r--r-- | ref_vorn.c | 29 | ||||
-rw-r--r-- | ref_vorr.c | 29 | ||||
-rw-r--r-- | ref_vpadal.c | 140 | ||||
-rw-r--r-- | ref_vpadd.c | 96 | ||||
-rw-r--r-- | ref_vpaddl.c | 113 | ||||
-rw-r--r-- | ref_vpmax.c | 29 | ||||
-rw-r--r-- | ref_vpmin.c | 29 | ||||
-rw-r--r-- | ref_vqabs.c | 73 | ||||
-rw-r--r-- | ref_vqadd.c | 153 | ||||
-rw-r--r-- | ref_vqdmlal.c | 97 | ||||
-rw-r--r-- | ref_vqdmlal_lane.c | 103 | ||||
-rw-r--r-- | ref_vqdmlal_n.c | 91 | ||||
-rw-r--r-- | ref_vqdmlsl.c | 29 | ||||
-rw-r--r-- | ref_vqdmlsl_lane.c | 29 | ||||
-rw-r--r-- | ref_vqdmlsl_n.c | 29 | ||||
-rw-r--r-- | ref_vqdmulh.c | 114 | ||||
-rw-r--r-- | ref_vqdmulh_lane.c | 115 | ||||
-rw-r--r-- | ref_vqdmulh_n.c | 108 | ||||
-rw-r--r-- | ref_vqdmull.c | 92 | ||||
-rw-r--r-- | ref_vqdmull_lane.c | 105 | ||||
-rw-r--r-- | ref_vqdmull_n.c | 101 | ||||
-rw-r--r-- | ref_vqmovn.c | 112 | ||||
-rw-r--r-- | ref_vqmovun.c | 93 | ||||
-rw-r--r-- | ref_vqneg.c | 73 | ||||
-rw-r--r-- | ref_vqrdmulh.c | 134 | ||||
-rw-r--r-- | ref_vqrdmulh_lane.c | 133 | ||||
-rw-r--r-- | ref_vqrdmulh_n.c | 122 | ||||
-rw-r--r-- | ref_vqrshl.c | 199 | ||||
-rw-r--r-- | ref_vqrshrn_n.c | 133 | ||||
-rw-r--r-- | ref_vqrshrun_n.c | 138 | ||||
-rw-r--r-- | ref_vqshl.c | 239 | ||||
-rw-r--r-- | ref_vqshl_n.c | 130 | ||||
-rw-r--r-- | ref_vqshlu_n.c | 155 | ||||
-rw-r--r-- | ref_vqshrn_n.c | 134 | ||||
-rw-r--r-- | ref_vqshrun_n.c | 114 | ||||
-rw-r--r-- | ref_vqsub.c | 156 | ||||
-rw-r--r-- | ref_vraddhn.c | 29 | ||||
-rw-r--r-- | ref_vrecpe.c | 97 | ||||
-rw-r--r-- | ref_vrecps.c | 76 | ||||
-rw-r--r-- | ref_vreinterpret.c | 256 | ||||
-rw-r--r-- | ref_vrev.c | 96 | ||||
-rw-r--r-- | ref_vrhadd.c | 31 | ||||
-rw-r--r-- | ref_vrshl.c | 192 | ||||
-rw-r--r-- | ref_vrshr_n.c | 217 | ||||
-rw-r--r-- | ref_vrshrn_n.c | 119 | ||||
-rw-r--r-- | ref_vrsqrte.c | 105 | ||||
-rw-r--r-- | ref_vrsqrts.c | 76 | ||||
-rw-r--r-- | ref_vrsra_n.c | 238 | ||||
-rw-r--r-- | ref_vrsubhn.c | 29 | ||||
-rw-r--r-- | ref_vset_lane.c | 78 | ||||
-rw-r--r-- | ref_vshl.c | 98 | ||||
-rw-r--r-- | ref_vshl_n.c | 75 | ||||
-rw-r--r-- | ref_vshll_n.c | 64 | ||||
-rw-r--r-- | ref_vshr_n.c | 76 | ||||
-rw-r--r-- | ref_vshrn_n.c | 81 | ||||
-rw-r--r-- | ref_vsli_n.c | 104 | ||||
-rw-r--r-- | ref_vsra_n.c | 97 | ||||
-rw-r--r-- | ref_vsri_n.c | 29 | ||||
-rw-r--r-- | ref_vst1_lane.c | 71 | ||||
-rw-r--r-- | ref_vstX_lane.c | 176 | ||||
-rw-r--r-- | ref_vsub.c | 60 | ||||
-rw-r--r-- | ref_vsubhn.c | 29 | ||||
-rw-r--r-- | ref_vsubl.c | 29 | ||||
-rw-r--r-- | ref_vsubw.c | 29 | ||||
-rw-r--r-- | ref_vtbX.c | 213 | ||||
-rw-r--r-- | ref_vtrn.c | 29 | ||||
-rw-r--r-- | ref_vtst.c | 99 | ||||
-rw-r--r-- | ref_vuzp.c | 155 | ||||
-rw-r--r-- | ref_vzip.c | 29 | ||||
-rw-r--r-- | retarget.c | 42 | ||||
-rw-r--r-- | scatter.scat | 29 | ||||
-rw-r--r-- | stm-arm-neon-ref.h | 438 |
155 files changed, 23034 insertions, 0 deletions
@@ -0,0 +1,259 @@ +;================================================================== +; Copyright ARM Ltd 2005. All rights reserved. +; +; Cortex-A8 Dhrystone example - Startup Code +;================================================================== + + PRESERVE8 + AREA CORTEXA8, CODE, READONLY + + ENTRY + +; Standard definitions of mode bits and interrupt (I & F) flags in PSRs + +Mode_USR EQU 0x10 +Mode_FIQ EQU 0x11 +Mode_IRQ EQU 0x12 +Mode_SVC EQU 0x13 +Mode_ABT EQU 0x17 +Mode_UNDEF EQU 0x1B +Mode_SYS EQU 0x1F + +I_Bit EQU 0x80 ; when I bit is set, IRQ is disabled +F_Bit EQU 0x40 ; when F bit is set, FIQ is disabled + +;================================================================== +; Disable Cortex-A8 MMU if enabled +;================================================================== + + EXPORT Start + +Start + + MRC p15, 0, r0, c1, c0, 0 ; Read CP15 Control Register into r0 + TST r0, #0x1 ; Is the MMU enabled? + BICNE r0, r0, #0x1 ; Clear bit 0 + MCRNE p15, 0, r0, c1, c0, 0 ; Write value back + +;================================================================== +; Initialise Supervisor Mode Stack +; Note stack must be 8 byte aligned. +;================================================================== + + IMPORT ||Image$$STACK$$ZI$$Limit|| ; Linker symbol from scatter file + LDR SP, =||Image$$STACK$$ZI$$Limit|| + +;================================================================== +; TLB maintenance, Invalidate Data and Instruction TLB's +;================================================================== + + MOV r0,#0 + MCR p15, 0, r0, c8, c7, 0 ; Cortex-A8 I-TLB and D-TLB invalidation + +;================================================================== +; Cache Invalidation code for Cortex-A8 +;================================================================== + + ; Invalidate L1 Instruction Cache + + MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR + TST r0, #0x3 ; Harvard Cache? 
+ MOV r0, #0 + MCRNE p15, 0, r0, c7, c5, 0 ; Invalidate Instruction Cache + + ; Invalidate Data/Unified Caches + + MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR + ANDS r3, r0, #&7000000 + MOV r3, r3, LSR #23 ; Total cache levels << 1 + BEQ Finished + + MOV r10, #0 ; R10 holds current cache level << 1 +Loop1 ADD r2, r10, r10, LSR #1 ; R2 holds cache "Set" position + MOV r1, r0, LSR r2 ; Bottom 3 bits are the Cache-type for this level + AND r1, R1, #7 ; Get those 3 bits alone + CMP r1, #2 + BLT Skip ; No cache or only instruction cache at this level + + MCR p15, 2, r10, c0, c0, 0 ; Write the Cache Size selection register + MOV r1, #0 + MCR p15, 0, r1, c7, c5, 4 ; PrefetchFlush to sync the change to the CacheSizeID reg + MRC p15, 1, r1, c0, c0, 0 ; Reads current Cache Size ID register + AND r2, r1, #&7 ; Extract the line length field + ADD r2, r2, #4 ; Add 4 for the line length offset (log2 16 bytes) + LDR r4, =0x3FF + ANDS r4, r4, r1, LSR #3 ; R4 is the max number on the way size (right aligned) + CLZ r5, r4 ; R5 is the bit position of the way size increment + LDR r7, =0x00007FFF + ANDS r7, r7, r1, LSR #13 ; R7 is the max number of the index size (right aligned) + +Loop2 MOV r9, r4 ; R9 working copy of the max way size (right aligned) + +Loop3 ORR r11, r10, r9, LSL r5 ; Factor in the Way number and cache number into R11 + ORR r11, r11, r7, LSL r2 ; Factor in the Set number + MCR p15, 0, r11, c7, c14, 2 ; Clean and Invalidate by set/way + SUBS r9, r9, #1 ; Decrement the Way number + BGE Loop3 + SUBS r7, r7, #1 ; Decrement the Set number + BGE Loop2 +Skip ADD r10, r10, #2 ; increment the cache number + CMP r3, r10 + BGT Loop1 + +Finished + + +;=================================================================== +; Cortex-A8 MMU Configuration +; Set translation table base +;=================================================================== + + + IMPORT ||Image$$TTB$$ZI$$Base|| ; from scatter file.; + + ; Cortex-A8 supports two translation tables + ; Configure translation table 
base (TTB) control register cp15,c2 + ; to a value of all zeros, indicates we are using TTB register 0. + + MOV r0,#0x0 + MCR p15, 0, r0, c2, c0, 2 + + ; write the address of our page table base to TTB register 0.; + ; We are setting to outer-noncachable [4:3] is zero + + LDR r0,=||Image$$TTB$$ZI$$Base|| + MCR p15, 0, r0, c2, c0, 0 + + +;=================================================================== +; Cortex-A8 PAGE TABLE generation, using standard Arch v6 tables +; +; AP[11:10] - Access Permissions = b11, Read/Write Access +; Domain[8:5] - Domain = b1111, Domain 15 +; Type[1:0] - Descriptor Type = b10, 1Mb descriptors +; +; TEX C B +; 000 0 0 Strongly Ordered +; 001 1 1 Outer and inner write back, write allocate Normal +;=================================================================== + + LDR r1,=0xfff ; loop counter + LDR r2,=2_00000000000000000000110111100010 + + ; r0 contains the address of the translation table base + ; r1 is loop counter + ; r2 is level1 descriptor (bits 19:0) + + ; use loop counter to create 4096 individual table entries + ; this writes from address 0x7FFC down to 0x4000 in word steps (4bytes). + +init_ttb_1 + + ORR r3, r2, r1, LSL#20 ; r3 now contains full level1 descriptor to write + STR r3, [r0, r1, LSL#2] ; str table entry at TTB base + loopcount*4 + SUBS r1, r1, #1 ; decrement loop counter + BPL init_ttb_1 + + ; In this example we will change the cacheable attribute in the first descriptor. + ; Virtual memory from 0 to 1MB will be cacheable (write back mode). + ; TEX[14:12]=001 and CB[3:2]= 11, Outer and inner write back, write allocate. 
+ + ORR r3,r3,#2_0000000001100 ; Set CB bits + ORR r3,r3,#2_1000000000000 ; Set TEX bits + STR r3,[r0] + + ADD r2, r3, #0x100000 ; alter r3 to have correct base address for second descriptor (flat mapping) + STR r2, [r0, #4] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x200000 ; alter r3 to have correct base address for 3 descriptor (flat mapping) + STR r2, [r0, #8] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x300000 ; alter r3 to have correct base address for 4 descriptor (flat mapping) + STR r2, [r0, #0xc] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x400000 ; alter r3 to have correct base address for 5 descriptor (flat mapping) + STR r2, [r0, #0x10] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x500000 ; alter r3 to have correct base address for 6 descriptor (flat mapping) + STR r2, [r0, #0x14] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x600000 ; alter r3 to have correct base address for 7 descriptor (flat mapping) + STR r2, [r0, #0x18] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x700000 ; alter r3 to have correct base address for 8 descriptor (flat mapping) + STR r2, [r0, #0x1c] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x800000 ; alter r3 to have correct base address for 9 descriptor (flat mapping) + STR r2, [r0, #0x20] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x900000 ; alter r3 to have correct base address for 10 descriptor (flat mapping) + STR r2, [r0, #0x24] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0xa00000 ; alter r3 to have correct base address for 11 descriptor (flat mapping) + STR r2, [r0, #0x28] ; store the new 
descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0xb00000 ; alter r3 to have correct base address for 12 descriptor (flat mapping) + STR r2, [r0, #0x2c] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0xc00000 ; alter r3 to have correct base address for 13 descriptor (flat mapping) + STR r2, [r0, #0x30] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + +;=================================================================== +; Setup domain control register - Enable all domains to client mode +;=================================================================== + + MRC p15, 0, r0, c3, c0, 0 ; Read Domain Access Control Register + LDR r0, =0x55555555 ; Initialize every domain entry to b01 (client) + MCR p15, 0, r0, c3, c0, 0 ; Write Domain Access Control Register + +;=================================================================== +; Setup L2 Cache - L2 Cache Auxiliary Control +;=================================================================== + + MOV r0, #0 + ;MCR p15, 1, r0, c9, c0, 2 ; Write L2 Auxilary Control Register + +;================================================================== +; Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11. +; Enables Full Access i.e. in both priv and non priv modes +;================================================================== + + MRC p15, 0, r0, c1, c0, 2 ; read CP access register + ORR r0, r0, #(0x3 <<20) ; enable access CP 10 + ORR r0, r0, #(0x3 <<22) ; enable access CP 11 + MCR p15, 0, r0, c1, c0, 2 ; write CP access register back + +;================================================================== +; Switch on the VFP and Neon Hardware +;================================================================= + + MOV r0, #0 ; Set up a register + ORR r0, r0, #(0x1 << 30) + FMXR FPEXC, r0 ; Write FPEXC register, EN bit set. 
+ +;=================================================================== +; Enable MMU and Branch to __main +;=================================================================== + + IMPORT __main ; before MMU enabled import label to __main + LDR r12,=__main ; save this in register for possible long jump + + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #0x1 ; enable MMU before scatter loading + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + + +; Now the MMU is enabled, virtual to physical address translations will occur. +; This will affect the next instruction fetches. +; +; The two instructions currently in the ARM pipeline will have been fetched +; before the MMU was enabled. This property is useful because the next two +; instructions are safe even if new instruction fetches fail. If this routine +; was mapped out of the new virtual memory map, the branch to __main would +; still succeed. + + BX r12 ; branch to __main C library entry point + + END ; mark the end of this file + diff --git a/InitCache.s b/InitCache.s new file mode 100644 index 0000000..250652a --- /dev/null +++ b/InitCache.s @@ -0,0 +1,52 @@ +; Copyright ARM Ltd 2005. All rights reserved. 
+ +;================================================================== +; This code provides basic global enable for a Cortex-A8 cache +; and program flow prediction +; This code must be run from a privileged mode +;================================================================== + + AREA CORTEXA8CACHE, CODE, READONLY + EXPORT core_init + +core_init + +;================================================================== +; Global Enable for Cortex-A8 Instruction and Data Caching +;================================================================== + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #(0x1 <<12) ; enable I Cache + ;BIC r0, r0, #(0x1 <<12) ; Clear bit 0 + ORR r0, r0, #(0x1 <<2) ; enable D Cache + ;BIC r0, r0, #(0x1 << 2) ; Clear bit 0 + ORR r0, r0, #0x1 ; enable MMU + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + +;================================================================== +; Enable Cortex-A8 Level2 Unified Cache +;================================================================== + + MRC p15, 0, r0, c1, c0, 1 ; Read Auxiliary Control Register + ORR r0, r0, #2 ; L2EN bit, enable L2 cache + ;BIC r0, r0, #(0x1 << 1) ; L2EN bit, disable L2 cache + ;ORR r0, r0, #(0x1 << 4) ;Enables speculative accesses on AXI + ORR r0, r0, #(0x1 << 4) ;Enables speculative accesses on AXI + ORR r0, r0, #(0x1 << 5) ;Enables caching NEON data within the L1 data cache + MCR p15, 0, r0, c1, c0, 1 ; Write Auxiliary Control Register + +;================================================================== +; Cortex-A8 program flow prediction +;================================================================== + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #(0x1 <<11) ; Enable all forms of branch prediction + ;BIC r0, r0, #(0x1 << 11) ; Disable all forms of branch prediction + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + +;================================================================== + + BX lr + + END ; 
mark the end of this file + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e27425a --- /dev/null +++ b/Makefile @@ -0,0 +1,133 @@ +# Copyright (c) 2009, 2010, 2011 STMicroelectronics +# Written by Christophe Lyon + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# ARM RVCT +CC.rvct := armcc +CFLAGS.rvct = -g --cpu=cortex-a9 -Ono_special_regs_postregalloc -I. 
+LD.rvct := armlink +LDFLAGS.rvct := --cpu=cortex-a9 --entry 0x2000 + +# GCC/ARM cross compiler +CC.gccarm := arm-none-eabi-gcc +CFLAGS.gccarm := -g -Wall -mcpu=cortex-a9 -mfloat-abi=softfp -mfpu=neon -fshort-wchar -Wno-unused-variable -Wno-unused-function +LD.gccarm := armlink +LDFLAGS.gccarm := --cpu=cortex-a9 --entry 0x2000 + +# List of validated intrinsics +REFNAMES = vld1 vadd vld1_lane vld1_dup vdup vget_high vget_low \ + vqdmlal_lane vqdmlsl_lane vext vshrn_n vset_lane vget_lane \ + vqsub vqdmulh_lane vqdmull vqdmlal vqdmlsl vceq vcge vcle \ + vcgt vclt vbsl vshl vldX vdup_lane vrshrn_n vqdmull_lane \ + vst1_lane vqshl vqshl_n vqrshrn_n vsub vqadd vabs vqabs \ + vcombine vmax vmin vneg vqneg vmlal vmlal_lane vmlsl \ + vmlsl_lane vmovl vmovn vmull vmull_lane vrev vrshl vshl_n \ + vshr_n vsra_n vtrn vuzp vzip vreinterpret vqdmulh vqrdmulh \ + vqrdmulh_lane vqrshl vaba vabal vabd vabdl vand vorr vorn \ + veor vbic vcreate vldX_lane vldX_dup vmla vmls vmul \ + vmul_lane vmul_n vmull_n vqdmulh_n vqdmull_n vqrdmulh_n \ + vmla_lane vmls_lane vmla_n vmls_n vmlal_n vmlsl_n vqdmlal_n \ + vqdmlsl_n vsri_n vsli_n vtst vaddhn vraddhn vaddl vaddw \ + vhadd vrhadd vhsub vsubl vsubw vsubhn vrsubhn vmvn vqmovn \ + vqmovun vrshr_n vrsra_n vshll_n vpaddl vpadd vpadal \ + vqshlu_n vclz vcls vcnt vqshrn_n vpmax vpmin vqshrun_n \ + vqrshrun_n vstX_lane vtbX vrecpe vrsqrte integer vcage \ + vcagt vcale vcalt vrecps vrsqrts vcvt dsp dspfns +REFLIST = $(addprefix ref_, $(REFNAMES)) + +all: ref-rvct.qemu + +check: + diff stm-arm-neon.refrvct ref-rvct.txt + +# Building reference files with RVCT +REFOBJS.rvct = $(addsuffix .rvct.o, $(REFLIST)) +REFRVCT=stm-arm-neon.refrvct +ref-rvct: $(REFRVCT) +ref-rvct.qemu: $(REFRVCT).qemu + +$(REFRVCT): compute_ref.axf + rvdebug -stdiolog=stdio.log -jou=journal.log -log=log.log -nologo -cmd -init @coretile.core.cpu0@RTSM -inc armscript.inc -exec $^ + +$(REFRVCT).qemu: compute_ref.axf + qemu-system-arm -cpu cortex-a9 -semihosting -nographic -kernel 
$^ + +compute_ref.axf: scatter.scat compute_ref.rvct.o retarget.rvct.o \ + InitCache.o Init.o $(REFOBJS.rvct) + $(LD.rvct) $(LDFLAGS.rvct) --scatter $^ -o $@ + +compute_ref.rvct.o retarget.rvct.o: %.rvct.o: %.c + $(CC.rvct) $(CFLAGS.rvct) -c $^ -o $@ -DREFFILE=\"$(REFRVCT)\" + +ref_%.rvct.o: ref_%.c stm-arm-neon-ref.h $(NEONINCLUDE) + $(CC.rvct) $(CFLAGS.rvct) -c $< -o $@ + +InitCache.o Init.o: %.o: %.s + $(CC.rvct) $(CFLAGS.rvct) -c $^ -o $@ + + +# Building reference files with GCC/ARM +REFOBJS.gccarm = $(addsuffix .gccarm.o, $(REFLIST)) +REFGCCARM=stm-arm-neon.gccarm +ref-gccarm: $(REFGCCARM) + +$(REFGCCARM): compute_ref.gccarm + rvdebug -stdiolog=stdio.log -jou=journal.log -log=log.log -nologo -cmd -init @coretile.core.cpu0@RTSM -inc armscript.inc -exec $^ + +compute_ref.gccarm: scatter.scat compute_ref.gccarm.o retarget.rvct.o \ + InitCache.o Init.o $(REFOBJS.gccarm) + $(LD.rvct) $(LDFLAG.rvct) --scatter $^ -o $@ + +compute_ref.gccarm.o: %.gccarm.o: %.c + $(CC.gccarm) $(CFLAGS.gccarm) -c $^ -o $@ -DREFFILE=\"$(REFGCCARM)\" + +ref_%.gccarm.o: ref_%.c stm-arm-neon-ref.h + $(CC.gccarm) $(CFLAGS.gccarm) -c $< -o $@ + +# Use '*' rather than '%' in these rules: +# - using '%' does not make them add to the implicit rules above (they +# are different rules, only the 1st one matches) +# - they are needed only when the target already exists, so the +# wildcard matches when needed. +# - if the target does not already exist, the implicit rules apply. 
+ref_vadd.*.o ref_vsub.*.o ref_vand.*.o ref_vbic.*.o ref_veor.*.o ref_vorn.*.o ref_vorr.*.o: ref_v_binary_op.c +ref_vqadd.*.o ref_vqsub.*.o: ref_v_binary_sat_op.c +ref_vabs.*.o ref_vneg.*.o ref_vmvn.*.o: ref_v_unary_op.c +ref_vqabs.*.o ref_vqneg.*.o: ref_v_unary_sat_op.c +ref_vceq.*.o ref_vcge.*.o ref_vcle.*.o ref_vcgt.*.o ref_vclt.*.o: ref_v_comp_op.c +ref_vhadd.*.o ref_vrhadd.*.o ref_vhsub.*.o ref_vmin.*.o: ref_vmax.c +ref_vmls.*.o: ref_vmla.c +ref_vmls_lane.*.o: ref_vmla_lane.c +ref_vmls_n.*.o: ref_vmla_n.c +ref_vmlsl.*.o: ref_vmlal.c +ref_vmlsl_lane.*.o: ref_vmlal_lane.c +ref_vmlsl_n.*.o: ref_vmlal_n.c +ref_vqdmlsl.*.o: ref_vqdmlal.c +ref_vqdmlsl_lane.*.o: ref_vqdmlal_lane.c +ref_vqdmlsl_n.*.o: ref_vqdmlal_n.c +ref_vtrn.*.o ref_vzip.*.o: ref_vuzp.c +ref_vsri_n.*.o: ref_vsli_n.c +ref_vraddhn.*.o ref_vsubhn.*.o ref_vrsubhn.*.o: ref_vaddhn.c +ref_vsubl.*.o: ref_vaddl.c +ref_vsubw.*.o: ref_vaddw.c +ref_vcage.*.o ref_vcale.*.o ref_vcagt.*.o ref_vcalt.*.o: ref_v_comp_f_op.c + +clean: + rm -f *.o *.log stm-arm-neon.refrvct @@ -0,0 +1,56 @@ +ARM Neon reference tests +======================== +This package contains extensive tests for the ARM/Neon instructions. + +It works by building a program which uses all of them, and then +executing it on an actual target or a simulator. + +It can be used to validate the simulator against an actual HW target, +or to validate C compilers in presence of Neon intrinsics calls. + +The supplied Makefile enables to build with both ARM RVCT compiler and +GNU GCC (for the ARM target), and supports execution with ARM RVDEBUG +on an ARM simulator and with QEMU. + +For convenience, the ARM ELF binary file (as compiled with RVCT) is +supplied (compute_ref.axf), as well as expected outputs +(ref-rvct.txt). 
+ +Typical usage when used to debug QEmu: +$ make all # to build the test program with ARM rvct and execute with QEmu +$ make check # to compare the results with the expected output + + +Known issues: +------------- +The tests currently fail to build with GCC/ARM: +- no support for Neon_Overflow/fpsrc register +- ICE when compiling ref_vldX.c, ref_vldX_lane.c, ref_vstX_lane.c +- fails to compile vst1_lane.c +- missing include files: dspfns.h, armdsp.h + +VS[LR]I.64 tests are disabled because QEmu aborts. + +Engineering: +------------ +In order to cover all the Neon instructions extensively, these tests +make intensive use of the C-preprocessor, to save maintenance efforts. + +Most tests (the more regular ones) share a common basic structure. In +general, variable names are suffixed by their type name, so as to +differentiate variables with the same purpose but of differente types. +Hence vector1_int8x8, vector1_int16x4 etc... + +For instance in ref_vmul.c the layout of the code is as follows: + +- declare input and output vectors (named 'vector1', 'vector2' and + 'vector_res') of each possible type (s/u, 8/16/32/64 bits). + +- clean the result buffers. + +- initialize input vectors 'vector1' and 'vector2'. + +- call each variant of the intrinsic and store the result in a buffer + named 'buffer', whose contents is printed after execution. + +One can then compare the actual result with the expected one. 
diff --git a/armscript.inc b/armscript.inc new file mode 100644 index 0000000..ad53a5c --- /dev/null +++ b/armscript.inc @@ -0,0 +1,14 @@ +ERROR=ABORT // Abort if error occurs when processing the script +WAIT=ON // Wait for each command to finish + +GO + +STATS + + //STDIOLOG OFF // Close the log file + + //UNLOAD 1 // Unload the image + //DELFILE 1 // Remove the symbol definitions + //DISCONNECT // Disconnect from the target + //WAIT=OFF +QUIT Y diff --git a/compute_ref.axf b/compute_ref.axf Binary files differnew file mode 100644 index 0000000..4db6e19 --- /dev/null +++ b/compute_ref.axf diff --git a/compute_ref.c b/compute_ref.c new file mode 100644 index 0000000..e1109b9 --- /dev/null +++ b/compute_ref.c @@ -0,0 +1,345 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <stdio.h> +#include <stdlib.h> + +FILE* log_file = NULL; +FILE* ref_file = NULL; + +#define LOGFILE "stm-arm-neon-ref.log" + +void cleanup () +{ + if (log_file) fclose (log_file); + if (ref_file) fclose (ref_file); + exit (1); +} + +extern void exec_vld1(void); +extern void exec_vadd(void); +extern void exec_vld1_lane(void); +extern void exec_vld1_dup(void); +extern void exec_vdup(void); +extern void exec_vget_high(void); +extern void exec_vget_low(void); +extern void exec_vqdmlal_lane(void); +extern void exec_vqdmlsl_lane(void); +extern void exec_vqdmlal_n(void); +extern void exec_vqdmlsl_n(void); +extern void exec_vext(void); +extern void exec_vshr_n(void); +extern void exec_vshrn_n(void); +extern void exec_vrshrn_n(void); +extern void exec_vqrshrn_n(void); +extern void exec_vset_lane(void); +extern void exec_vget_lane(void); +extern void exec_vqsub(void); +extern void exec_vqdmulh(void); +extern void exec_vqdmulh_lane(void); +extern void exec_vqdmulh_n(void); +extern void exec_vqdmull(void); +extern void exec_vqdmlal(void); +extern void exec_vqdmlsl(void); +extern void exec_vceq(void); +extern void exec_vcge(void); +extern void exec_vcle(void); +extern void exec_vcgt(void); +extern void exec_vclt(void); +extern void exec_vbsl(void); +extern void exec_vshl(void); +extern void exec_vqshl(void); +extern void exec_vqshl_n(void); +extern void exec_vrshl(void); +extern void exec_vshl_n(void); +extern void exec_vldX(void); +extern void exec_vdup_lane(void); +extern void exec_vqdmull_lane(void); +extern void exec_vqdmull_n(void); +extern void exec_vst1_lane(void); +extern void exec_vsub(void); +extern void exec_vqadd(void); +extern void exec_vabs(void); +extern void exec_vqabs(void); +extern void exec_vcombine(void); +extern void exec_vmax(void); +extern void exec_vmin(void); +extern void exec_vneg(void); +extern void exec_vqneg(void); +extern void exec_vmlal(void); +extern void exec_vmlal_lane(void); +extern void exec_vmlal_n(void); +extern void 
exec_vmlsl(void); +extern void exec_vmlsl_lane(void); +extern void exec_vmlsl_n(void); +extern void exec_vmovl(void); +extern void exec_vmovn(void); +extern void exec_vmull(void); +extern void exec_vmull_lane(void); +extern void exec_vrev(void); +extern void exec_vsra_n(void); +extern void exec_vtrn(void); +extern void exec_vuzp(void); +extern void exec_vzip(void); +extern void exec_vreinterpret(void); +extern void exec_vqrdmulh(void); +extern void exec_vqrdmulh_lane(void); +extern void exec_vqrdmulh_n(void); +extern void exec_vqrshl(void); +extern void exec_vaba(void); +extern void exec_vabal(void); +extern void exec_vabd(void); +extern void exec_vabdl(void); +extern void exec_vand(void); +extern void exec_vorr(void); +extern void exec_vorn(void); +extern void exec_veor(void); +extern void exec_vbic(void); +extern void exec_vcreate(void); +extern void exec_vldX_lane(void); +extern void exec_vldX_dup(void); +extern void exec_vmla(void); +extern void exec_vmls(void); +extern void exec_vmul(void); +extern void exec_vmul_lane(void); +extern void exec_vmul_n(void); +extern void exec_vmull_n(void); +extern void exec_vmla_lane(void); +extern void exec_vmls_lane(void); +extern void exec_vmla_n(void); +extern void exec_vmls_n(void); +extern void exec_vsli_n(void); +extern void exec_vsri_n(void); +extern void exec_vtst(void); +extern void exec_vaddhn(void); +extern void exec_vraddhn(void); +extern void exec_vaddl(void); +extern void exec_vaddw(void); +extern void exec_vhadd(void); +extern void exec_vrhadd(void); +extern void exec_vhsub(void); +extern void exec_vsubl(void); +extern void exec_vsubw(void); +extern void exec_vsubhn(void); +extern void exec_vrsubhn(void); +extern void exec_vmvn(void); +extern void exec_vqmovn(void); +extern void exec_vqmovun(void); +extern void exec_vrshr_n(void); +extern void exec_vrsra_n(void); +extern void exec_vshll_n(void); +extern void exec_vpaddl(void); +extern void exec_vpadd(void); +extern void exec_vpadal(void); +extern void 
exec_vqshlu_n(void); +extern void exec_vclz(void); +extern void exec_vcls(void); +extern void exec_vcnt(void); +extern void exec_vqshrn_n(void); +extern void exec_vpmax(void); +extern void exec_vpmin(void); +extern void exec_vqshrun_n(void); +extern void exec_vqrshrun_n(void); +extern void exec_vstX_lane(void); +extern void exec_vtbX(void); +extern void exec_vrecpe(void); +extern void exec_vrsqrte(void); +extern void exec_integer(void); /* Integer (non-NEON) intrinsics */ + +extern void exec_vcage(void); +extern void exec_vcagt(void); +extern void exec_vcale(void); +extern void exec_vcalt(void); +extern void exec_vcvt(void); +extern void exec_vrecps(void); +extern void exec_vrsqrts(void); + +extern void exec_dsp(void); /* DSP (non-NEON) intrinsics */ +extern void exec_dspfns(void); /* DSP FNS (non-NEON/ITU) intrinsics */ + +int main () +{ + log_file = fopen (LOGFILE, "w"); + if (log_file == NULL) { + fprintf (stderr, "Error opening log file "LOGFILE"\n"); + cleanup (); + } + + ref_file = fopen (REFFILE, "w"); + if (ref_file == NULL) { + fprintf (log_file, "Error opening ref file %s\n", REFFILE); + cleanup (); + } + + fprintf (log_file, "Computing refs....\n"); + + exec_vld1 (); + exec_vadd (); + exec_vld1_lane (); + exec_vld1_dup (); + exec_vdup (); + exec_vget_high (); + exec_vget_low (); + exec_vqdmlal_lane (); + exec_vqdmlsl_lane (); + exec_vqdmlal_n (); + exec_vqdmlsl_n (); + exec_vext (); + exec_vshr_n (); + exec_vshrn_n (); + exec_vrshrn_n (); + exec_vqrshrn_n (); + exec_vset_lane (); + exec_vget_lane (); + exec_vqsub (); + exec_vqdmulh (); + exec_vqdmulh_lane (); + exec_vqdmulh_n (); + exec_vqdmull (); + exec_vqdmlal (); + exec_vqdmlsl (); + exec_vceq (); + exec_vcge (); + exec_vcle (); + exec_vcgt (); + exec_vclt (); + exec_vbsl (); + exec_vshl (); + exec_vshl_n (); + exec_vqshl (); + exec_vqshl_n (); + exec_vrshl (); + exec_vldX (); + exec_vdup_lane (); + exec_vqdmull_lane (); + exec_vqdmull_n (); + exec_vst1_lane (); + exec_vsub (); + exec_vqadd (); + 
exec_vabs (); + exec_vqabs (); + exec_vcombine (); + exec_vmax (); + exec_vmin (); + exec_vneg (); + exec_vqneg (); + exec_vmlal (); + exec_vmlsl (); + exec_vmlal_lane (); + exec_vmlsl_lane (); + exec_vmlal_n (); + exec_vmlsl_n (); + exec_vmovl (); + exec_vmovn (); + exec_vmull (); + exec_vmull_lane (); + exec_vrev (); + exec_vsra_n (); + exec_vtrn (); + exec_vuzp (); + exec_vzip (); + exec_vreinterpret (); + exec_vqrdmulh (); + exec_vqrdmulh_lane (); + exec_vqrdmulh_n (); + exec_vqrshl (); + exec_vaba (); + exec_vabal (); + exec_vabd (); + exec_vabdl (); + exec_vand (); + exec_vorr (); + exec_vorn (); + exec_veor (); + exec_vbic (); + exec_vcreate (); + exec_vldX_lane (); + exec_vldX_dup (); + exec_vmla (); + exec_vmls (); + exec_vmul (); + exec_vmul_lane (); + exec_vmul_n (); + exec_vmull_n (); + exec_vmla_lane (); + exec_vmls_lane (); + exec_vmla_n (); + exec_vmls_n (); +#if 0 + exec_vsli_n (); + exec_vsri_n (); +#endif + exec_vtst (); + exec_vaddhn (); + exec_vraddhn (); + exec_vaddl (); + exec_vaddw (); + exec_vhadd (); + exec_vrhadd (); + exec_vhsub (); + exec_vsubl (); + exec_vsubw (); + exec_vsubhn (); + exec_vrsubhn (); + exec_vmvn (); + exec_vqmovn (); + exec_vqmovun (); + exec_vrshr_n (); + exec_vrsra_n (); + exec_vshll_n (); + exec_vpaddl (); + exec_vpadd (); + exec_vpadal (); + exec_vqshlu_n (); + exec_vclz (); + exec_vcls (); + exec_vcnt (); + exec_vqshrn_n (); + exec_vpmax (); + exec_vpmin (); + exec_vqshrun_n (); + exec_vqrshrun_n (); + exec_vstX_lane (); + exec_vtbX (); + exec_vrecpe (); + exec_vrsqrte (); + + exec_integer (); + + exec_vcage (); + exec_vcale (); + exec_vcagt (); + exec_vcalt (); + exec_vcvt (); + exec_vrecps (); + exec_vrsqrts (); + + exec_dsp (); + exec_dspfns (); + + fprintf (log_file, "Finished\n"); + + return 0; +} diff --git a/ref-rvct.txt b/ref-rvct.txt new file mode 100644 index 0000000..8c95fbd --- /dev/null +++ b/ref-rvct.txt @@ -0,0 +1,6849 @@ + +VLD1/VLD1Q output: +VLD1/VLD1Q:0:result_int8x8 [] = { fffffff0, fffffff1, 
fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD1/VLD1Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD1/VLD1Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD1/VLD1Q:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD1/VLD1Q:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD1/VLD1Q:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD1/VLD1Q:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VADD/VADDQ output: +VADD/VADDQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, } +VADD/VADDQ:1:result_int16x4 [] = { ffffffec, ffffffed, ffffffee, ffffffef, } +VADD/VADDQ:2:result_int32x2 [] = { fffffff3, fffffff4, } +VADD/VADDQ:3:result_int64x1 [] = { 54, } +VADD/VADDQ:4:result_uint8x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:5:result_uint16x4 [] = { e, f, 10, 11, } 
+VADD/VADDQ:6:result_uint32x2 [] = { 18, 19, } +VADD/VADDQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VADD/VADDQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VADD/VADDQ:9:result_int8x16 [] = { ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VADD/VADDQ:10:result_int16x8 [] = { ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VADD/VADDQ:11:result_int32x4 [] = { ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VADD/VADDQ:12:result_int64x2 [] = { 8, 9, } +VADD/VADDQ:13:result_uint8x16 [] = { fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:14:result_uint16x8 [] = { fff3, fff4, fff5, fff6, fff7, fff8, fff9, fffa, } +VADD/VADDQ:15:result_uint32x4 [] = { 27, 28, 29, 2a, } +VADD/VADDQ:16:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff4, } +VADD/VADDQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +float32: +VADD/VADDQ:18:result_float32x2 [] = { 40d9999a 0x1.b33334p+2 6.8, 40d9999a 0x1.b33334p+2 6.8, } +VADD/VADDQ:19:result_float32x4 [] = { 41100000 0x1.2p+3 9, 41100000 0x1.2p+3 9, 41100000 0x1.2p+3 9, 41100000 0x1.2p+3 9, } + +VLD1_LANE/VLD1_LANEQ output: +VLD1_LANE/VLD1_LANEQ:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, ffffffaa, } +VLD1_LANE/VLD1_LANEQ:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:2:result_int32x2 [] = { aaaaaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, f0, } +VLD1_LANE/VLD1_LANEQ:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, fff0, } +VLD1_LANE/VLD1_LANEQ:6:result_uint32x2 [] = { aaaaaaaa, fffffff0, } 
+VLD1_LANE/VLD1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:8:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, c1800000 -0x1p+4 -16, } +VLD1_LANE/VLD1_LANEQ:9:result_int8x16 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:10:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, ffffaaaa, ffffaaaa, } +VLD1_LANE/VLD1_LANEQ:11:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:12:result_int64x2 [] = { aaaaaaaaaaaaaaaa, fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:13:result_uint8x16 [] = { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, f0, aa, aa, aa, } +VLD1_LANE/VLD1_LANEQ:14:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, aaaa, } +VLD1_LANE/VLD1_LANEQ:15:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:16:result_uint64x2 [] = { fffffffffffffff0, aaaaaaaaaaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:17:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, c1800000 -0x1p+4 -16, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1800000 
-0x1p+4 -16, } +VLD1_DUP/VLD1_DUPQ:9:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:10:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:11:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:13:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:15:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VLD1_DUP/VLD1_DUPQ:9:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } 
+VLD1_DUP/VLD1_DUPQ:10:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:12:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:13:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:14:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:16:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:8:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } +VLD1_DUP/VLD1_DUPQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:10:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:11:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } 
+VLD1_DUP/VLD1_DUPQ:12:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:13:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:14:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:15:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:16:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:17:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } +VDUP/VDUPQ:9:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:10:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:11:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:13:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:15:result_uint32x4 [] = { fffffff0, fffffff0, 
fffffff0, fffffff0, } +VDUP/VDUPQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VDUP/VDUPQ:9:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:10:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:12:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:13:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:14:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:16:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } 
+VDUP/VDUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:8:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } +VDUP/VDUPQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:10:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:11:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:12:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:13:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:14:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:15:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:16:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:17:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff0, 
fffffff0, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } +VMOV/VMOVQ:9:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:10:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:11:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:13:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:15:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VMOV/VMOVQ:9:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:10:result_int16x8 [] 
= { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:12:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:13:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:14:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:16:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:8:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } +VMOV/VMOVQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:10:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:11:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:12:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:13:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:14:result_uint16x8 
[] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:15:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:16:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:17:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } + +VGET_HIGH output: +VGET_HIGH:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VGET_HIGH:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VGET_HIGH:2:result_int32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:3:result_int64x1 [] = { fffffffffffffff1, } +VGET_HIGH:4:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VGET_HIGH:5:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VGET_HIGH:6:result_uint32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:7:result_uint64x1 [] = { fffffffffffffff1, } +VGET_HIGH:8:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VGET_HIGH:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VGET_LOW output: +VGET_LOW:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, 
fffffff6, fffffff7, } +VGET_LOW:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VGET_LOW:2:result_int32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:3:result_int64x1 [] = { fffffffffffffff0, } +VGET_LOW:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VGET_LOW:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VGET_LOW:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:7:result_uint64x1 [] = { fffffffffffffff0, } +VGET_LOW:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VGET_LOW:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_LANE overflow output: +VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon overflow 0 +VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon overflow 0 + +VQDMLAL_LANE output: +VQDMLAL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, 
} +VQDMLAL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:13:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL_LANE:14:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_LANE (mul with input=0) overflow output: +VQDMLAL_LANE:20:vqdmlal_lane_s16 Neon overflow 0 +VQDMLAL_LANE:21:vqdmlal_lane_s32 Neon overflow 0 + +VQDMLAL_LANE (mul with input=0) output: +VQDMLAL_LANE:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:25:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_LANE:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMLAL_LANE:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:33:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLAL_LANE:34:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLAL_LANE:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_LANE (check mul overflow) overflow output: +VQDMLAL_LANE:40:vqdmlal_lane_s16 Neon overflow 1 +VQDMLAL_LANE:41:vqdmlal_lane_s32 Neon overflow 1 + +VQDMLAL_LANE (check mul overflow) output: +VQDMLAL_LANE:42:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:43:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:44:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:45:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:46:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:47:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:48:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:49:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:50:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_LANE:51:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:52:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:53:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_LANE:54:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } 
+VQDMLAL_LANE:55:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:56:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:57:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:58:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:59:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_LANE overflow output: +VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon overflow 0 +VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon overflow 0 + +VQDMLSL_LANE output: +VQDMLSL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:13:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL_LANE:14:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:18:result_uint64x2 [] = { 
3333333333333333, 3333333333333333, } +VQDMLSL_LANE:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_LANE (mul with input=0) overflow output: +VQDMLSL_LANE:20:vqdmlsl_lane_s16 Neon overflow 0 +VQDMLSL_LANE:21:vqdmlsl_lane_s32 Neon overflow 0 + +VQDMLSL_LANE (mul with input=0) output: +VQDMLSL_LANE:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:25:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_LANE:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:33:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLSL_LANE:34:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLSL_LANE:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_LANE (check mul overflow) 
overflow output: +VQDMLSL_LANE:40:vqdmlsl_lane_s16 Neon overflow 1 +VQDMLSL_LANE:41:vqdmlsl_lane_s32 Neon overflow 1 + +VQDMLSL_LANE (check mul overflow) output: +VQDMLSL_LANE:42:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:43:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:44:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:45:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:46:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:47:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:48:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:49:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:50:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_LANE:51:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:52:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:53:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_LANE:54:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_LANE:55:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:56:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:57:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:58:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:59:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_N overflow output: +VQDMLAL_N:0:vqdmlal_n_s16 Neon overflow 0 +VQDMLAL_N:1:vqdmlal_n_s32 Neon overflow 0 + +VQDMLAL_N output: +VQDMLAL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:4:result_int32x2 [] = { 
33333333, 33333333, } +VQDMLAL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:13:result_int32x4 [] = { 1684, 1685, 1686, 1687, } +VQDMLAL_N:14:result_int64x2 [] = { 21ce, 21cf, } +VQDMLAL_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_N (check mul overflow) overflow output: +VQDMLAL_N:20:vqdmlal_n_s16 Neon overflow 1 +VQDMLAL_N:21:vqdmlal_n_s32 Neon overflow 1 + +VQDMLAL_N (check mul overflow) output: +VQDMLAL_N:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:25:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:30:result_float32x2 [] = { 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_N:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:33:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_N:34:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL_N:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_N overflow output: +VQDMLSL_N:0:vqdmlsl_n_s16 Neon overflow 0 +VQDMLSL_N:1:vqdmlsl_n_s32 Neon overflow 0 + +VQDMLSL_N output: +VQDMLSL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:13:result_int32x4 [] = { ffffe95c, ffffe95d, ffffe95e, ffffe95f, } +VQDMLSL_N:14:result_int64x2 [] = { ffffffffffffde12, 
ffffffffffffde13, } +VQDMLSL_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_N:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_N (check mul overflow) overflow output: +VQDMLSL_N:20:vqdmlsl_n_s16 Neon overflow 1 +VQDMLSL_N:21:vqdmlsl_n_s32 Neon overflow 1 + +VQDMLSL_N (check mul overflow) output: +VQDMLSL_N:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:25:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_N:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:33:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_N:34:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_N:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:38:result_uint64x2 [] = { 
3333333333333333, 3333333333333333, } +VQDMLSL_N:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VEXT/VEXTQ output: +VEXT/VEXTQ:0:result_int8x8 [] = { fffffff7, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:1:result_int16x4 [] = { fffffff3, 22, 22, 22, } +VEXT/VEXTQ:2:result_int32x2 [] = { fffffff1, 33, } +VEXT/VEXTQ:3:result_int64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:4:result_uint8x8 [] = { f6, f7, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:5:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VEXT/VEXTQ:6:result_uint32x2 [] = { fffffff1, 77, } +VEXT/VEXTQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 42066666 0x1.0cccccp+5 33.6, } +VEXT/VEXTQ:9:result_int8x16 [] = { fffffffe, ffffffff, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:10:result_int16x8 [] = { fffffff7, 22, 22, 22, 22, 22, 22, 22, } +VEXT/VEXTQ:11:result_int32x4 [] = { fffffff3, 33, 33, 33, } +VEXT/VEXTQ:12:result_int64x2 [] = { fffffffffffffff1, 44, } +VEXT/VEXTQ:13:result_uint8x16 [] = { fc, fd, fe, ff, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:14:result_uint16x8 [] = { fff6, fff7, 66, 66, 66, 66, 66, 66, } +VEXT/VEXTQ:15:result_uint32x4 [] = { fffffff3, 77, 77, 77, } +VEXT/VEXTQ:16:result_uint64x2 [] = { fffffffffffffff1, 88, } +VEXT/VEXTQ:17:result_float32x4 [] = { c1500000 -0x1.ap+3 -13, 4204cccd 0x1.09999ap+5 33.2, 4204cccd 0x1.09999ap+5 33.2, 4204cccd 0x1.09999ap+5 33.2, } + +VSHR_N output: +VSHR_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHR_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHR_N:3:result_int64x1 [] = { ffffffffffffffff, } +VSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, } +VSHR_N:5:result_uint16x4 [] = 
{ 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:6:result_uint32x2 [] = { 7ffffff, 7ffffff, } +VSHR_N:7:result_uint64x1 [] = { 7fffffff, } +VSHR_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHR_N:9:result_int8x16 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, } +VSHR_N:10:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:11:result_int32x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VSHR_N:12:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHR_N:13:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHR_N:14:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:15:result_uint32x4 [] = { 7ffffff, 7ffffff, 7ffffff, 7ffffff, } +VSHR_N:16:result_uint64x2 [] = { 7fffffff, 7fffffff, } +VSHR_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHRN_N output: +VSHRN_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHRN_N:1:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHRN_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VSHRN_N:4:result_uint8x8 [] = { fc, fc, fc, fc, fd, fd, fd, fd, } +VSHRN_N:5:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VSHRN_N:6:result_uint32x2 [] = { fffffffe, fffffffe, } +VSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHRN_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHRN_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:10:result_int16x8 [] = { 3333, 3333, 
3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSHRN_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSHRN_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHRN_N (with input = 0) output: +VRSHRN_N:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:2:result_int32x2 [] = { 0, 0, } +VRSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:6:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHRN_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:17:result_float32x4 [] = { 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHRN_N output: +VRSHRN_N:18:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHRN_N:19:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VRSHRN_N:20:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHRN_N:21:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:22:result_uint8x8 [] = { fc, fc, fd, fd, fd, fd, fe, fe, } +VRSHRN_N:23:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VRSHRN_N:24:result_uint32x2 [] = { fffffffe, fffffffe, } +VRSHRN_N:25:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHRN_N:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHRN_N (with large shift amount) output: +VRSHRN_N:36:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:37:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:38:result_int32x2 [] = { 0, 0, } +VRSHRN_N:39:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:40:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 
} +VRSHRN_N:41:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:42:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:43:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHRN_N:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:50:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRN_N overflow output: +VQRSHRN_N:0:vqrshrn_n_s16 Neon overflow 0 +VQRSHRN_N:1:vqrshrn_n_s32 Neon overflow 0 +VQRSHRN_N:2:vqrshrn_n_s64 Neon overflow 0 +VQRSHRN_N:3:vqrshrn_n_u16 Neon overflow 1 +VQRSHRN_N:4:vqrshrn_n_u32 Neon overflow 1 +VQRSHRN_N:5:vqrshrn_n_u64 Neon overflow 1 + +VQRSHRN_N output: +VQRSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VQRSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VQRSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQRSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } 
+VQRSHRN_N:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRN_N:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRN_N (check saturation: shift by 3) overflow output: +VQRSHRN_N:24:vqrshrn_n_s16 Neon overflow 1 +VQRSHRN_N:25:vqrshrn_n_s32 Neon overflow 1 +VQRSHRN_N:26:vqrshrn_n_s64 Neon overflow 1 +VQRSHRN_N:27:vqrshrn_n_u16 Neon overflow 1 +VQRSHRN_N:28:vqrshrn_n_u32 Neon overflow 1 +VQRSHRN_N:29:vqrshrn_n_u64 Neon overflow 1 + +VQRSHRN_N (check saturation: shift by 3) output: +VQRSHRN_N:30:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:33:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:37:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:38:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRN_N:39:result_int8x16 
[] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:41:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:44:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:47:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRN_N (check saturation: shift by max) overflow output: +VQRSHRN_N:48:vqrshrn_n_s16 Neon overflow 1 +VQRSHRN_N:49:vqrshrn_n_s32 Neon overflow 1 +VQRSHRN_N:50:vqrshrn_n_s64 Neon overflow 1 +VQRSHRN_N:51:vqrshrn_n_u16 Neon overflow 1 +VQRSHRN_N:52:vqrshrn_n_u32 Neon overflow 1 +VQRSHRN_N:53:vqrshrn_n_u64 Neon overflow 1 + +VQRSHRN_N (check saturation: shift by max) output: +VQRSHRN_N:54:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:55:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:56:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:57:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:58:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:59:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:60:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:61:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRN_N:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:64:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 
3333, 3333, 3333, } +VQRSHRN_N:65:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:68:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:69:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSET_LANE/VSET_LANEQ output: +VSET_LANE/VSET_LANEQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, 11, } +VSET_LANE/VSET_LANEQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 22, } +VSET_LANE/VSET_LANEQ:2:result_int32x2 [] = { fffffff0, 33, } +VSET_LANE/VSET_LANEQ:3:result_int64x1 [] = { 44, } +VSET_LANE/VSET_LANEQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, 55, f7, } +VSET_LANE/VSET_LANEQ:5:result_uint16x4 [] = { fff0, fff1, 66, fff3, } +VSET_LANE/VSET_LANEQ:6:result_uint32x2 [] = { fffffff0, 77, } +VSET_LANE/VSET_LANEQ:7:result_uint64x1 [] = { 88, } +VSET_LANE/VSET_LANEQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, 4204cccd 0x1.09999ap+5 33.2, } +VSET_LANE/VSET_LANEQ:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffff99, } +VSET_LANE/VSET_LANEQ:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, aa, fffffff6, fffffff7, } +VSET_LANE/VSET_LANEQ:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, bb, } +VSET_LANE/VSET_LANEQ:12:result_int64x2 [] = { fffffffffffffff0, cc, } +VSET_LANE/VSET_LANEQ:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, 
fb, fc, fd, dd, ff, } +VSET_LANE/VSET_LANEQ:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, ee, fff7, } +VSET_LANE/VSET_LANEQ:15:result_uint32x4 [] = { fffffff0, fffffff1, ff, fffffff3, } +VSET_LANE/VSET_LANEQ:16:result_uint64x2 [] = { fffffffffffffff0, 11, } +VSET_LANE/VSET_LANEQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, 41333333 0x1.666666p+3 11.2, } + +VGET_LANE/VGETQ_LANE output: +fffffff7, fffffff3, fffffff1, fffffffffffffff0, f6, fff2, fffffff1, fffffffffffffff0, c1700000, ffffffff, fffffff5, fffffff3, fffffffffffffff1, fe, fff6, fffffff2, fffffffffffffff1, c1500000, + +VQSUB/VQSUBQ overflow output: +VQSUB/VQSUBQ:0:vqsub_s8 Neon overflow 0 +VQSUB/VQSUBQ:1:vqsub_s16 Neon overflow 0 +VQSUB/VQSUBQ:2:vqsub_s32 Neon overflow 0 +VQSUB/VQSUBQ:3:vqsub_s64 Neon overflow 0 +VQSUB/VQSUBQ:4:vqsub_u8 Neon overflow 0 +VQSUB/VQSUBQ:5:vqsub_u16 Neon overflow 0 +VQSUB/VQSUBQ:6:vqsub_u32 Neon overflow 0 +VQSUB/VQSUBQ:7:vqsub_u64 Neon overflow 0 +VQSUB/VQSUBQ:8:vqsubq_s8 Neon overflow 0 +VQSUB/VQSUBQ:9:vqsubq_s16 Neon overflow 0 +VQSUB/VQSUBQ:10:vqsubq_s32 Neon overflow 0 +VQSUB/VQSUBQ:11:vqsubq_s64 Neon overflow 0 +VQSUB/VQSUBQ:12:vqsubq_u8 Neon overflow 0 +VQSUB/VQSUBQ:13:vqsubq_u16 Neon overflow 0 +VQSUB/VQSUBQ:14:vqsubq_u32 Neon overflow 0 +VQSUB/VQSUBQ:15:vqsubq_u64 Neon overflow 0 + +VQSUB/VQSUBQ output: +VQSUB/VQSUBQ:16:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VQSUB/VQSUBQ:17:result_int16x4 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, } +VQSUB/VQSUBQ:18:result_int32x2 [] = { ffffffbd, ffffffbe, } +VQSUB/VQSUBQ:19:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:20:result_uint8x8 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, } +VQSUB/VQSUBQ:21:result_uint16x4 [] = { ff8a, ff8b, ff8c, ff8d, } +VQSUB/VQSUBQ:22:result_uint32x2 [] = { ffffff79, ffffff7a, } +VQSUB/VQSUBQ:23:result_uint64x1 [] = { ffffffffffffff68, } 
+VQSUB/VQSUBQ:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSUB/VQSUBQ:25:result_int8x16 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, } +VQSUB/VQSUBQ:26:result_int16x8 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VQSUB/VQSUBQ:27:result_int32x4 [] = { ffffffbd, ffffffbe, ffffffbf, ffffffc0, } +VQSUB/VQSUBQ:28:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:29:result_uint8x16 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, } +VQSUB/VQSUBQ:30:result_uint16x8 [] = { ff8a, ff8b, ff8c, ff8d, ff8e, ff8f, ff90, ff91, } +VQSUB/VQSUBQ:31:result_uint32x4 [] = { ffffff79, ffffff7a, ffffff7b, ffffff7c, } +VQSUB/VQSUBQ:32:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } +VQSUB/VQSUBQ:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSUB/VQSUBQ 64 bits saturation overflow output: +VQSUB/VQSUBQ:34:vqsub_s64 Neon overflow 0 +VQSUB/VQSUBQ:35:vqsub_u64 Neon overflow 0 +VQSUB/VQSUBQ:36:vqsubq_s64 Neon overflow 0 +VQSUB/VQSUBQ:37:vqsubq_u64 Neon overflow 0 + +64 bits saturation: +VQSUB/VQSUBQ:38:result_int64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:39:result_uint64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:40:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQSUB/VQSUBQ:41:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQSUB/VQSUBQ 64 bits saturation overflow output: +VQSUB/VQSUBQ:42:vqsub_s64 Neon overflow 0 +VQSUB/VQSUBQ:43:vqsub_u64 Neon overflow 0 +VQSUB/VQSUBQ:44:vqsubq_s64 Neon overflow 0 +VQSUB/VQSUBQ:45:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ:46:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:47:result_uint64x1 [] = { 
ffffffffffffff68, } +VQSUB/VQSUBQ:48:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:49:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } + +VQSUB/VQSUBQ 64 bits saturation overflow output: +VQSUB/VQSUBQ:50:vqsub_s64 Neon overflow 1 +VQSUB/VQSUBQ:51:vqsub_u64 Neon overflow 1 +VQSUB/VQSUBQ:52:vqsubq_s64 Neon overflow 1 +VQSUB/VQSUBQ:53:vqsubq_u64 Neon overflow 1 +VQSUB/VQSUBQ:54:result_int64x1 [] = { 8000000000000000, } +VQSUB/VQSUBQ:55:result_uint64x1 [] = { 0, } +VQSUB/VQSUBQ:56:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSUB/VQSUBQ:57:result_uint64x2 [] = { 0, 0, } + +less than 64 bits saturation: +VQSUB/VQSUBQ:58:vqsub_s8 Neon overflow 1 +VQSUB/VQSUBQ:59:vqsub_s16 Neon overflow 1 +VQSUB/VQSUBQ:60:vqsub_s32 Neon overflow 1 +VQSUB/VQSUBQ:61:vqsubq_s8 Neon overflow 1 +VQSUB/VQSUBQ:62:vqsubq_s16 Neon overflow 1 +VQSUB/VQSUBQ:63:vqsubq_s32 Neon overflow 1 +VQSUB/VQSUBQ:64:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:65:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:66:result_int32x2 [] = { 80000000, 80000000, } +VQSUB/VQSUBQ:67:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:68:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:69:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQSUB/VQSUBQ less than 64 bits saturation overflow output: +VQSUB/VQSUBQ:70:vqsub_u8 Neon overflow 1 +VQSUB/VQSUBQ:71:vqsub_u16 Neon overflow 1 +VQSUB/VQSUBQ:72:vqsub_u32 Neon overflow 1 +VQSUB/VQSUBQ:73:vqsubq_u8 Neon overflow 1 +VQSUB/VQSUBQ:74:vqsubq_u16 Neon overflow 1 +VQSUB/VQSUBQ:75:vqsubq_u32 Neon overflow 1 +VQSUB/VQSUBQ:76:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VQSUB/VQSUBQ:77:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSUB/VQSUBQ:78:result_uint32x2 [] = { 0, 0, } +VQSUB/VQSUBQ:79:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:80:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:81:result_uint32x4 [] = { 0, 0, 0, 0, } + +VQDMULH overflow output: +VQDMULH:0:vqdmulh_s16 Neon overflow 0 +VQDMULH:1:vqdmulh_s32 Neon overflow 0 +VQDMULH:2:vqdmulhq_s16 Neon overflow 0 +VQDMULH:3:vqdmulhq_s32 Neon overflow 0 + +VQDMULH output: +VQDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:12:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:14:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:15:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:16:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:21:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 
4.17233e-08, } + +VQDMULH overflow output: +VQDMULH:22:vqdmulh_s16 Neon overflow 1 +VQDMULH:23:vqdmulh_s32 Neon overflow 1 +VQDMULH:24:vqdmulhq_s16 Neon overflow 1 +VQDMULH:25:vqdmulhq_s32 Neon overflow 1 + +VQDMULH output: +VQDMULH:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:27:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH:28:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH:29:result_int64x1 [] = { 3333333333333333, } +VQDMULH:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:33:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:34:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH:35:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:36:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH:37:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH:38:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:39:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:40:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:41:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:42:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:43:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULH_LANE overflow output: +VQDMULH_LANE:0:vqdmulh_lane_s16 Neon overflow 0 +VQDMULH_LANE:1:vqdmulh_lane_s32 Neon overflow 0 +VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon overflow 0 +VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon overflow 0 + +VQDMULH_LANE output: +VQDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMULH_LANE:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:12:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH_LANE:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:14:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:15:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:16:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:21:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULH_LANE (check mul overflow) overflow output: +VQDMULH_LANE:22:vqdmulh_lane_s16 Neon overflow 1 +VQDMULH_LANE:23:vqdmulh_lane_s32 Neon overflow 1 +VQDMULH_LANE:24:vqdmulhq_lane_s16 Neon overflow 1 +VQDMULH_LANE:25:vqdmulhq_lane_s32 Neon overflow 1 + +VQDMULH_LANE (check mul overflow) output: +VQDMULH_LANE:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:27:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:28:result_int32x2 [] = { 7fffffff, 
7fffffff, } +VQDMULH_LANE:29:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:33:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:34:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH_LANE:35:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:36:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:37:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_LANE:38:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:39:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:40:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:41:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:42:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:43:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULH_N overflow output: +VQDMULH_N:0:vqdmulh_n_s16 Neon overflow 0 +VQDMULH_N:1:vqdmulh_n_s32 Neon overflow 0 +VQDMULH_N:2:vqdmulhq_n_s16 Neon overflow 0 +VQDMULH_N:3:vqdmulhq_n_s32 Neon overflow 0 + +VQDMULH_N output: +VQDMULH_N:4:result_int16x4 [] = { 19, 19, 19, 19, } +VQDMULH_N:5:result_int32x2 [] = { 4, 4, } +VQDMULH_N:6:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQDMULH_N:7:result_int32x4 [] = { a, a, a, a, } + +VQDMULH_N (check mul overflow) overflow output: +VQDMULH_N:8:vqdmulh_n_s16 Neon overflow 1 +VQDMULH_N:9:vqdmulh_n_s32 Neon overflow 1 +VQDMULH_N:10:vqdmulhq_n_s16 Neon overflow 1 +VQDMULH_N:11:vqdmulhq_n_s32 
Neon overflow 1 + +VQDMULH_N (check mul overflow) output: +VQDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_N:20:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH_N:21:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:22:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:23:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_N:24:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:25:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:26:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_N:27:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_N:28:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:29:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULL overflow output: +VQDMULL:0:vqdmull_s16 Neon overflow 0 +VQDMULL:1:vqdmull_s32 Neon overflow 0 + +VQDMULL output: +VQDMULL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:5:result_int64x1 [] = { 3333333333333333, } +VQDMULL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:7:result_uint16x4 [] = { 
3333, 3333, 3333, 3333, } +VQDMULL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:13:result_int32x4 [] = { 200, 1c2, 188, 152, } +VQDMULL:14:result_int64x2 [] = { 200, 1c2, } +VQDMULL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULL (check mul overflow) overflow output: +VQDMULL:20:vqdmull_s16 Neon overflow 1 +VQDMULL:21:vqdmull_s32 Neon overflow 1 + +VQDMULL (check mul overflow) output: +VQDMULL:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:25:result_int64x1 [] = { 3333333333333333, } +VQDMULL:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:29:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULL:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQDMULL:33:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL:34:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQDMULL:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL overflow output: +VQDMLAL:0:vqdmlal_s16 Neon overflow 0 +VQDMLAL:1:vqdmlal_s32 Neon overflow 0 + +VQDMLAL output: +VQDMLAL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:13:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL:14:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:18:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VQDMLAL:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL (check mul overflow) overflow output: +VQDMLAL:20:vqdmlal_s16 Neon overflow 1 +VQDMLAL:21:vqdmlal_s32 Neon overflow 1 + +VQDMLAL (check mul overflow) output: +VQDMLAL:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:25:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:33:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL:34:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL overflow output: +VQDMLSL:0:vqdmlsl_s16 Neon overflow 0 +VQDMLSL:1:vqdmlsl_s32 Neon overflow 0 + +VQDMLSL output: +VQDMLSL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 
33, 33, } +VQDMLSL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:13:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL:14:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL (check mul overflow) overflow output: +VQDMLSL:20:vqdmlsl_s16 Neon overflow 1 +VQDMLSL:21:vqdmlsl_s32 Neon overflow 1 + +VQDMLSL (check mul overflow) output: +VQDMLSL:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:25:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:29:result_uint64x1 
[] = { 3333333333333333, } +VQDMLSL:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:33:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL:34:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCEQ/VCEQQ output: +VCEQ/VCEQQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, 0, } +VCEQ/VCEQQ:1:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:3:result_uint8x8 [] = { 0, 0, 0, ff, 0, 0, 0, 0, } +VCEQ/VCEQQ:4:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, 0, 0, 0, } +VCEQ/VCEQQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:8:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCEQ/VCEQQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:11:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:13:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:15:result_uint32x2 [] = { 0, ffffffff, } 
+VCEQ/VCEQQ:16:result_uint32x2 [] = { ffffffff, 0, } + +VCGE/VCGEQ output: +VCGE/VCGEQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, ff, } +VCGE/VCGEQ:1:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:3:result_uint8x8 [] = { 0, 0, 0, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:4:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, ff, } +VCGE/VCGEQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:8:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:11:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:13:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:14:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:15:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:16:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCLE/VCLEQ output: +VCLE/VCLEQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, 0, } +VCLE/VCLEQ:1:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:3:result_uint8x8 [] = { ff, ff, ff, ff, 0, 0, 0, 0, } +VCLE/VCLEQ:4:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:5:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, } +VCLE/VCLEQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 0, } +VCLE/VCLEQ:8:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:9:result_uint8x16 [] = { ff, ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLE/VCLEQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 
0, } +VCLE/VCLEQ:11:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:13:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:15:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:16:result_uint32x2 [] = { ffffffff, 0, } + +VCGT/VCGTQ output: +VCGT/VCGTQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, ff, } +VCGT/VCGTQ:1:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:2:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:3:result_uint8x8 [] = { 0, 0, 0, 0, ff, ff, ff, ff, } +VCGT/VCGTQ:4:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:5:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, } +VCGT/VCGTQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:8:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:9:result_uint8x16 [] = { 0, 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGT/VCGTQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:11:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:12:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:13:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:14:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:15:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:16:result_uint32x2 [] = { 0, ffffffff, } + +VCLT/VCLTQ output: +VCLT/VCLTQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, 0, 0, } +VCLT/VCLTQ:1:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:2:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:3:result_uint8x8 [] = { ff, ff, ff, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:4:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:5:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, 0, } +VCLT/VCLTQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } 
+VCLT/VCLTQ:8:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:9:result_uint8x16 [] = { ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } +VCLT/VCLTQ:11:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:12:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:13:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:14:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:15:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:16:result_uint32x2 [] = { 0, 0, } + +VBSL/VBSLQ output: +VBSL/VBSLQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:3:result_int64x1 [] = { fffffffffffffffd, } +VBSL/VBSLQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:5:result_uint16x4 [] = { fff0, fff0, fff2, fff2, } +VBSL/VBSLQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:7:result_uint64x1 [] = { fffffff1, } +VBSL/VBSLQ:8:result_float32x2 [] = { c1800004 -0x1.000008p+4 -16, c1700004 -0x1.e00008p+3 -15, } +VBSL/VBSLQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:10:result_int16x8 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VBSL/VBSLQ:11:result_int32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:12:result_int64x2 [] = { fffffffffffffffd, fffffffffffffffd, } +VBSL/VBSLQ:13:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:14:result_uint16x8 [] = { fff0, fff0, fff2, fff2, fff4, fff4, fff6, fff6, } +VBSL/VBSLQ:15:result_uint32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } 
+VBSL/VBSLQ:16:result_uint64x2 [] = { fffffff1, fffffff1, } +VBSL/VBSLQ:17:result_float32x4 [] = { c1800001 -0x1.000002p+4 -16, c1700001 -0x1.e00002p+3 -15, c1600001 -0x1.c00002p+3 -14, c1500001 -0x1.a00002p+3 -13, } + +VSHL/VSHLQ output: +VSHL/VSHLQ:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL/VSHLQ:1:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VSHL/VSHLQ:2:result_int32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:3:result_int64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:4:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VSHL/VSHLQ:5:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VSHL/VSHLQ:6:result_uint32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:7:result_uint64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHL/VSHLQ:9:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL/VSHLQ:10:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:11:result_int32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:12:result_int64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:13:result_uint8x16 [] = { 0, 20, 40, 60, 80, a0, c0, e0, 0, 20, 40, 60, 80, a0, c0, e0, } +VSHL/VSHLQ:14:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:15:result_uint32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:16:result_uint64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHL/VSHLQ (large shift amount) output: +VSHL/VSHLQ:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:19:result_int16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:20:result_int32x2 [] = { 0, 0, } 
+VSHL/VSHLQ:21:result_int64x1 [] = { 0, } +VSHL/VSHLQ:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:24:result_uint32x2 [] = { 0, 0, } +VSHL/VSHLQ:25:result_uint64x1 [] = { 0, } +VSHL/VSHLQ:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHL/VSHLQ:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:29:result_int32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:30:result_int64x2 [] = { 0, 0, } +VSHL/VSHLQ:31:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:34:result_uint64x2 [] = { 0, 0, } +VSHL/VSHLQ:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHL/VSHLQ (negative shift amount) output: +VSHL/VSHLQ:36:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHL/VSHLQ:37:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHL/VSHLQ:38:result_int32x2 [] = { fffffffc, fffffffc, } +VSHL/VSHLQ:39:result_int64x1 [] = { ffffffffffffffff, } +VSHL/VSHLQ:40:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VSHL/VSHLQ:41:result_uint16x4 [] = { 7ff8, 7ff8, 7ff9, 7ff9, } +VSHL/VSHLQ:42:result_uint32x2 [] = { 3ffffffc, 3ffffffc, } +VSHL/VSHLQ:43:result_uint64x1 [] = { fffffffffffffff, } +VSHL/VSHLQ:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHL/VSHLQ:45:result_int8x16 [] = { fffffffc, fffffffc, fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, fffffffe, fffffffe, ffffffff, ffffffff, ffffffff, ffffffff, } 
+VSHL/VSHLQ:46:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHL/VSHLQ:47:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } +VSHL/VSHLQ:48:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHL/VSHLQ:49:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHL/VSHLQ:50:result_uint16x8 [] = { 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, } +VSHL/VSHLQ:51:result_uint32x4 [] = { 1ffffffe, 1ffffffe, 1ffffffe, 1ffffffe, } +VSHL/VSHLQ:52:result_uint64x2 [] = { 7ffffffffffffff, 7ffffffffffffff, } +VSHL/VSHLQ:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHL_N output: +VSHL_N:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:1:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHL_N:2:result_int32x2 [] = { ffffff80, ffffff88, } +VSHL_N:3:result_int64x1 [] = { ffffffffffffffc0, } +VSHL_N:4:result_uint8x8 [] = { c0, c4, c8, cc, d0, d4, d8, dc, } +VSHL_N:5:result_uint16x4 [] = { ff00, ff10, ff20, ff30, } +VSHL_N:6:result_uint32x2 [] = { ffffff80, ffffff88, } +VSHL_N:7:result_uint64x1 [] = { ffffffffffffffe0, } +VSHL_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHL_N:9:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL_N:10:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:11:result_int32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:12:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VSHL_N:13:result_uint8x16 [] = { c0, c4, c8, cc, d0, d4, d8, dc, e0, e4, e8, ec, f0, f4, f8, fc, } +VSHL_N:14:result_uint16x8 [] = { ff80, 
ff88, ff90, ff98, ffa0, ffa8, ffb0, ffb8, } +VSHL_N:15:result_uint32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:16:result_uint64x2 [] = { ffffffffffffffe0, ffffffffffffffe2, } +VSHL_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (with input = 0) overflow output: +VQSHL/VQSHLQ:0:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:1:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:2:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:3:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:4:vqshl_u8 Neon overflow 0 +VQSHL/VQSHLQ:5:vqshl_u16 Neon overflow 0 +VQSHL/VQSHLQ:6:vqshl_u32 Neon overflow 0 +VQSHL/VQSHLQ:7:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:8:vqshlq_s8 Neon overflow 0 +VQSHL/VQSHLQ:9:vqshlq_s16 Neon overflow 0 +VQSHL/VQSHLQ:10:vqshlq_s32 Neon overflow 0 +VQSHL/VQSHLQ:11:vqshlq_s64 Neon overflow 0 +VQSHL/VQSHLQ:12:vqshlq_u8 Neon overflow 0 +VQSHL/VQSHLQ:13:vqshlq_u16 Neon overflow 0 +VQSHL/VQSHLQ:14:vqshlq_u32 Neon overflow 0 +VQSHL/VQSHLQ:15:vqshlq_u64 Neon overflow 0 + +VQSHL/VQSHLQ (with input = 0) output: +VQSHL/VQSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:18:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:19:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:23:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:25:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:26:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:27:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:28:result_int64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:29:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:30:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:31:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:32:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (input 0 and negative shift amount) overflow output: +VQSHL/VQSHLQ:34:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:35:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:36:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:37:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:38:vqshl_u8 Neon overflow 0 +VQSHL/VQSHLQ:39:vqshl_u16 Neon overflow 0 +VQSHL/VQSHLQ:40:vqshl_u32 Neon overflow 0 +VQSHL/VQSHLQ:41:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:42:vqshlq_s8 Neon overflow 0 +VQSHL/VQSHLQ:43:vqshlq_s16 Neon overflow 0 +VQSHL/VQSHLQ:44:vqshlq_s32 Neon overflow 0 +VQSHL/VQSHLQ:45:vqshlq_s64 Neon overflow 0 +VQSHL/VQSHLQ:46:vqshlq_u8 Neon overflow 0 +VQSHL/VQSHLQ:47:vqshlq_u16 Neon overflow 0 +VQSHL/VQSHLQ:48:vqshlq_u32 Neon overflow 0 +VQSHL/VQSHLQ:49:vqshlq_u64 Neon overflow 0 + +VQSHL/VQSHLQ (input 0 and negative shift amount) output: +VQSHL/VQSHLQ:50:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:51:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:52:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:53:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:54:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:55:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:56:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:57:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:58:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:59:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:60:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:61:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:62:result_int64x2 [] = { 0, 
0, } +VQSHL/VQSHLQ:63:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:64:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:65:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:66:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:67:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ overflow output: +VQSHL/VQSHLQ:68:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:69:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:70:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:71:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:72:vqshl_u8 Neon overflow 1 +VQSHL/VQSHLQ:73:vqshl_u16 Neon overflow 1 +VQSHL/VQSHLQ:74:vqshl_u32 Neon overflow 1 +VQSHL/VQSHLQ:75:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:76:vqshlq_s8 Neon overflow 1 +VQSHL/VQSHLQ:77:vqshlq_s16 Neon overflow 1 +VQSHL/VQSHLQ:78:vqshlq_s32 Neon overflow 1 +VQSHL/VQSHLQ:79:vqshlq_s64 Neon overflow 1 +VQSHL/VQSHLQ:80:vqshlq_u8 Neon overflow 1 +VQSHL/VQSHLQ:81:vqshlq_u16 Neon overflow 1 +VQSHL/VQSHLQ:82:vqshlq_u32 Neon overflow 1 +VQSHL/VQSHLQ:83:vqshlq_u64 Neon overflow 1 + +VQSHL/VQSHLQ output: +VQSHL/VQSHLQ:84:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL/VQSHLQ:85:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQSHL/VQSHLQ:86:result_int32x2 [] = { fffff000, fffff100, } +VQSHL/VQSHLQ:87:result_int64x1 [] = { fffffffffffffffe, } +VQSHL/VQSHLQ:88:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:89:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:90:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:91:result_uint64x1 [] = { 1ffffffffffffffe, } +VQSHL/VQSHLQ:92:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:93:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, 
ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:94:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:95:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:96:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:97:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:98:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:99:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:100:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:101:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (negative shift amount) overflow output: +VQSHL/VQSHLQ:102:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:103:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:104:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:105:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:106:vqshl_u8 Neon overflow 0 +VQSHL/VQSHLQ:107:vqshl_u16 Neon overflow 0 +VQSHL/VQSHLQ:108:vqshl_u32 Neon overflow 0 +VQSHL/VQSHLQ:109:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:110:vqshlq_s8 Neon overflow 0 +VQSHL/VQSHLQ:111:vqshlq_s16 Neon overflow 0 +VQSHL/VQSHLQ:112:vqshlq_s32 Neon overflow 0 +VQSHL/VQSHLQ:113:vqshlq_s64 Neon overflow 0 +VQSHL/VQSHLQ:114:vqshlq_u8 Neon overflow 0 +VQSHL/VQSHLQ:115:vqshlq_u16 Neon overflow 0 +VQSHL/VQSHLQ:116:vqshlq_u32 Neon overflow 0 +VQSHL/VQSHLQ:117:vqshlq_u64 Neon overflow 0 + +VQSHL/VQSHLQ (negative shift amount) output: +VQSHL/VQSHLQ:118:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHL/VQSHLQ:119:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VQSHL/VQSHLQ:120:result_int32x2 [] = { 
fffffffe, fffffffe, } +VQSHL/VQSHLQ:121:result_int64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:122:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VQSHL/VQSHLQ:123:result_uint16x4 [] = { 3ffc, 3ffc, 3ffc, 3ffc, } +VQSHL/VQSHLQ:124:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQSHL/VQSHLQ:125:result_uint64x1 [] = { fffffffffffffff, } +VQSHL/VQSHLQ:126:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:127:result_int8x16 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:128:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:129:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:130:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:131:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VQSHL/VQSHLQ:132:result_uint16x8 [] = { 1f, 1f, 1f, 1f, 1f, 1f, 1f, 1f, } +VQSHL/VQSHLQ:133:result_uint32x4 [] = { 7ffff, 7ffff, 7ffff, 7ffff, } +VQSHL/VQSHLQ:134:result_uint64x2 [] = { fffffffffff, fffffffffff, } +VQSHL/VQSHLQ:135:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (large shift amount, negative input) overflow output: +VQSHL/VQSHLQ:136:vqshl_s8 Neon overflow 1 +VQSHL/VQSHLQ:137:vqshl_s16 Neon overflow 1 +VQSHL/VQSHLQ:138:vqshl_s32 Neon overflow 1 +VQSHL/VQSHLQ:139:vqshl_s64 Neon overflow 1 +VQSHL/VQSHLQ:140:vqshl_u8 Neon overflow 1 +VQSHL/VQSHLQ:141:vqshl_u16 Neon overflow 1 +VQSHL/VQSHLQ:142:vqshl_u32 Neon overflow 1 +VQSHL/VQSHLQ:143:vqshl_u64 Neon overflow 1 +VQSHL/VQSHLQ:144:vqshlq_s8 Neon overflow 1 +VQSHL/VQSHLQ:145:vqshlq_s16 Neon overflow 1 +VQSHL/VQSHLQ:146:vqshlq_s32 Neon overflow 1 
+VQSHL/VQSHLQ:147:vqshlq_s64 Neon overflow 1 +VQSHL/VQSHLQ:148:vqshlq_u8 Neon overflow 1 +VQSHL/VQSHLQ:149:vqshlq_u16 Neon overflow 1 +VQSHL/VQSHLQ:150:vqshlq_u32 Neon overflow 1 +VQSHL/VQSHLQ:151:vqshlq_u64 Neon overflow 1 + +VQSHL/VQSHLQ (large shift amount, negative input) output: +VQSHL/VQSHLQ:152:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:153:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:154:result_int32x2 [] = { 80000000, 80000000, } +VQSHL/VQSHLQ:155:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:156:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:157:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:158:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:159:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:160:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:161:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:162:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:163:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:164:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:165:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:166:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:167:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:168:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:169:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 
0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (check saturation/overflow) overflow output: +VQSHL/VQSHLQ:170:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:171:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:172:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:173:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:174:vqshl_u8 Neon overflow 0 +VQSHL/VQSHLQ:175:vqshl_u16 Neon overflow 0 +VQSHL/VQSHLQ:176:vqshl_u32 Neon overflow 0 +VQSHL/VQSHLQ:177:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:178:vqshlq_s8 Neon overflow 0 +VQSHL/VQSHLQ:179:vqshlq_s16 Neon overflow 0 +VQSHL/VQSHLQ:180:vqshlq_s32 Neon overflow 0 +VQSHL/VQSHLQ:181:vqshlq_s64 Neon overflow 0 +VQSHL/VQSHLQ:182:vqshlq_u8 Neon overflow 0 +VQSHL/VQSHLQ:183:vqshlq_u16 Neon overflow 0 +VQSHL/VQSHLQ:184:vqshlq_u32 Neon overflow 0 +VQSHL/VQSHLQ:185:vqshlq_u64 Neon overflow 0 + +VQSHL/VQSHLQ (check saturation/overflow) output: +VQSHL/VQSHLQ:186:result_int8x8 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:187:result_int16x4 [] = { 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:188:result_int32x2 [] = { 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:189:result_int64x1 [] = { 3fffffffffffffff, } +VQSHL/VQSHLQ:190:result_uint8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:191:result_uint16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:192:result_uint32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:193:result_uint64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:194:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:195:result_int8x16 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:196:result_int16x8 [] = { 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:197:result_int32x4 [] = { 3fffffff, 3fffffff, 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:198:result_int64x2 [] = { 3fffffffffffffff, 3fffffffffffffff, } +VQSHL/VQSHLQ:199:result_uint8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:200:result_uint16x8 [] = { 
7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:201:result_uint32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:202:result_uint64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:203:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (large shift amount, positive input) overflow output: +VQSHL/VQSHLQ:204:vqshl_s8 Neon overflow 1 +VQSHL/VQSHLQ:205:vqshl_s16 Neon overflow 1 +VQSHL/VQSHLQ:206:vqshl_s32 Neon overflow 1 +VQSHL/VQSHLQ:207:vqshl_s64 Neon overflow 1 +VQSHL/VQSHLQ:208:vqshl_u8 Neon overflow 1 +VQSHL/VQSHLQ:209:vqshl_u16 Neon overflow 1 +VQSHL/VQSHLQ:210:vqshl_u32 Neon overflow 1 +VQSHL/VQSHLQ:211:vqshl_u64 Neon overflow 1 +VQSHL/VQSHLQ:212:vqshlq_s8 Neon overflow 1 +VQSHL/VQSHLQ:213:vqshlq_s16 Neon overflow 1 +VQSHL/VQSHLQ:214:vqshlq_s32 Neon overflow 1 +VQSHL/VQSHLQ:215:vqshlq_s64 Neon overflow 1 +VQSHL/VQSHLQ:216:vqshlq_u8 Neon overflow 1 +VQSHL/VQSHLQ:217:vqshlq_u16 Neon overflow 1 +VQSHL/VQSHLQ:218:vqshlq_u32 Neon overflow 1 +VQSHL/VQSHLQ:219:vqshlq_u64 Neon overflow 1 + +VQSHL/VQSHLQ (large shift amount, positive input) output: +VQSHL/VQSHLQ:220:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:221:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:222:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:223:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:224:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:225:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:226:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:227:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:228:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:229:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } 
+VQSHL/VQSHLQ:230:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:231:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:232:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:233:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:234:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:235:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:236:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:237:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (check saturation on 64 bits) overflow output: +VQSHL/VQSHLQ:238:vqshl_s8 Neon overflow 1 +VQSHL/VQSHLQ:239:vqshl_s16 Neon overflow 1 +VQSHL/VQSHLQ:240:vqshl_s32 Neon overflow 1 +VQSHL/VQSHLQ:241:vqshl_s64 Neon overflow 1 +VQSHL/VQSHLQ:242:vqshl_u8 Neon overflow 1 +VQSHL/VQSHLQ:243:vqshl_u16 Neon overflow 1 +VQSHL/VQSHLQ:244:vqshl_u32 Neon overflow 1 +VQSHL/VQSHLQ:245:vqshl_u64 Neon overflow 1 +VQSHL/VQSHLQ:246:vqshlq_s8 Neon overflow 1 +VQSHL/VQSHLQ:247:vqshlq_s16 Neon overflow 1 +VQSHL/VQSHLQ:248:vqshlq_s32 Neon overflow 1 +VQSHL/VQSHLQ:249:vqshlq_s64 Neon overflow 1 +VQSHL/VQSHLQ:250:vqshlq_u8 Neon overflow 1 +VQSHL/VQSHLQ:251:vqshlq_u16 Neon overflow 1 +VQSHL/VQSHLQ:252:vqshlq_u32 Neon overflow 1 +VQSHL/VQSHLQ:253:vqshlq_u64 Neon overflow 1 + +VQSHL/VQSHLQ (check saturation on 64 bits) output: +VQSHL/VQSHLQ:254:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:255:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:256:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:257:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:258:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:259:result_uint16x4 [] 
= { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:260:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:261:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:262:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:263:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:264:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:265:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:266:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:267:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:268:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:269:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:270:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:271:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL_N/VQSHLQ_N overflow output: +VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon overflow 0 +VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon overflow 0 +VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon overflow 0 +VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon overflow 0 +VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon overflow 0 +VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon overflow 0 +VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon overflow 0 +VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon overflow 0 +VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon overflow 1 + 
+VQSHL_N/VQSHLQ_N output: +VQSHL_N/VQSHLQ_N:16:result_int8x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } +VQSHL_N/VQSHLQ_N:17:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:18:result_int32x2 [] = { ffffffe0, ffffffe2, } +VQSHL_N/VQSHLQ_N:19:result_int64x1 [] = { ffffffffffffffc0, } +VQSHL_N/VQSHLQ_N:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:23:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL_N/VQSHLQ_N:25:result_int8x16 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, ffffffe0, ffffffe4, ffffffe8, ffffffec, fffffff0, fffffff4, fffffff8, fffffffc, } +VQSHL_N/VQSHLQ_N:26:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL_N/VQSHLQ_N:27:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:28:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VQSHL_N/VQSHLQ_N:29:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:30:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:31:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:32:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) overflow output: +VQSHL_N/VQSHLQ_N:34:vqshl_n_s8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:35:vqshl_n_s16 Neon 
overflow 1 +VQSHL_N/VQSHLQ_N:36:vqshl_n_s32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:37:vqshl_n_s64 Neon overflow 1 +VQSHL_N/VQSHLQ_N:38:vqshl_n_u8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:39:vqshl_n_u16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:40:vqshl_n_u32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:41:vqshl_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N:42:vqshlq_n_s8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:43:vqshlq_n_s16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:44:vqshlq_n_s32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:45:vqshlq_n_s64 Neon overflow 1 +VQSHL_N/VQSHLQ_N:46:vqshlq_n_u8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:47:vqshlq_n_u16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:48:vqshlq_n_u32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:49:vqshlq_n_u64 Neon overflow 1 + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: +VQSHL_N/VQSHLQ_N:50:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:51:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:52:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:53:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:54:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:55:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:56:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:57:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:58:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL_N/VQSHLQ_N:59:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:60:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:61:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:63:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:64:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, 
ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:65:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:66:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:67:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (with input = 0) output: +VRSHL/VRSHLQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:2:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:3:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:6:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:7:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:9:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:10:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:11:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:12:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:13:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:14:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:15:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:16:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (input 0 and negative shift amount) output: +VRSHL/VRSHLQ:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:19:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:20:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:21:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VRSHL/VRSHLQ:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:24:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:25:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:29:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:30:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:31:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:34:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ output: +VRSHL/VRSHLQ:36:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VRSHL/VRSHLQ:37:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VRSHL/VRSHLQ:38:result_int32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:39:result_int64x1 [] = { fffffffffffffffe, } +VRSHL/VRSHLQ:40:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VRSHL/VRSHLQ:41:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VRSHL/VRSHLQ:42:result_uint32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:43:result_uint64x1 [] = { 1ffffffffffffffe, } +VRSHL/VRSHLQ:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:45:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:46:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:47:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:48:result_int64x2 [] = { 0, 8000000000000000, } 
+VRSHL/VRSHLQ:49:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:50:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:51:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:52:result_uint64x2 [] = { 0, 8000000000000000, } +VRSHL/VRSHLQ:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (negative shift amount) output: +VRSHL/VRSHLQ:54:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHL/VRSHLQ:55:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHL/VRSHLQ:56:result_int32x2 [] = { fffffffe, fffffffe, } +VRSHL/VRSHLQ:57:result_int64x1 [] = { ffffffffffffffff, } +VRSHL/VRSHLQ:58:result_uint8x8 [] = { 78, 79, 79, 7a, 7a, 7b, 7b, 7c, } +VRSHL/VRSHLQ:59:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VRSHL/VRSHLQ:60:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VRSHL/VRSHLQ:61:result_uint64x1 [] = { fffffffffffffff, } +VRSHL/VRSHLQ:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:63:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:64:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:65:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:66:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:67:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VRSHL/VRSHLQ:68:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:69:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VRSHL/VRSHLQ:70:result_uint64x2 [] = { 100000000000, 100000000000, } +VRSHL/VRSHLQ:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ 
(checking round_const overflow: shift by -1) output: +VRSHL/VRSHLQ:72:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:73:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:74:result_int32x2 [] = { 40000000, 40000000, } +VRSHL/VRSHLQ:75:result_int64x1 [] = { 4000000000000000, } +VRSHL/VRSHLQ:76:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:77:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:78:result_uint32x2 [] = { 80000000, 80000000, } +VRSHL/VRSHLQ:79:result_uint64x1 [] = { 8000000000000000, } +VRSHL/VRSHLQ:80:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:81:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:82:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:83:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHL/VRSHLQ:84:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHL/VRSHLQ:85:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:86:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:87:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHL/VRSHLQ:88:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHL/VRSHLQ:89:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (checking round_const overflow: shift by -3) output: +VRSHL/VRSHLQ:90:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:91:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:92:result_int32x2 [] = { 10000000, 10000000, } +VRSHL/VRSHLQ:93:result_int64x1 [] = { 1000000000000000, } +VRSHL/VRSHLQ:94:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } 
+VRSHL/VRSHLQ:95:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:96:result_uint32x2 [] = { 20000000, 20000000, } +VRSHL/VRSHLQ:97:result_uint64x1 [] = { 2000000000000000, } +VRSHL/VRSHLQ:98:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:99:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:100:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:101:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHL/VRSHLQ:102:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHL/VRSHLQ:103:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:104:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:105:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHL/VRSHLQ:106:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHL/VRSHLQ:107:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (large shift amount) output: +VRSHL/VRSHLQ:108:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:109:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:110:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:111:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:112:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:113:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:114:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:115:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:116:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:117:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:118:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:119:result_int32x4 [] = { 
0, 0, 0, 0, } +VRSHL/VRSHLQ:120:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:121:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:122:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:123:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:124:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:125:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD2/VLD2Q chunk 0 output: +VLD2/VLD2Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD2/VLD2Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD2/VLD2Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD2/VLD2Q:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:16:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VLD2/VLD2Q:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VLD2/VLD2Q chunk 1 output: +VLD2/VLD2Q:18:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:19:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:21:result_int64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:22:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:23:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD2/VLD2Q:27:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:28:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:29:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:31:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:32:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD2/VLD2Q:33:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:35:result_float32x4 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, c1200000 -0x1.4p+3 -10, c1100000 -0x1.2p+3 -9, } + +VLD3/VLD3Q chunk 0 output: +VLD3/VLD3Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:3:result_int64x1 [] = { 
fffffffffffffff0, } +VLD3/VLD3Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD3/VLD3Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD3/VLD3Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3/VLD3Q:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD3/VLD3Q:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VLD3/VLD3Q chunk 1 output: +VLD3/VLD3Q:18:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:19:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:21:result_int64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:22:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:23:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } 
+VLD3/VLD3Q:27:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:28:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:29:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:31:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:32:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD3/VLD3Q:33:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:35:result_float32x4 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, c1200000 -0x1.4p+3 -10, c1100000 -0x1.2p+3 -9, } + +VLD3/VLD3Q chunk 2 output: +VLD3/VLD3Q:36:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:37:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:38:result_int32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:39:result_int64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:40:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:41:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD3/VLD3Q:42:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:43:result_uint64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:44:result_float32x2 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, } +VLD3/VLD3Q:45:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VLD3/VLD3Q:46:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:47:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:49:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VLD3/VLD3Q:50:result_uint16x8 
[] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:51:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:53:result_float32x4 [] = { c1000000 -0x1p+3 -8, c0e00000 -0x1.cp+2 -7, c0c00000 -0x1.8p+2 -6, c0a00000 -0x1.4p+2 -5, } + +VLD4/VLD4Q chunk 0 output: +VLD4/VLD4Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD4/VLD4Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4/VLD4Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD4/VLD4Q:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VLD4/VLD4Q chunk 1 output: 
+VLD4/VLD4Q:18:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:19:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:21:result_int64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:22:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:23:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD4/VLD4Q:27:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:28:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:29:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:31:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:32:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:33:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:35:result_float32x4 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, c1200000 -0x1.4p+3 -10, c1100000 -0x1.2p+3 -9, } + +VLD4/VLD4Q chunk 2 output: +VLD4/VLD4Q:36:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:37:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:38:result_int32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:39:result_int64x1 [] = { fffffffffffffff2, } +VLD4/VLD4Q:40:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:41:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD4/VLD4Q:42:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:43:result_uint64x1 [] = { 
fffffffffffffff2, } +VLD4/VLD4Q:44:result_float32x2 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, } +VLD4/VLD4Q:45:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VLD4/VLD4Q:46:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:47:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:49:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VLD4/VLD4Q:50:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:51:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:53:result_float32x4 [] = { c1000000 -0x1p+3 -8, c0e00000 -0x1.cp+2 -7, c0c00000 -0x1.8p+2 -6, c0a00000 -0x1.4p+2 -5, } + +VLD4/VLD4Q chunk 3 output: +VLD4/VLD4Q:54:result_int8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:55:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:56:result_int32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:57:result_int64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:58:result_uint8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:59:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:60:result_uint32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:61:result_uint64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:62:result_float32x2 [] = { c1200000 -0x1.4p+3 -10, c1100000 -0x1.2p+3 -9, } +VLD4/VLD4Q:63:result_int8x16 [] = { fffffff8, ffffffff, fffffff9, ffffffff, fffffffa, ffffffff, fffffffb, ffffffff, fffffffc, ffffffff, fffffffd, ffffffff, fffffffe, ffffffff, ffffffff, ffffffff, } +VLD4/VLD4Q:64:result_int16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:65:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:66:result_int64x2 [] = { 3333333333333333, 
3333333333333333, } +VLD4/VLD4Q:67:result_uint8x16 [] = { f8, ff, f9, ff, fa, ff, fb, ff, fc, ff, fd, ff, fe, ff, ff, ff, } +VLD4/VLD4Q:68:result_uint16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:69:result_uint32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:71:result_float32x4 [] = { c0800000 -0x1p+2 -4, c0400000 -0x1.8p+1 -3, c0000000 -0x1p+1 -2, bf800000 -0x1p+0 -1, } + +VDUP_LANE/VDUP_LANEQ output: +VDUP_LANE/VDUP_LANEQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:4:result_uint8x8 [] = { f7, f7, f7, f7, f7, f7, f7, f7, } +VDUP_LANE/VDUP_LANEQ:5:result_uint16x4 [] = { fff3, fff3, fff3, fff3, } +VDUP_LANE/VDUP_LANEQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VDUP_LANE/VDUP_LANEQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:10:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VDUP_LANE/VDUP_LANEQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:13:result_uint8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, } +VDUP_LANE/VDUP_LANEQ:14:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } 
+VDUP_LANE/VDUP_LANEQ:15:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP_LANE/VDUP_LANEQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } + +VQDMULL_LANE overflow output: +VQDMULL_LANE:0:vqdmull_lane_s16 Neon overflow 0 +VQDMULL_LANE:1:vqdmull_lane_s32 Neon overflow 0 + +VQDMULL_LANE output: +VQDMULL_LANE:2:result_int32x4 [] = { 8000, 8000, 8000, 8000, } +VQDMULL_LANE:3:result_int64x2 [] = { 4000, 4000, } + +VQDMULL_LANE (check mul overflow) overflow output: +VQDMULL_LANE:4:vqdmull_lane_s16 Neon overflow 1 +VQDMULL_LANE:5:vqdmull_lane_s32 Neon overflow 1 + +VQDMULL_LANE (check mul overflow) output: +VQDMULL_LANE:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_LANE:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VQDMULL_N overflow output: +VQDMULL_N:0:vqdmull_n_s16 Neon overflow 0 +VQDMULL_N:1:vqdmull_n_s32 Neon overflow 0 + +VQDMULL_N output: +VQDMULL_N:2:result_int32x4 [] = { 44000, 44000, 44000, 44000, } +VQDMULL_N:3:result_int64x2 [] = { aa000, aa000, } + +VQDMULL_N (check mul overflow) overflow output: +VQDMULL_N:4:vqdmull_n_s16 Neon overflow 1 +VQDMULL_N:5:vqdmull_n_s32 Neon overflow 1 + +VQDMULL_N (check mul overflow) output: +VQDMULL_N:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_N:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VST1_LANE/VST1_LANEQ output: +VST1_LANE/VST1_LANEQ:0:result_int8x8 [] = { fffffff7, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:1:result_int16x4 [] = { fffffff3, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:2:result_int32x2 [] = { fffffff1, 33333333, } +VST1_LANE/VST1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:4:result_uint8x8 [] = { f6, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:5:result_uint16x4 [] = { fff2, 3333, 
3333, 3333, } +VST1_LANE/VST1_LANEQ:6:result_uint32x2 [] = { fffffff0, 33333333, } +VST1_LANE/VST1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 33333333 0x1.666666p-25 4.17233e-08, } +VST1_LANE/VST1_LANEQ:9:result_int8x16 [] = { ffffffff, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:10:result_int16x8 [] = { fffffff5, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:11:result_int32x4 [] = { fffffff1, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:12:result_int64x2 [] = { fffffffffffffff1, 3333333333333333, } +VST1_LANE/VST1_LANEQ:13:result_uint8x16 [] = { fa, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:14:result_uint16x8 [] = { fff4, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:15:result_uint32x4 [] = { fffffff3, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:16:result_uint64x2 [] = { fffffffffffffff0, 3333333333333333, } +VST1_LANE/VST1_LANEQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSUB/VSUBQ output: +VSUB/VSUBQ:0:result_int8x8 [] = { ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VSUB/VSUBQ:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VSUB/VSUBQ:2:result_int32x2 [] = { ffffffed, ffffffee, } +VSUB/VSUBQ:3:result_int64x1 [] = { ffffffffffffff8c, } +VSUB/VSUBQ:4:result_uint8x8 [] = { dc, dd, de, df, e0, e1, e2, e3, } +VSUB/VSUBQ:5:result_uint16x4 [] = { ffd2, ffd3, ffd4, ffd5, } +VSUB/VSUBQ:6:result_uint32x2 [] = { ffffffc8, ffffffc9, } +VSUB/VSUBQ:7:result_uint64x1 [] = { ffffffffffffffee, } +VSUB/VSUBQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSUB/VSUBQ:9:result_int8x16 [] = { fffffffa, fffffffb, fffffffc, fffffffd, 
fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, } +VSUB/VSUBQ:10:result_int16x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VSUB/VSUBQ:11:result_int32x4 [] = { e, f, 10, 11, } +VSUB/VSUBQ:12:result_int64x2 [] = { ffffffffffffffd8, ffffffffffffffd9, } +VSUB/VSUBQ:13:result_uint8x16 [] = { e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, } +VSUB/VSUBQ:14:result_uint16x8 [] = { ffed, ffee, ffef, fff0, fff1, fff2, fff3, fff4, } +VSUB/VSUBQ:15:result_uint32x4 [] = { ffffffb9, ffffffba, ffffffbb, ffffffbc, } +VSUB/VSUBQ:16:result_uint64x2 [] = { ffffffffffffffed, ffffffffffffffee, } +VSUB/VSUBQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +float32: +VSUB/VSUBQ:18:result_float32x2 [] = { c00ccccd -0x1.19999ap+1 -2.2, c00ccccd -0x1.19999ap+1 -2.2, } +VSUB/VSUBQ:19:result_float32x4 [] = { c00ccccc -0x1.199998p+1 -2.2, c00ccccc -0x1.199998p+1 -2.2, c00ccccc -0x1.199998p+1 -2.2, c00ccccc -0x1.199998p+1 -2.2, } + +VQADD/VQADDQ overflow output: +VQADD/VQADDQ:0:vqadd_s8 Neon overflow 0 +VQADD/VQADDQ:1:vqadd_s16 Neon overflow 0 +VQADD/VQADDQ:2:vqadd_s32 Neon overflow 0 +VQADD/VQADDQ:3:vqadd_s64 Neon overflow 0 +VQADD/VQADDQ:4:vqadd_u8 Neon overflow 1 +VQADD/VQADDQ:5:vqadd_u16 Neon overflow 1 +VQADD/VQADDQ:6:vqadd_u32 Neon overflow 1 +VQADD/VQADDQ:7:vqadd_u64 Neon overflow 1 +VQADD/VQADDQ:8:vqaddq_s8 Neon overflow 0 +VQADD/VQADDQ:9:vqaddq_s16 Neon overflow 0 +VQADD/VQADDQ:10:vqaddq_s32 Neon overflow 0 +VQADD/VQADDQ:11:vqaddq_s64 Neon overflow 0 +VQADD/VQADDQ:12:vqaddq_u8 Neon overflow 1 +VQADD/VQADDQ:13:vqaddq_u16 Neon overflow 1 +VQADD/VQADDQ:14:vqaddq_u32 Neon overflow 1 +VQADD/VQADDQ:15:vqaddq_u64 Neon overflow 1 + +VQADD/VQADDQ output: +VQADD/VQADDQ:16:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VQADD/VQADDQ:17:result_int16x4 [] = { 12, 13, 14, 15, } +VQADD/VQADDQ:18:result_int32x2 [] = { 23, 24, } +VQADD/VQADDQ:19:result_int64x1 [] = 
{ 34, } +VQADD/VQADDQ:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:23:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQADD/VQADDQ:25:result_int8x16 [] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, } +VQADD/VQADDQ:26:result_int16x8 [] = { 12, 13, 14, 15, 16, 17, 18, 19, } +VQADD/VQADDQ:27:result_int32x4 [] = { 23, 24, 25, 26, } +VQADD/VQADDQ:28:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:29:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:30:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:31:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQADD/VQADDQ:32:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQADD/VQADDQ:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQADD/VQADDQ 64 bits saturation overflow output: +VQADD/VQADDQ:34:vqadd_s64 Neon overflow 0 +VQADD/VQADDQ:35:vqadd_u64 Neon overflow 0 +VQADD/VQADDQ:36:vqaddq_s64 Neon overflow 0 +VQADD/VQADDQ:37:vqaddq_u64 Neon overflow 0 + +64 bits saturation: +VQADD/VQADDQ:38:result_int64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:39:result_uint64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:40:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQADD/VQADDQ:41:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQADD/VQADDQ 64 bits saturation overflow output: +VQADD/VQADDQ:42:vqadd_s64 Neon overflow 0 +VQADD/VQADDQ:43:vqadd_u64 Neon overflow 1 +VQADD/VQADDQ:44:vqaddq_s64 Neon overflow 0 +VQADD/VQADDQ:45:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ:46:result_int64x1 [] = { 34, } 
+VQADD/VQADDQ:47:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:48:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:49:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +VQADD/VQADDQ 64 bits saturation overflow output: +VQADD/VQADDQ:50:vqadd_s64 Neon overflow 1 +VQADD/VQADDQ:51:vqadd_u64 Neon overflow 1 +VQADD/VQADDQ:52:vqaddq_s64 Neon overflow 1 +VQADD/VQADDQ:53:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ:54:result_int64x1 [] = { 8000000000000000, } +VQADD/VQADDQ:55:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:56:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQADD/VQADDQ:57:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +less than 64 bits saturation: +VQADD/VQADDQ:58:vqadd_s8 Neon overflow 1 +VQADD/VQADDQ:59:vqadd_s16 Neon overflow 1 +VQADD/VQADDQ:60:vqadd_s32 Neon overflow 1 +VQADD/VQADDQ:61:vqaddq_s8 Neon overflow 1 +VQADD/VQADDQ:62:vqaddq_s16 Neon overflow 1 +VQADD/VQADDQ:63:vqaddq_s32 Neon overflow 1 +VQADD/VQADDQ:64:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:65:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:66:result_int32x2 [] = { 80000000, 80000000, } +VQADD/VQADDQ:67:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:68:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:69:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQADD/VQADDQ less than 64 bits saturation overflow output: +VQADD/VQADDQ:70:vqadd_u8 Neon overflow 1 +VQADD/VQADDQ:71:vqadd_u16 Neon overflow 1 +VQADD/VQADDQ:72:vqadd_u32 Neon overflow 1 +VQADD/VQADDQ:73:vqaddq_u8 Neon overflow 1 +VQADD/VQADDQ:74:vqaddq_u16 Neon overflow 1 +VQADD/VQADDQ:75:vqaddq_u32 Neon overflow 1 +VQADD/VQADDQ:76:result_uint8x8 [] 
= { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:77:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:78:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:79:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:80:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:81:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VABS/VABSQ output: +VABS/VABSQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:1:result_int16x4 [] = { 10, f, e, d, } +VABS/VABSQ:2:result_int32x2 [] = { 10, f, } +VABS/VABSQ:3:result_int64x1 [] = { 3333333333333333, } +VABS/VABSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABS/VABSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VABS/VABSQ:7:result_uint64x1 [] = { 3333333333333333, } +VABS/VABSQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VABS/VABSQ:9:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VABS/VABSQ:10:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:11:result_int32x4 [] = { 10, f, e, d, } +VABS/VABSQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABS/VABSQ:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABS/VABSQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +float32: +VABS/VABSQ:18:result_float32x2 [] = { 40133333 0x1.266666p+1 2.3, 40133333 0x1.266666p+1 2.3, } +VABS/VABSQ:19:result_float32x4 
[] = { 4059999a 0x1.b33334p+1 3.4, 4059999a 0x1.b33334p+1 3.4, 4059999a 0x1.b33334p+1 3.4, 4059999a 0x1.b33334p+1 3.4, } + +VQABS/VQABSQ overflow output: +VQABS/VQABSQ:0:vqabs_s8 Neon overflow 0 +VQABS/VQABSQ:1:vqabs_s16 Neon overflow 0 +VQABS/VQABSQ:2:vqabs_s32 Neon overflow 0 +VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 0 +VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 0 +VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 0 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:7:result_int16x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:8:result_int32x2 [] = { 10, f, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQABS/VQABSQ:15:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQABS/VQABSQ:16:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:17:result_int32x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQABS/VQABSQ overflow output: +VQABS/VQABSQ:0:vqabs_s8 Neon overflow 1 +VQABS/VQABSQ:1:vqabs_s16 Neon overflow 1 
+VQABS/VQABSQ:2:vqabs_s32 Neon overflow 1 +VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 1 +VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 1 +VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 1 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQABS/VQABSQ:15:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:16:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:17:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQABS/VQABSQ:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCOMBINE output: +VCOMBINE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:2:result_int32x2 [] = { 33333333, 33333333, } +VCOMBINE:3:result_int64x1 [] = { 3333333333333333, } 
+VCOMBINE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:6:result_uint32x2 [] = { 33333333, 33333333, } +VCOMBINE:7:result_uint64x1 [] = { 3333333333333333, } +VCOMBINE:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCOMBINE:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, 11, 11, 11, 11, 11, 11, 11, } +VCOMBINE:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 22, 22, 22, 22, } +VCOMBINE:11:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VCOMBINE:12:result_int64x2 [] = { fffffffffffffff0, 44, } +VCOMBINE:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, 55, 55, 55, 55, 55, 55, 55, 55, } +VCOMBINE:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 66, 66, 66, 66, } +VCOMBINE:15:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VCOMBINE:16:result_uint64x2 [] = { fffffffffffffff0, 88, } +VCOMBINE:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, 40533333 0x1.a66666p+1 3.3, 40533333 0x1.a66666p+1 3.3, } + +VMAX/VMAXQ output: +VMAX/VMAXQ:0:result_int8x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff3, } +VMAX/VMAXQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:3:result_int64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f4, f5, f6, f7, } +VMAX/VMAXQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff3, } +VMAX/VMAXQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:7:result_uint64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:8:result_float32x2 [] = { c1780000 -0x1.fp+3 -15.5, c1700000 -0x1.ep+3 -15, } +VMAX/VMAXQ:9:result_int8x16 [] = { fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, 
fffffffc, fffffffd, fffffffe, ffffffff, } +VMAX/VMAXQ:10:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:13:result_uint8x16 [] = { f9, f9, f9, f9, f9, f9, f9, f9, f9, f9, fa, fb, fc, fd, fe, ff, } +VMAX/VMAXQ:14:result_uint16x8 [] = { fff2, fff2, fff2, fff3, fff4, fff5, fff6, fff7, } +VMAX/VMAXQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:17:result_float32x4 [] = { c1680000 -0x1.dp+3 -14.5, c1680000 -0x1.dp+3 -14.5, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VMIN/VMINQ output: +VMIN/VMINQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff2, } +VMIN/VMINQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:3:result_int64x1 [] = { 3333333333333333, } +VMIN/VMINQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f3, f3, f3, f3, } +VMIN/VMINQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff1, } +VMIN/VMINQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:7:result_uint64x1 [] = { 3333333333333333, } +VMIN/VMINQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1780000 -0x1.fp+3 -15.5, } +VMIN/VMINQ:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, } +VMIN/VMINQ:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:13:result_uint8x16 [] = { f0, f1, f2, f3, 
f4, f5, f6, f7, f8, f9, f9, f9, f9, f9, f9, f9, } +VMIN/VMINQ:14:result_uint16x8 [] = { fff0, fff1, fff2, fff2, fff2, fff2, fff2, fff2, } +VMIN/VMINQ:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1680000 -0x1.dp+3 -14.5, c1680000 -0x1.dp+3 -14.5, } + +VNEG/VNEGQ output: +VNEG/VNEGQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:1:result_int16x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:2:result_int32x2 [] = { 10, f, } +VNEG/VNEGQ:3:result_int64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VNEG/VNEGQ:7:result_uint64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VNEG/VNEGQ:9:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VNEG/VNEGQ:10:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:11:result_int32x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VNEG/VNEGQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +float32: +VNEG/VNEGQ:18:result_float32x2 [] = { c0133333 -0x1.266666p+1 -2.3, c0133333 -0x1.266666p+1 -2.3, } +VNEG/VNEGQ:19:result_float32x4 [] = { 
c059999a -0x1.b33334p+1 -3.4, c059999a -0x1.b33334p+1 -3.4, c059999a -0x1.b33334p+1 -3.4, c059999a -0x1.b33334p+1 -3.4, } + +VQNEG/VQNEGQ overflow output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 0 +VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 0 +VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 0 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 0 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 0 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 0 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 10, f, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQNEG/VQNEGQ:15:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQNEG/VQNEGQ:16:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:17:result_int32x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQNEG/VQNEGQ overflow output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 1 +VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 1 
+VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 1 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 1 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 1 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 1 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQNEG/VQNEGQ:15:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:16:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:17:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLAL output: +VMLAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL:3:result_int64x1 [] = { 3333333333333333, } +VMLAL:4:result_uint8x8 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLAL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:10:result_int16x8 [] = { ffffe907, ffffe908, ffffe909, ffffe90a, ffffe90b, ffffe90c, ffffe90d, ffffe90e, } +VMLAL:11:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:12:result_int64x2 [] = { 3e07, 3e08, } +VMLAL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:14:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLAL:15:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:16:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLSL output: +VMLSL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL:3:result_int64x1 [] = { 3333333333333333, } +VMLSL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLSL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:10:result_int16x8 [] = { 16d9, 16da, 16db, 16dc, 16dd, 16de, 16df, 16e0, } +VMLSL:11:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:12:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } 
+VMLSL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:14:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLSL:15:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:16:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLAL_LANE output: +VMLAL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_LANE:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLAL_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:11:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:12:result_int64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:15:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:16:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLSL_LANE output: +VMLSL_LANE:0:result_int8x8 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_LANE:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLSL_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:11:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:12:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:15:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:16:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLAL_N output: +VMLAL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_N:8:result_float32x2 [] = { 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLAL_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:11:result_int32x4 [] = { 595, 596, 597, 598, } +VMLAL_N:12:result_int64x2 [] = { b3a, b3b, } +VMLAL_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:15:result_uint32x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLAL_N:16:result_uint64x2 [] = { 10df, 10e0, } +VMLAL_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLSL_N output: +VMLSL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLSL_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:11:result_int32x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLSL_N:12:result_int64x2 [] = { fffffffffffff4a6, fffffffffffff4a7, } +VMLSL_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:15:result_uint32x4 [] = { ffffef01, 
ffffef02, ffffef03, ffffef04, } +VMLSL_N:16:result_uint64x2 [] = { ffffffffffffef01, ffffffffffffef02, } +VMLSL_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMOVL output: +VMOVL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:2:result_int32x2 [] = { 33333333, 33333333, } +VMOVL:3:result_int64x1 [] = { 3333333333333333, } +VMOVL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMOVL:7:result_uint64x1 [] = { 3333333333333333, } +VMOVL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMOVL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVL:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVL:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VMOVL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:14:result_uint16x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVL:15:result_uint32x4 [] = { fff0, fff1, fff2, fff3, } +VMOVL:16:result_uint64x2 [] = { fffffff0, fffffff1, } +VMOVL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMOVN output: +VMOVN:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVN:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVN:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMOVN:3:result_int64x1 [] = { 3333333333333333, } 
+VMOVN:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVN:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VMOVN:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMOVN:7:result_uint64x1 [] = { 3333333333333333, } +VMOVN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMOVN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMULL output: +VMULL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL:3:result_int64x1 [] = { 3333333333333333, } +VMULL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL:7:result_uint64x1 [] = { 3333333333333333, } +VMULL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMULL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:10:result_int16x8 [] = { 100, e1, c4, a9, 90, 79, 64, 51, } +VMULL:11:result_int32x4 [] = { 100, e1, c4, a9, } +VMULL:12:result_int64x2 [] = { 
100, e1, } +VMULL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:14:result_uint16x8 [] = { e100, e2e1, e4c4, e6a9, e890, ea79, ec64, ee51, } +VMULL:15:result_uint32x4 [] = { ffe00100, ffe200e1, ffe400c4, ffe600a9, } +VMULL:16:result_uint64x2 [] = { ffffffe000000100, ffffffe2000000e1, } +VMULL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMULL_LANE output: +VMULL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMULL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMULL_LANE:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMULL_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:11:result_int32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:12:result_int64x2 [] = { 2000, 2000, } +VMULL_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:15:result_uint32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:16:result_uint64x2 [] = { 2000, 2000, } +VMULL_LANE:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VREV16 output: +VREV16:0:result_int8x8 [] = 
{ fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV16:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:2:result_int32x2 [] = { 33333333, 33333333, } +VREV16:3:result_int64x1 [] = { 3333333333333333, } +VREV16:4:result_uint8x8 [] = { f1, f0, f3, f2, f5, f4, f7, f6, } +VREV16:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:6:result_uint32x2 [] = { 33333333, 33333333, } +VREV16:7:result_uint64x1 [] = { 3333333333333333, } +VREV16:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VREV16:9:result_int8x16 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, fffffff9, fffffff8, fffffffb, fffffffa, fffffffd, fffffffc, ffffffff, fffffffe, } +VREV16:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:13:result_uint8x16 [] = { f1, f0, f3, f2, f5, f4, f7, f6, f9, f8, fb, fa, fd, fc, ff, fe, } +VREV16:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VREV32 output: +VREV32:18:result_int8x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV32:19:result_int16x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV32:20:result_int32x2 [] = { 33333333, 33333333, } +VREV32:21:result_int64x1 [] = { 3333333333333333, } +VREV32:22:result_uint8x8 [] = { f3, f2, f1, f0, f7, f6, f5, f4, } +VREV32:23:result_uint16x4 [] = { fff1, fff0, fff3, fff2, } +VREV32:24:result_uint32x2 
[] = { 33333333, 33333333, } +VREV32:25:result_uint64x1 [] = { 3333333333333333, } +VREV32:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VREV32:27:result_int8x16 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, fffffffb, fffffffa, fffffff9, fffffff8, ffffffff, fffffffe, fffffffd, fffffffc, } +VREV32:28:result_int16x8 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV32:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:31:result_uint8x16 [] = { f3, f2, f1, f0, f7, f6, f5, f4, fb, fa, f9, f8, ff, fe, fd, fc, } +VREV32:32:result_uint16x8 [] = { fff1, fff0, fff3, fff2, fff5, fff4, fff7, fff6, } +VREV32:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VREV64 output: +VREV64:36:result_int8x8 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:37:result_int16x4 [] = { fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:38:result_int32x2 [] = { fffffff1, fffffff0, } +VREV64:39:result_int64x1 [] = { 3333333333333333, } +VREV64:40:result_uint8x8 [] = { f7, f6, f5, f4, f3, f2, f1, f0, } +VREV64:41:result_uint16x4 [] = { fff3, fff2, fff1, fff0, } +VREV64:42:result_uint32x2 [] = { fffffff1, fffffff0, } +VREV64:43:result_uint64x1 [] = { 3333333333333333, } +VREV64:44:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1800000 -0x1p+4 -16, } +VREV64:45:result_int8x16 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, ffffffff, fffffffe, fffffffd, fffffffc, fffffffb, fffffffa, fffffff9, fffffff8, } 
+VREV64:46:result_int16x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV64:47:result_int32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:49:result_uint8x16 [] = { f7, f6, f5, f4, f3, f2, f1, f0, ff, fe, fd, fc, fb, fa, f9, f8, } +VREV64:50:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VREV64:51:result_uint32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:53:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1800000 -0x1p+4 -16, c1500000 -0x1.ap+3 -13, c1600000 -0x1.cp+3 -14, } + +VSRA_N output: +VSRA_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRA_N:2:result_int32x2 [] = { fffffffc, fffffffd, } +VSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VSRA_N:5:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VSRA_N:6:result_uint32x2 [] = { fffffff3, fffffff4, } +VSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRA_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSRA_N:9:result_int8x16 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, } +VSRA_N:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRA_N:11:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:13:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VSRA_N:14:result_uint16x8 [] = { fffc, fffd, fffe, ffff, 0, 1, 2, 3, } +VSRA_N:15:result_uint32x4 [] = { fffffff3, fffffff4, fffffff5, 
fffffff6, } +VSRA_N:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTRN/VTRNQ chunk 0 output: +VTRN/VTRNQ:0:result_int8x8 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, } +VTRN/VTRNQ:1:result_int16x4 [] = { fffffff0, fffffff1, 22, 22, } +VTRN/VTRNQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:3:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:4:result_uint8x8 [] = { f0, f1, 55, 55, f2, f3, 55, 55, } +VTRN/VTRNQ:5:result_uint16x4 [] = { fff0, fff1, 66, 66, } +VTRN/VTRNQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:7:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VTRN/VTRNQ:9:result_int8x16 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } +VTRN/VTRNQ:10:result_int16x8 [] = { fffffff0, fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:11:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VTRN/VTRNQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:13:result_uint8x16 [] = { f0, f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:14:result_uint16x8 [] = { fff0, fff1, 66, 66, fff2, fff3, 66, 66, } +VTRN/VTRNQ:15:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VTRN/VTRNQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, 42073333 0x1.0e6666p+5 33.8, 42073333 0x1.0e6666p+5 33.8, } + +VTRN/VTRNQ chunk 1 output: +VTRN/VTRNQ:18:result_int8x8 [] = { fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, } +VTRN/VTRNQ:19:result_int16x4 [] = { fffffff1, 22, 22, fffffff2, } +VTRN/VTRNQ:20:result_int32x2 [] = { 
fffffff1, 33, } +VTRN/VTRNQ:21:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:22:result_uint8x8 [] = { f1, 55, 55, f2, f3, 55, 55, f4, } +VTRN/VTRNQ:23:result_uint16x4 [] = { fff1, 66, 66, fff2, } +VTRN/VTRNQ:24:result_uint32x2 [] = { fffffff1, 77, } +VTRN/VTRNQ:25:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:26:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 42066666 0x1.0cccccp+5 33.6, } +VTRN/VTRNQ:27:result_int8x16 [] = { fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, fffffff8, } +VTRN/VTRNQ:28:result_int16x8 [] = { fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, fffffff4, } +VTRN/VTRNQ:29:result_int32x4 [] = { fffffff1, 33, 33, fffffff2, } +VTRN/VTRNQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:31:result_uint8x16 [] = { f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, f8, } +VTRN/VTRNQ:32:result_uint16x8 [] = { fff1, 66, 66, fff2, fff3, 66, 66, fff4, } +VTRN/VTRNQ:33:result_uint32x4 [] = { fffffff1, 77, 77, fffffff2, } +VTRN/VTRNQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:35:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, 42073333 0x1.0e6666p+5 33.8, 42073333 0x1.0e6666p+5 33.8, c1600000 -0x1.cp+3 -14, } + +VUZP/VUZPQ chunk 0 output: +VUZP/VUZPQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:3:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VUZP/VUZPQ:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VUZP/VUZPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:7:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VUZP/VUZPQ:9:result_int8x16 [] = { fffffff0, 
fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VUZP/VUZPQ:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VUZP/VUZPQ:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VUZP/VUZPQ:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VUZP/VUZPQ chunk 1 output: +VUZP/VUZPQ:18:result_int8x8 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, } +VUZP/VUZPQ:19:result_int16x4 [] = { fffffff1, fffffff2, fffffff3, 22, } +VUZP/VUZPQ:20:result_int32x2 [] = { fffffff1, 33, } +VUZP/VUZPQ:21:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:22:result_uint8x8 [] = { f1, f2, f3, f4, f5, f6, f7, 55, } +VUZP/VUZPQ:23:result_uint16x4 [] = { fff1, fff2, fff3, 66, } +VUZP/VUZPQ:24:result_uint32x2 [] = { fffffff1, 77, } +VUZP/VUZPQ:25:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:26:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 42066666 0x1.0cccccp+5 33.6, } +VUZP/VUZPQ:27:result_int8x16 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 11, } +VUZP/VUZPQ:28:result_int16x8 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 22, } +VUZP/VUZPQ:29:result_int32x4 [] = { fffffff1, fffffff2, fffffff3, 33, } +VUZP/VUZPQ:30:result_int64x2 [] = { 
3333333333333333, 3333333333333333, } +VUZP/VUZPQ:31:result_uint8x16 [] = { f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, 55, } +VUZP/VUZPQ:32:result_uint16x8 [] = { fff1, fff2, fff3, fff4, fff5, fff6, fff7, 66, } +VUZP/VUZPQ:33:result_uint32x4 [] = { fffffff1, fffffff2, fffffff3, 77, } +VUZP/VUZPQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:35:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, 42073333 0x1.0e6666p+5 33.8, } + +VZIP/VZIPQ chunk 0 output: +VZIP/VZIPQ:0:result_int8x8 [] = { fffffff0, fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, } +VZIP/VZIPQ:1:result_int16x4 [] = { fffffff0, fffffff2, 22, 22, } +VZIP/VZIPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:3:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:4:result_uint8x8 [] = { f0, f4, 55, 55, f1, f5, 55, 55, } +VZIP/VZIPQ:5:result_uint16x4 [] = { fff0, fff2, 66, 66, } +VZIP/VZIPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:7:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VZIP/VZIPQ:9:result_int8x16 [] = { fffffff0, fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, } +VZIP/VZIPQ:10:result_int16x8 [] = { fffffff0, fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, } +VZIP/VZIPQ:11:result_int32x4 [] = { fffffff0, fffffff2, 33, 33, } +VZIP/VZIPQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:13:result_uint8x16 [] = { f0, f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, } +VZIP/VZIPQ:14:result_uint16x8 [] = { fff0, fff4, 66, 66, fff1, fff5, 66, 66, } +VZIP/VZIPQ:15:result_uint32x4 [] = { fffffff0, fffffff2, 77, 77, } +VZIP/VZIPQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1600000 -0x1.cp+3 -14, 42073333 0x1.0e6666p+5 33.8, 42073333 
0x1.0e6666p+5 33.8, } + +VZIP/VZIPQ chunk 1 output: +VZIP/VZIPQ:18:result_int8x8 [] = { fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, fffffff2, } +VZIP/VZIPQ:19:result_int16x4 [] = { fffffff2, 22, 22, fffffff1, } +VZIP/VZIPQ:20:result_int32x2 [] = { fffffff1, 33, } +VZIP/VZIPQ:21:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:22:result_uint8x8 [] = { f4, 55, 55, f1, f5, 55, 55, f2, } +VZIP/VZIPQ:23:result_uint16x4 [] = { fff2, 66, 66, fff1, } +VZIP/VZIPQ:24:result_uint32x2 [] = { fffffff1, 77, } +VZIP/VZIPQ:25:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:26:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 42066666 0x1.0cccccp+5 33.6, } +VZIP/VZIPQ:27:result_int8x16 [] = { fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, fffffff4, } +VZIP/VZIPQ:28:result_int16x8 [] = { fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, fffffff2, } +VZIP/VZIPQ:29:result_int32x4 [] = { fffffff2, 33, 33, fffffff1, } +VZIP/VZIPQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:31:result_uint8x16 [] = { f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, f4, } +VZIP/VZIPQ:32:result_uint16x8 [] = { fff4, 66, 66, fff1, fff5, 66, 66, fff2, } +VZIP/VZIPQ:33:result_uint32x4 [] = { fffffff2, 77, 77, fffffff1, } +VZIP/VZIPQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:35:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, 42073333 0x1.0e6666p+5 33.8, 42073333 0x1.0e6666p+5 33.8, c1700000 -0x1.ep+3 -15, } + +VREINTERPRET/VREINTERPRETQ output: +VREINTERPRET/VREINTERPRETQ:0:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:1:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:2:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } 
+VREINTERPRET/VREINTERPRETQ:3:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:4:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:5:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:6:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:7:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:8:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:9:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:10:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:11:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:12:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:13:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:14:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:15:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:16:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:17:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:18:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:19:result_int32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:20:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:21:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:22:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:23:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:24:result_int64x1 [] = { 
f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:25:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:26:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:27:result_int64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:28:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:29:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:30:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:31:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:32:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:33:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:34:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:35:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:36:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:37:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:38:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:39:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:40:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:41:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:42:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:43:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:44:result_uint32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:45:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:46:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:47:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:48:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:49:result_uint64x1 [] = { 
f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:50:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:51:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:52:result_uint64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:53:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:54:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:55:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:56:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:57:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:58:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:59:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VREINTERPRET/VREINTERPRETQ:60:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:61:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:62:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } 
+VREINTERPRET/VREINTERPRETQ:63:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:64:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:65:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:66:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:67:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:68:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:69:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:70:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:71:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:72:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:73:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:74:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:75:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:76:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:77:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:78:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:79:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:80:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } 
+VREINTERPRET/VREINTERPRETQ:81:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:82:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:83:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:84:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:85:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:86:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:87:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:88:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:89:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:90:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:91:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:92:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:93:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:94:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:95:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:96:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:97:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:98:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:99:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:100:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:101:result_uint64x2 
[] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:102:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:103:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:104:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:105:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VREINTERPRET/VREINTERPRETQ:106:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:107:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:108:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:109:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:110:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:111:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:112:result_float32x2 [] = { f3f2f1f0 -0x1.e5e3ep+104 -3.84962e+31, f7f6f5f4 -0x1.edebe8p+112 -1.00179e+34, } +VREINTERPRET/VREINTERPRETQ:113:result_float32x2 [] = { fff1fff0 nan nan, fff3fff2 nan nan, } +VREINTERPRET/VREINTERPRETQ:114:result_float32x2 [] = { fffffff0 nan nan, fffffff1 nan nan, } +VREINTERPRET/VREINTERPRETQ:115:result_float32x2 [] = { fffffff0 nan nan, ffffffff nan nan, } +VREINTERPRET/VREINTERPRETQ:116:result_float32x2 [] = { f3f2f1f0 -0x1.e5e3ep+104 -3.84962e+31, f7f6f5f4 -0x1.edebe8p+112 -1.00179e+34, } +VREINTERPRET/VREINTERPRETQ:117:result_float32x2 [] = { fff1fff0 nan nan, fff3fff2 nan nan, } +VREINTERPRET/VREINTERPRETQ:118:result_float32x2 [] = { fffffff0 nan nan, fffffff1 nan nan, } +VREINTERPRET/VREINTERPRETQ:119:result_float32x2 [] = { fffffff0 nan nan, ffffffff 
nan nan, } +VREINTERPRET/VREINTERPRETQ:120:result_float32x4 [] = { f3f2f1f0 -0x1.e5e3ep+104 -3.84962e+31, f7f6f5f4 -0x1.edebe8p+112 -1.00179e+34, fbfaf9f8 -0x1.f5f3fp+120 -2.60629e+36, fffefdfc nan nan, } +VREINTERPRET/VREINTERPRETQ:121:result_float32x4 [] = { fff1fff0 nan nan, fff3fff2 nan nan, fff5fff4 nan nan, fff7fff6 nan nan, } +VREINTERPRET/VREINTERPRETQ:122:result_float32x4 [] = { fffffff0 nan nan, fffffff1 nan nan, fffffff2 nan nan, fffffff3 nan nan, } +VREINTERPRET/VREINTERPRETQ:123:result_float32x4 [] = { fffffff0 nan nan, ffffffff nan nan, fffffff1 nan nan, ffffffff nan nan, } +VREINTERPRET/VREINTERPRETQ:124:result_float32x4 [] = { f3f2f1f0 -0x1.e5e3ep+104 -3.84962e+31, f7f6f5f4 -0x1.edebe8p+112 -1.00179e+34, fbfaf9f8 -0x1.f5f3fp+120 -2.60629e+36, fffefdfc nan nan, } +VREINTERPRET/VREINTERPRETQ:125:result_float32x4 [] = { fff1fff0 nan nan, fff3fff2 nan nan, fff5fff4 nan nan, fff7fff6 nan nan, } +VREINTERPRET/VREINTERPRETQ:126:result_float32x4 [] = { fffffff0 nan nan, fffffff1 nan nan, fffffff2 nan nan, fffffff3 nan nan, } +VREINTERPRET/VREINTERPRETQ:127:result_float32x4 [] = { fffffff0 nan nan, ffffffff nan nan, fffffff1 nan nan, ffffffff nan nan, } +VREINTERPRET/VREINTERPRETQ:128:result_int8x8 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, } +VREINTERPRET/VREINTERPRETQ:129:result_int16x4 [] = { 0, ffffc180, 0, ffffc170, } +VREINTERPRET/VREINTERPRETQ:130:result_int32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:131:result_int64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:132:result_uint8x8 [] = { 0, 0, 80, c1, 0, 0, 70, c1, } +VREINTERPRET/VREINTERPRETQ:133:result_uint16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:134:result_uint32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:135:result_uint64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:136:result_int8x16 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, 0, 0, 60, ffffffc1, 0, 0, 50, ffffffc1, } 
+VREINTERPRET/VREINTERPRETQ:137:result_int16x8 [] = { 0, ffffc180, 0, ffffc170, 0, ffffc160, 0, ffffc150, } +VREINTERPRET/VREINTERPRETQ:138:result_int32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:139:result_int64x2 [] = { c1700000c1800000, c1500000c1600000, } +VREINTERPRET/VREINTERPRETQ:140:result_uint8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } +VREINTERPRET/VREINTERPRETQ:141:result_uint16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:142:result_uint32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:143:result_uint64x2 [] = { c1700000c1800000, c1500000c1600000, } + +VQRDMULH overflow output: +VQRDMULH:0:vqrdmulh_s16 Neon overflow 0 +VQRDMULH:1:vqrdmulh_s32 Neon overflow 0 +VQRDMULH:2:vqrdmulhq_s16 Neon overflow 0 +VQRDMULH:3:vqrdmulhq_s32 Neon overflow 0 + +VQRDMULH output: +VQRDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:5:result_int16x4 [] = { fffffff5, fffffff6, fffffff7, fffffff7, } +VQRDMULH:6:result_int32x2 [] = { 0, 0, } +VQRDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:12:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:14:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRDMULH:15:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH:16:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQRDMULH:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:21:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH (check mul overflow) overflow output: +VQRDMULH:22:vqrdmulh_s16 Neon overflow 1 +VQRDMULH:23:vqrdmulh_s32 Neon overflow 1 +VQRDMULH:24:vqrdmulhq_s16 Neon overflow 1 +VQRDMULH:25:vqrdmulhq_s32 Neon overflow 1 + +VQRDMULH (check mul overflow) output: +VQRDMULH:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:27:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:28:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:29:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:32:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:33:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:34:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH:35:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:36:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:37:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:38:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:39:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:40:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:41:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:42:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:43:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH (check rounding overflow) overflow output: +VQRDMULH:44:vqrdmulh_s16 Neon overflow 0 +VQRDMULH:45:vqrdmulh_s32 Neon overflow 0 +VQRDMULH:46:vqrdmulhq_s16 Neon overflow 0 +VQRDMULH:47:vqrdmulhq_s32 Neon overflow 0 + +VQRDMULH (check rounding overflow) output: +VQRDMULH:48:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:49:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:50:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:51:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:52:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:53:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:54:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:55:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:56:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH:57:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:58:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:59:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:60:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:61:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:62:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:63:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:64:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:65:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_LANE overflow output: +VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon overflow 0 +VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon overflow 0 +VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon overflow 0 
+VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon overflow 0 + +VQRDMULH_LANE output: +VQRDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:5:result_int16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:6:result_int32x2 [] = { 0, 0, } +VQRDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:12:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_LANE:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:14:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRDMULH_LANE:15:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:16:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:21:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_LANE (check mul overflow) overflow output: +VQRDMULH_LANE:22:vqrdmulh_lane_s16 Neon overflow 1 +VQRDMULH_LANE:23:vqrdmulh_lane_s32 Neon overflow 1 +VQRDMULH_LANE:24:vqrdmulhq_lane_s16 Neon overflow 1 +VQRDMULH_LANE:25:vqrdmulhq_lane_s32 Neon overflow 1 + +VQRDMULH_LANE (check mul overflow) output: +VQRDMULH_LANE:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:27:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } 
+VQRDMULH_LANE:28:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:29:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:32:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:33:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:34:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_LANE:35:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:36:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:37:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:38:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:39:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:40:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:41:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:42:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:43:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_LANE (check rounding overflow) overflow output: +VQRDMULH_LANE:44:vqrdmulh_lane_s16 Neon overflow 0 +VQRDMULH_LANE:45:vqrdmulh_lane_s32 Neon overflow 0 +VQRDMULH_LANE:46:vqrdmulhq_lane_s16 Neon overflow 0 +VQRDMULH_LANE:47:vqrdmulhq_lane_s32 Neon overflow 0 + +VQRDMULH_LANE (check rounding overflow) output: +VQRDMULH_LANE:48:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:49:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:50:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:51:result_int64x1 [] = { 3333333333333333, } 
+VQRDMULH_LANE:52:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:53:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:54:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:56:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_LANE:57:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:58:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:59:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:60:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:61:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:62:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:63:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:64:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:65:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_N overflow output: +VQRDMULH_N:0:vqrdmulh_n_s16 Neon overflow 0 +VQRDMULH_N:1:vqrdmulh_n_s32 Neon overflow 0 +VQRDMULH_N:2:vqrdmulhq_n_s16 Neon overflow 0 +VQRDMULH_N:3:vqrdmulhq_n_s32 Neon overflow 0 + +VQRDMULH_N output: +VQRDMULH_N:4:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffd, } +VQRDMULH_N:5:result_int32x2 [] = { fffffffe, fffffffe, } +VQRDMULH_N:6:result_int16x8 [] = { 6, 6, 6, 5, 5, 4, 4, 4, } +VQRDMULH_N:7:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } + +VQRDMULH_N (check mul overflow) overflow output: +VQRDMULH_N:8:vqrdmulh_n_s16 Neon overflow 1 +VQRDMULH_N:9:vqrdmulh_n_s32 Neon overflow 1 +VQRDMULH_N:10:vqrdmulhq_n_s16 Neon overflow 1 
+VQRDMULH_N:11:vqrdmulhq_n_s32 Neon overflow 1 + +VQRDMULH_N (check mul overflow) output: +VQRDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:20:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_N:21:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:22:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:23:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:24:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:25:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:26:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:27:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:28:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:29:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_N (check rounding overflow) overflow output: +VQRDMULH_N:30:vqrdmulh_n_s16 Neon overflow 0 +VQRDMULH_N:31:vqrdmulh_n_s32 Neon overflow 0 +VQRDMULH_N:32:vqrdmulhq_n_s16 Neon overflow 0 +VQRDMULH_N:33:vqrdmulhq_n_s32 Neon overflow 0 + +VQRDMULH_N (check rounding overflow) output: +VQRDMULH_N:34:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:35:result_int16x4 [] = { 7fff, 7fff, 
7fff, 7fff, } +VQRDMULH_N:36:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:37:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:38:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:39:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:40:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:41:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:42:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_N:43:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:44:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:45:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:46:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:47:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:48:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:49:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:50:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:51:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (with input = 0) overflow output: +VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon overflow 0 
+VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (with input = 0) output: +VQRSHL/VQRSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:18:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:19:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:23:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:25:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:26:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:27:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:28:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:29:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:30:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:31:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:32:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) overflow output: +VQRSHL/VQRSHLQ:34:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:35:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:36:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:37:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:38:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:39:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:40:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:41:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:42:vqrshlq_s8 Neon overflow 0 
+VQRSHL/VQRSHLQ:43:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:44:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:45:vqrshlq_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:46:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:47:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:48:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:49:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: +VQRSHL/VQRSHLQ:50:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:51:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:52:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:53:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:54:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:55:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:56:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:57:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:58:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:59:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:60:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:61:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:62:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:63:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:64:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:65:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:66:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:67:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ overflow output: +VQRSHL/VQRSHLQ:68:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:69:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:70:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:71:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:72:vqrshl_u8 Neon overflow 1 +VQRSHL/VQRSHLQ:73:vqrshl_u16 Neon overflow 1 
+VQRSHL/VQRSHLQ:74:vqrshl_u32 Neon overflow 1 +VQRSHL/VQRSHLQ:75:vqrshl_u64 Neon overflow 1 +VQRSHL/VQRSHLQ:76:vqrshlq_s8 Neon overflow 1 +VQRSHL/VQRSHLQ:77:vqrshlq_s16 Neon overflow 1 +VQRSHL/VQRSHLQ:78:vqrshlq_s32 Neon overflow 1 +VQRSHL/VQRSHLQ:79:vqrshlq_s64 Neon overflow 1 +VQRSHL/VQRSHLQ:80:vqrshlq_u8 Neon overflow 1 +VQRSHL/VQRSHLQ:81:vqrshlq_u16 Neon overflow 1 +VQRSHL/VQRSHLQ:82:vqrshlq_u32 Neon overflow 1 +VQRSHL/VQRSHLQ:83:vqrshlq_u64 Neon overflow 1 + +VQRSHL/VQRSHLQ output: +VQRSHL/VQRSHLQ:84:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQRSHL/VQRSHLQ:85:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQRSHL/VQRSHLQ:86:result_int32x2 [] = { fffff000, fffff100, } +VQRSHL/VQRSHLQ:87:result_int64x1 [] = { ffffffffffffff80, } +VQRSHL/VQRSHLQ:88:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:89:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:90:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:91:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:92:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:93:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:94:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:95:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:96:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:97:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:98:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:99:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } 
+VQRSHL/VQRSHLQ:100:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:101:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (negative shift amount) overflow output: +VQRSHL/VQRSHLQ:102:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:103:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:104:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:105:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:106:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:107:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:108:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:109:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:110:vqrshlq_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:111:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:112:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:113:vqrshlq_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:114:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:115:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:116:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:117:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (negative shift amount) output: +VQRSHL/VQRSHLQ:118:result_int8x8 [] = { fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:119:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VQRSHL/VQRSHLQ:120:result_int32x2 [] = { fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:121:result_int64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:122:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, } +VQRSHL/VQRSHLQ:123:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VQRSHL/VQRSHLQ:124:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQRSHL/VQRSHLQ:125:result_uint64x1 [] = { fffffffffffffff, } +VQRSHL/VQRSHLQ:126:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:127:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:128:result_int16x8 [] = { 
0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:129:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:130:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:131:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VQRSHL/VQRSHLQ:132:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:133:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VQRSHL/VQRSHLQ:134:result_uint64x2 [] = { 100000000000, 100000000000, } +VQRSHL/VQRSHLQ:135:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (checking overflow: shift by -1) overflow output: +VQRSHL/VQRSHLQ:136:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:137:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:138:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:139:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:140:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:141:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:142:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:143:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:144:vqrshlq_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:145:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:146:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:147:vqrshlq_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:148:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:149:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:150:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:151:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (checking overflow: shift by -1) output: +VQRSHL/VQRSHLQ:152:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:153:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:154:result_int32x2 [] = { 40000000, 40000000, } +VQRSHL/VQRSHLQ:155:result_int64x1 [] = { 4000000000000000, } +VQRSHL/VQRSHLQ:156:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:157:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:158:result_uint32x2 [] = { 80000000, 
80000000, } +VQRSHL/VQRSHLQ:159:result_uint64x1 [] = { 8000000000000000, } +VQRSHL/VQRSHLQ:160:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:161:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:162:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:163:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VQRSHL/VQRSHLQ:164:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VQRSHL/VQRSHLQ:165:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:166:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:167:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:168:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:169:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (checking overflow: shift by -3) overflow output: +VQRSHL/VQRSHLQ:170:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:171:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:172:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:173:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:174:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:175:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:176:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:177:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:178:vqrshlq_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:179:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:180:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:181:vqrshlq_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:182:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:183:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:184:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:185:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (checking overflow: shift by -3) 
output: +VQRSHL/VQRSHLQ:186:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:187:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:188:result_int32x2 [] = { 10000000, 10000000, } +VQRSHL/VQRSHLQ:189:result_int64x1 [] = { 1000000000000000, } +VQRSHL/VQRSHLQ:190:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:191:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:192:result_uint32x2 [] = { 20000000, 20000000, } +VQRSHL/VQRSHLQ:193:result_uint64x1 [] = { 2000000000000000, } +VQRSHL/VQRSHLQ:194:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:195:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:196:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:197:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VQRSHL/VQRSHLQ:198:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VQRSHL/VQRSHLQ:199:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:200:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:201:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VQRSHL/VQRSHLQ:202:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VQRSHL/VQRSHLQ:203:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VABA/VABAQ output: +VABA/VABAQ:0:result_int8x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABA/VABAQ:1:result_int16x4 [] = { 16, 17, 18, 19, } +VABA/VABAQ:2:result_int32x2 [] = { 20, 21, } +VABA/VABAQ:3:result_int64x1 [] = { 3333333333333333, } +VABA/VABAQ:4:result_uint8x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } +VABA/VABAQ:5:result_uint16x4 [] = { 
907, 908, 909, 90a, } +VABA/VABAQ:6:result_uint32x2 [] = { ffffffe7, ffffffe8, } +VABA/VABAQ:7:result_uint64x1 [] = { 3333333333333333, } +VABA/VABAQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VABA/VABAQ:9:result_int8x16 [] = { 5e, 5f, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 6a, 6b, 6c, 6d, } +VABA/VABAQ:10:result_int16x8 [] = { b9c, b9d, b9e, b9f, ba0, ba1, ba2, ba3, } +VABA/VABAQ:11:result_int32x4 [] = { 26e0, 26e1, 26e2, 26e3, } +VABA/VABAQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:13:result_uint8x16 [] = { f8, f9, fa, fb, fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, } +VABA/VABAQ:14:result_uint16x8 [] = { fff9, fffa, fffb, fffc, fffd, fffe, ffff, 0, } +VABA/VABAQ:15:result_uint32x4 [] = { c, d, e, f, } +VABA/VABAQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VABAL output: +VABAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:2:result_int32x2 [] = { 33333333, 33333333, } +VABAL:3:result_int64x1 [] = { 3333333333333333, } +VABAL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABAL:7:result_uint64x1 [] = { 3333333333333333, } +VABAL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VABAL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:10:result_int16x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABAL:11:result_int32x4 [] = { 16, 17, 18, 19, } +VABAL:12:result_int64x2 [] = { 20, 21, } +VABAL:13:result_uint8x16 [] = { 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:14:result_uint16x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } +VABAL:15:result_uint32x4 [] = { 907, 908, 909, 90a, } +VABAL:16:result_uint64x2 [] = { ffffffe7, ffffffe8, } +VABAL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VABD/VABDQ output: +VABD/VABDQ:0:result_int8x8 [] = { 11, 10, f, e, d, c, b, a, } +VABD/VABDQ:1:result_int16x4 [] = { 3, 2, 1, 0, } +VABD/VABDQ:2:result_int32x2 [] = { 18, 17, } +VABD/VABDQ:3:result_int64x1 [] = { 3333333333333333, } +VABD/VABDQ:4:result_uint8x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABD/VABDQ:5:result_uint16x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABD/VABDQ:6:result_uint32x2 [] = { ffffffe8, ffffffe9, } +VABD/VABDQ:7:result_uint64x1 [] = { 3333333333333333, } +VABD/VABDQ:8:result_float32x2 [] = { 41c26666 0x1.84ccccp+4 24.3, 41ba6666 0x1.74ccccp+4 23.3, } +VABD/VABDQ:9:result_int8x16 [] = { 1a, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, f, e, d, c, b, } +VABD/VABDQ:10:result_int16x8 [] = { 4, 3, 2, 1, 0, 1, 2, 3, } +VABD/VABDQ:11:result_int32x4 [] = { 30, 2f, 2e, 2d, } +VABD/VABDQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABD/VABDQ:13:result_uint8x16 [] = { e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, f4, f5, } +VABD/VABDQ:14:result_uint16x8 [] = { ffe4, ffe5, ffe6, ffe7, ffe8, ffe9, ffea, ffeb, } +VABD/VABDQ:15:result_uint32x4 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, } +VABD/VABDQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABD/VABDQ:17:result_float32x4 [] = { 42407ae1 0x1.80f5c2p+5 48.12, 423c7ae1 0x1.78f5c2p+5 47.12, 42387ae1 0x1.70f5c2p+5 46.12, 42347ae1 0x1.68f5c2p+5 45.12, } + +VABDL output: +VABDL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:2:result_int32x2 [] = { 33333333, 33333333, } 
+VABDL:3:result_int64x1 [] = { 3333333333333333, } +VABDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABDL:7:result_uint64x1 [] = { 3333333333333333, } +VABDL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VABDL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:10:result_int16x8 [] = { 11, 10, f, e, d, c, b, a, } +VABDL:11:result_int32x4 [] = { 3, 2, 1, 0, } +VABDL:12:result_int64x2 [] = { 18, 17, } +VABDL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:14:result_uint16x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABDL:15:result_uint32x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABDL:16:result_uint64x2 [] = { ffffffe8, ffffffe9, } +VABDL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VAND/VANDQ output: +VAND/VANDQ:0:result_int8x8 [] = { 0, 0, 2, 2, 0, 0, 2, 2, } +VAND/VANDQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VAND/VANDQ:2:result_int32x2 [] = { 0, 1, } +VAND/VANDQ:3:result_int64x1 [] = { 60, } +VAND/VANDQ:4:result_uint8x8 [] = { 10, 10, 10, 10, 14, 14, 14, 14, } +VAND/VANDQ:5:result_uint16x4 [] = { 10, 10, 12, 12, } +VAND/VANDQ:6:result_uint32x2 [] = { 20, 20, } +VAND/VANDQ:7:result_uint64x1 [] = { 0, } +VAND/VANDQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VAND/VANDQ:9:result_int8x16 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VAND/VANDQ:10:result_int16x8 [] = { ffffffe0, ffffffe0, ffffffe0, ffffffe0, ffffffe4, ffffffe4, ffffffe4, ffffffe4, } 
+VAND/VANDQ:11:result_int32x4 [] = { ffffffe0, ffffffe0, ffffffe2, ffffffe2, } +VAND/VANDQ:12:result_int64x2 [] = { 10, 10, } +VAND/VANDQ:13:result_uint8x16 [] = { 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, c, c, c, c, } +VAND/VANDQ:14:result_uint16x8 [] = { 0, 1, 2, 3, 0, 1, 2, 3, } +VAND/VANDQ:15:result_uint32x4 [] = { 30, 31, 32, 33, } +VAND/VANDQ:16:result_uint64x2 [] = { 0, 1, } +VAND/VANDQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VORR/VORRQ output: +VORR/VORRQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff2, fffffff3, fffffff6, fffffff7, fffffff6, fffffff7, } +VORR/VORRQ:1:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VORR/VORRQ:2:result_int32x2 [] = { fffffff3, fffffff3, } +VORR/VORRQ:3:result_int64x1 [] = { fffffffffffffff4, } +VORR/VORRQ:4:result_uint8x8 [] = { f4, f5, f6, f7, f4, f5, f6, f7, } +VORR/VORRQ:5:result_uint16x4 [] = { fffe, ffff, fffe, ffff, } +VORR/VORRQ:6:result_uint32x2 [] = { fffffff8, fffffff9, } +VORR/VORRQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VORR/VORRQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VORR/VORRQ:9:result_int8x16 [] = { fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, } +VORR/VORRQ:10:result_int16x8 [] = { fffffffc, fffffffd, fffffffe, ffffffff, fffffffc, fffffffd, fffffffe, ffffffff, } +VORR/VORRQ:11:result_int32x4 [] = { fffffff2, fffffff3, fffffff2, fffffff3, } +VORR/VORRQ:12:result_int64x2 [] = { fffffffffffffff8, fffffffffffffff9, } +VORR/VORRQ:13:result_uint8x16 [] = { fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, } +VORR/VORRQ:14:result_uint16x8 [] = { fff3, fff3, fff3, fff3, fff7, fff7, fff7, fff7, } +VORR/VORRQ:15:result_uint32x4 [] = { fffffff7, fffffff7, fffffff7, 
fffffff7, } +VORR/VORRQ:16:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff3, } +VORR/VORRQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VORN/VORNQ output: +VORN/VORNQ:0:result_int8x8 [] = { fffffffd, fffffffd, ffffffff, ffffffff, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:1:result_int16x4 [] = { fffffff3, fffffff3, fffffff3, fffffff3, } +VORN/VORNQ:2:result_int32x2 [] = { fffffffc, fffffffd, } +VORN/VORNQ:3:result_int64x1 [] = { fffffffffffffffb, } +VORN/VORNQ:4:result_uint8x8 [] = { fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:5:result_uint16x4 [] = { fff1, fff1, fff3, fff3, } +VORN/VORNQ:6:result_uint32x2 [] = { fffffff7, fffffff7, } +VORN/VORNQ:7:result_uint64x1 [] = { fffffffffffffffd, } +VORN/VORNQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VORN/VORNQ:9:result_int8x16 [] = { fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:10:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff7, fffffff7, fffffff7, fffffff7, } +VORN/VORNQ:11:result_int32x4 [] = { fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:12:result_int64x2 [] = { fffffffffffffff7, fffffffffffffff7, } +VORN/VORNQ:13:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:14:result_uint16x8 [] = { fffc, fffd, fffe, ffff, fffc, fffd, fffe, ffff, } +VORN/VORNQ:15:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VORN/VORNQ:16:result_uint64x2 [] = { fffffffffffffffc, fffffffffffffffd, } +VORN/VORNQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VEOR/VEORQ 
output: +VEOR/VEORQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff0, fffffff1, fffffff6, fffffff7, fffffff4, fffffff5, } +VEOR/VEORQ:1:result_int16x4 [] = { c, d, e, f, } +VEOR/VEORQ:2:result_int32x2 [] = { fffffff3, fffffff2, } +VEOR/VEORQ:3:result_int64x1 [] = { ffffffffffffff94, } +VEOR/VEORQ:4:result_uint8x8 [] = { e4, e5, e6, e7, e0, e1, e2, e3, } +VEOR/VEORQ:5:result_uint16x4 [] = { ffee, ffef, ffec, ffed, } +VEOR/VEORQ:6:result_uint32x2 [] = { ffffffd8, ffffffd9, } +VEOR/VEORQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VEOR/VEORQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VEOR/VEORQ:9:result_int8x16 [] = { 6, 7, 4, 5, 2, 3, 0, 1, e, f, c, d, a, b, 8, 9, } +VEOR/VEORQ:10:result_int16x8 [] = { 1c, 1d, 1e, 1f, 18, 19, 1a, 1b, } +VEOR/VEORQ:11:result_int32x4 [] = { 12, 13, 10, 11, } +VEOR/VEORQ:12:result_int64x2 [] = { ffffffffffffffe8, ffffffffffffffe9, } +VEOR/VEORQ:13:result_uint8x16 [] = { fc, fd, fe, ff, f8, f9, fa, fb, f4, f5, f6, f7, f0, f1, f2, f3, } +VEOR/VEORQ:14:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VEOR/VEORQ:15:result_uint32x4 [] = { ffffffc7, ffffffc6, ffffffc5, ffffffc4, } +VEOR/VEORQ:16:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff2, } +VEOR/VEORQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VBIC/VBICQ output: +VBIC/VBICQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff4, fffffff5, fffffff4, fffffff5, } +VBIC/VBICQ:1:result_int16x4 [] = { 0, 1, 2, 3, } +VBIC/VBICQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBIC/VBICQ:3:result_int64x1 [] = { ffffffffffffff90, } +VBIC/VBICQ:4:result_uint8x8 [] = { e0, e1, e2, e3, e0, e1, e2, e3, } +VBIC/VBICQ:5:result_uint16x4 [] = { ffe0, ffe1, ffe0, ffe1, } +VBIC/VBICQ:6:result_uint32x2 [] = { ffffffd0, ffffffd1, } 
+VBIC/VBICQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VBIC/VBICQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VBIC/VBICQ:9:result_int8x16 [] = { 0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9, } +VBIC/VBICQ:10:result_int16x8 [] = { 10, 11, 12, 13, 10, 11, 12, 13, } +VBIC/VBICQ:11:result_int32x4 [] = { 10, 11, 10, 11, } +VBIC/VBICQ:12:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VBIC/VBICQ:13:result_uint8x16 [] = { f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, } +VBIC/VBICQ:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff4, fff4, fff4, fff4, } +VBIC/VBICQ:15:result_uint32x4 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VBIC/VBICQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VBIC/VBICQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCREATE output: +VCREATE:0:result_int8x8 [] = { fffffff0, ffffffde, ffffffbc, ffffff9a, 78, 56, 34, 12, } +VCREATE:1:result_int16x4 [] = { ffffdef0, ffff9abc, 5678, 1234, } +VCREATE:2:result_int32x2 [] = { 9abcdef0, 12345678, } +VCREATE:3:result_int64x1 [] = { 123456789abcdef0, } +VCREATE:4:result_uint8x8 [] = { f0, de, bc, 9a, 78, 56, 34, 12, } +VCREATE:5:result_uint16x4 [] = { def0, 9abc, 5678, 1234, } +VCREATE:6:result_uint32x2 [] = { 9abcdef0, 12345678, } +VCREATE:7:result_uint64x1 [] = { 123456789abcdef0, } +VCREATE:8:result_float32x2 [] = { 9abcdef0 -0x1.79bdep-74 -7.81152e-23, 12345678 0x1.68acfp-91 5.69046e-28, } +VCREATE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:13:result_uint8x16 [] = { 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD2_LANE/VLD2Q_LANE chunk 0 output: +VLD2_LANE/VLD2Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD2_LANE/VLD2Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD2_LANE/VLD2Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:10:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:11:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:14:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } 
+VLD2_LANE/VLD2Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:17:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD2_LANE/VLD2Q_LANE chunk 1 output: +VLD2_LANE/VLD2Q_LANE:18:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:19:result_int16x4 [] = { fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:20:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:22:result_uint8x8 [] = { f0, f1, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:23:result_uint16x4 [] = { aaaa, aaaa, fff0, fff1, } +VLD2_LANE/VLD2Q_LANE:24:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:26:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD2_LANE/VLD2Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:28:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:29:result_int32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:32:result_uint16x8 [] = { aaaa, aaaa, fff0, fff1, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:33:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:35:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 
-0x1.ep+3 -15, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD3_LANE/VLD3Q_LANE chunk 0 output: +VLD3_LANE/VLD3Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD3_LANE/VLD3Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:10:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:11:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:14:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:17:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD3_LANE/VLD3Q_LANE chunk 1 output: +VLD3_LANE/VLD3Q_LANE:18:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, 
ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:19:result_int16x4 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:20:result_int32x2 [] = { fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:22:result_uint8x8 [] = { aa, aa, aa, aa, f0, f1, f2, aa, } +VLD3_LANE/VLD3Q_LANE:23:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:24:result_uint32x2 [] = { aaaaaaaa, fffffff0, } +VLD3_LANE/VLD3Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD3_LANE/VLD3Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:28:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:29:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:32:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, } +VLD3_LANE/VLD3Q_LANE:33:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:35:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } + +VLD3_LANE/VLD3Q_LANE chunk 2 output: +VLD3_LANE/VLD3Q_LANE:36:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:37:result_int16x4 [] = { fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:38:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } 
+VLD3_LANE/VLD3Q_LANE:39:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:40:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:41:result_uint16x4 [] = { aaaa, fff0, fff1, fff2, } +VLD3_LANE/VLD3Q_LANE:42:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:43:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:44:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD3_LANE/VLD3Q_LANE:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:46:result_int16x8 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:47:result_int32x4 [] = { fffffff2, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:50:result_uint16x8 [] = { fff1, fff2, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:51:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:53:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD4_LANE/VLD4Q_LANE chunk 0 output: +VLD4_LANE/VLD4Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:5:result_uint16x4 [] = 
{ aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD4_LANE/VLD4Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:10:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:11:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:14:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:17:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD4_LANE/VLD4Q_LANE chunk 1 output: +VLD4_LANE/VLD4Q_LANE:18:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:19:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:22:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:23:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:24:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:26:result_float32x2 
[] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD4_LANE/VLD4Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:28:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:29:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:32:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:33:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:35:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD4_LANE/VLD4Q_LANE chunk 2 output: +VLD4_LANE/VLD4Q_LANE:36:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:37:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:38:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:39:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:40:result_uint8x8 [] = { f0, f1, f2, f3, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:41:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:42:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:43:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:44:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD4_LANE/VLD4Q_LANE:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VLD4_LANE/VLD4Q_LANE:46:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:47:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:50:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:51:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:53:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VLD4_LANE/VLD4Q_LANE chunk 3 output: +VLD4_LANE/VLD4Q_LANE:54:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:55:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:56:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:57:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:58:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:59:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:60:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:61:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:62:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD4_LANE/VLD4Q_LANE:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:64:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:65:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } 
+VLD4_LANE/VLD4Q_LANE:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:68:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:69:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:71:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD2_DUP/VLD2Q_DUP chunk 0 output: +VLD2_DUP/VLD2Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:4:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD2_DUP/VLD2Q_DUP:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 
3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD2_DUP/VLD2Q_DUP chunk 1 output: +VLD2_DUP/VLD2Q_DUP:18:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:19:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:20:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:21:result_int64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:22:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:23:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:24:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:26:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD2_DUP/VLD2Q_DUP:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 
33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD3_DUP/VLD3Q_DUP chunk 0 output: +VLD3_DUP/VLD3Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f0, f1, f2, f0, f1, } +VLD3_DUP/VLD3Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff0, } +VLD3_DUP/VLD3Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD3_DUP/VLD3Q_DUP:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD3_DUP/VLD3Q_DUP chunk 1 output: +VLD3_DUP/VLD3Q_DUP:18:result_int8x8 [] = { fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, } 
+VLD3_DUP/VLD3Q_DUP:19:result_int16x4 [] = { fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:20:result_int32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:21:result_int64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:22:result_uint8x8 [] = { f2, f0, f1, f2, f0, f1, f2, f0, } +VLD3_DUP/VLD3Q_DUP:23:result_uint16x4 [] = { fff1, fff2, fff0, fff1, } +VLD3_DUP/VLD3Q_DUP:24:result_uint32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1800000 -0x1p+4 -16, } +VLD3_DUP/VLD3Q_DUP:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD3_DUP/VLD3Q_DUP chunk 2 output: +VLD3_DUP/VLD3Q_DUP:36:result_int8x8 [] = { fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:37:result_int16x4 [] = { fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:38:result_int32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:39:result_int64x1 [] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:40:result_uint8x8 [] = { f1, f2, f0, f1, f2, 
f0, f1, f2, } +VLD3_DUP/VLD3Q_DUP:41:result_uint16x4 [] = { fff2, fff0, fff1, fff2, } +VLD3_DUP/VLD3Q_DUP:42:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:43:result_uint64x1 [] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:44:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, } +VLD3_DUP/VLD3Q_DUP:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:50:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD4_DUP/VLD4Q_DUP chunk 0 output: +VLD4_DUP/VLD4Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, 
c1700000 -0x1.ep+3 -15, } +VLD4_DUP/VLD4Q_DUP:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD4_DUP/VLD4Q_DUP chunk 1 output: +VLD4_DUP/VLD4Q_DUP:18:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:19:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:21:result_int64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:22:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:23:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD4_DUP/VLD4Q_DUP:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:29:result_int32x4 [] = { 
33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD4_DUP/VLD4Q_DUP chunk 2 output: +VLD4_DUP/VLD4Q_DUP:36:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:37:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:38:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:39:result_int64x1 [] = { fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:40:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:41:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:42:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:43:result_uint64x1 [] = { fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:44:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD4_DUP/VLD4Q_DUP:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:50:result_uint16x8 [] = { 
3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD4_DUP/VLD4Q_DUP chunk 3 output: +VLD4_DUP/VLD4Q_DUP:54:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:55:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:56:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:57:result_int64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:58:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:59:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:60:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:61:result_uint64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:62:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD4_DUP/VLD4Q_DUP:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:64:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:65:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:68:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:69:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:71:result_float32x4 [] = { 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLA output: +VMLA:0:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VMLA:1:result_int16x4 [] = { 1f8c, 1f8d, 1f8e, 1f8f, } +VMLA:2:result_int32x2 [] = { 2bf7, 2bf8, } +VMLA:3:result_int64x1 [] = { 3333333333333333, } +VMLA:4:result_uint8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VMLA:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA:6:result_uint32x2 [] = { 43ac, 43ad, } +VMLA:7:result_uint64x1 [] = { 3333333333333333, } +VMLA:8:result_float32x2 [] = { 43a14e76 0x1.429cecp+8 322.613, 43a1ce76 0x1.439cecp+8 323.613, } +VMLA:9:result_int8x16 [] = { f, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, } +VMLA:10:result_int16x8 [] = { 4830, 4831, 4832, 4833, 4834, 4835, 4836, 4837, } +VMLA:11:result_int32x4 [] = { 470f, 4710, 4711, 4712, } +VMLA:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:13:result_uint8x16 [] = { ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, } +VMLA:14:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA:15:result_uint32x4 [] = { 3620, 3621, 3622, 3623, } +VMLA:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:17:result_float32x4 [] = { 45f0ae15 0x1.e15c2ap+12 7701.76, 45f0b615 0x1.e16c2ap+12 7702.76, 45f0be15 0x1.e17c2ap+12 7703.76, 45f0c615 0x1.e18c2ap+12 7704.76, } + +VMLS output: +VMLS:0:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VMLS:1:result_int16x4 [] = { ffffe054, ffffe055, ffffe056, ffffe057, } +VMLS:2:result_int32x2 [] = { ffffd3e9, ffffd3ea, } +VMLS:3:result_int64x1 [] = { 3333333333333333, } +VMLS:4:result_uint8x8 [] = { c0, c1, c2, c3, c4, c5, c6, c7, } +VMLS:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS:6:result_uint32x2 [] = { ffffbc34, ffffbc35, } +VMLS:7:result_uint64x1 [] = { 3333333333333333, } 
+VMLS:8:result_float32x2 [] = { c3b14e76 -0x1.629cecp+8 -354.613, c3b0ce76 -0x1.619cecp+8 -353.613, } +VMLS:9:result_int8x16 [] = { ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, } +VMLS:10:result_int16x8 [] = { ffffb7b0, ffffb7b1, ffffb7b2, ffffb7b3, ffffb7b4, ffffb7b5, ffffb7b6, ffffb7b7, } +VMLS:11:result_int32x4 [] = { ffffb8d1, ffffb8d2, ffffb8d3, ffffb8d4, } +VMLS:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:13:result_uint8x16 [] = { 34, 35, 36, 37, 38, 39, 3a, 3b, 3c, 3d, 3e, 3f, 40, 41, 42, 43, } +VMLS:14:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS:15:result_uint32x4 [] = { ffffc9c0, ffffc9c1, ffffc9c2, ffffc9c3, } +VMLS:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:17:result_float32x4 [] = { c5f1ae15 -0x1.e35c2ap+12 -7733.76, c5f1a615 -0x1.e34c2ap+12 -7732.76, c5f19e15 -0x1.e33c2ap+12 -7731.76, c5f19615 -0x1.e32c2ap+12 -7730.76, } + +VMUL output: +VMUL:0:result_int8x8 [] = { fffffff0, 1, 12, 23, 34, 45, 56, 67, } +VMUL:1:result_int16x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL:2:result_int32x2 [] = { fffffcd0, fffffd03, } +VMUL:3:result_int64x1 [] = { 3333333333333333, } +VMUL:4:result_uint8x8 [] = { c0, 4, 48, 8c, d0, 14, 58, 9c, } +VMUL:5:result_uint16x4 [] = { fab0, fb05, fb5a, fbaf, } +VMUL:6:result_uint32x2 [] = { fffff9a0, fffffa06, } +VMUL:7:result_uint64x1 [] = { 3333333333333333, } +VMUL:8:result_float32x2 [] = { c4053333 -0x1.0a6666p+9 -532.8, c3f9c000 -0x1.f38p+8 -499.5, } +VMUL:9:result_int8x16 [] = { ffffff90, 7, 7e, fffffff5, 6c, ffffffe3, 5a, ffffffd1, 48, ffffffbf, 36, ffffffad, 24, ffffff9b, 12, ffffff89, } +VMUL:10:result_int16x8 [] = { fffff780, fffff808, fffff890, fffff918, fffff9a0, fffffa28, fffffab0, fffffb38, } +VMUL:11:result_int32x4 [] = { fffff670, fffff709, fffff7a2, fffff83b, } +VMUL:12:result_int64x2 [] = { 
3333333333333333, 3333333333333333, } +VMUL:13:result_uint8x16 [] = { 60, a, b4, 5e, 8, b2, 5c, 6, b0, 5a, 4, ae, 58, 2, ac, 56, } +VMUL:14:result_uint16x8 [] = { f450, f50b, f5c6, f681, f73c, f7f7, f8b2, f96d, } +VMUL:15:result_uint32x4 [] = { fffff340, fffff40c, fffff4d8, fffff5a4, } +VMUL:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL:17:result_float32x4 [] = { c4c73333 -0x1.8e6666p+10 -1593.6, c4bac000 -0x1.758p+10 -1494, c4ae4ccd -0x1.5c999ap+10 -1394.4, c4a1d999 -0x1.43b332p+10 -1294.8, } + +VMUL_LANE output: +VMUL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:1:result_int16x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VMUL_LANE:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMUL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:5:result_uint16x4 [] = { bbc0, c004, c448, c88c, } +VMUL_LANE:6:result_uint32x2 [] = { fffface0, ffffb212, } +VMUL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_LANE:8:result_float32x2 [] = { c3b66666 -0x1.6cccccp+8 -364.8, c3ab0000 -0x1.56p+8 -342, } +VMUL_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:10:result_int16x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } +VMUL_LANE:11:result_int32x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:14:result_uint16x8 [] = { bbc0, c004, c448, c88c, ccd0, d114, d558, d99c, } +VMUL_LANE:15:result_uint32x4 [] = { fffface0, ffffb212, ffffb744, ffffbc76, } +VMUL_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:17:result_float32x4 [] = { c3b66666 -0x1.6cccccp+8 -364.8, c3ab0000 -0x1.56p+8 -342, c39f9999 -0x1.3f3332p+8 -319.2, c3943333 
-0x1.286666p+8 -296.4, } + +VMUL_N output: +VMUL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:1:result_int16x4 [] = { fffffef0, ffffff01, ffffff12, ffffff23, } +VMUL_N:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_N:3:result_int64x1 [] = { 3333333333333333, } +VMUL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:5:result_uint16x4 [] = { fcd0, fd03, fd36, fd69, } +VMUL_N:6:result_uint32x2 [] = { fffffbc0, fffffc04, } +VMUL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_N:8:result_float32x2 [] = { c3b26666 -0x1.64ccccp+8 -356.8, c3a74000 -0x1.4e8p+8 -334.5, } +VMUL_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:10:result_int16x8 [] = { fffffab0, fffffb05, fffffb5a, fffffbaf, fffffc04, fffffc59, fffffcae, fffffd03, } +VMUL_N:11:result_int32x4 [] = { fffff9a0, fffffa06, fffffa6c, fffffad2, } +VMUL_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:14:result_uint16x8 [] = { f890, f907, f97e, f9f5, fa6c, fae3, fb5a, fbd1, } +VMUL_N:15:result_uint32x4 [] = { fffff780, fffff808, fffff890, fffff918, } +VMUL_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:17:result_float32x4 [] = { c4b1cccd -0x1.63999ap+10 -1422.4, c4a6b000 -0x1.4d6p+10 -1333.5, c49b9333 -0x1.372666p+10 -1244.6, c4907667 -0x1.20eccep+10 -1155.7, } + +VMULL_N output: +VMULL_N:0:result_int32x4 [] = { 11000, 11000, 11000, 11000, } +VMULL_N:1:result_int64x2 [] = { 22000, 22000, } +VMULL_N:2:result_uint32x4 [] = { 33000, 33000, 33000, 33000, } +VMULL_N:3:result_uint64x2 [] = { 44000, 44000, } + +VMLA_LANE output: +VMLA_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:1:result_int16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:2:result_int32x2 [] = { 3e07, 3e08, } +VMLA_LANE:3:result_int64x1 [] = { 3333333333333333, } 
+VMLA_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:6:result_uint32x2 [] = { 3e07, 3e08, } +VMLA_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_LANE:8:result_float32x2 [] = { 4418c687 0x1.318d0ep+9 611.102, 44190687 0x1.320d0ep+9 612.102, } +VMLA_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:10:result_int16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:11:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:14:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:15:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:17:result_float32x4 [] = { 441a3168 0x1.3462dp+9 616.772, 441a7168 0x1.34e2dp+9 617.772, 441ab168 0x1.3562dp+9 618.772, 441af168 0x1.35e2dp+9 619.772, } + +VMLS_LANE output: +VMLS_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:1:result_int16x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:2:result_int32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLS_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS_LANE:6:result_uint32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_LANE:8:result_float32x2 [] = { c420c687 -0x1.418d0ep+9 -643.102, c4208687 -0x1.410d0ep+9 -642.102, } +VMLS_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:10:result_int16x8 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, ffffc1dd, ffffc1de, ffffc1df, 
ffffc1e0, } +VMLS_LANE:11:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:14:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS_LANE:15:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_LANE:17:result_float32x4 [] = { c4223168 -0x1.4462dp+9 -648.772, c421f168 -0x1.43e2dp+9 -647.772, c421b168 -0x1.4362dp+9 -646.772, c4217168 -0x1.42e2dp+9 -645.772, } + +VMLA_N output: +VMLA_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:1:result_int16x4 [] = { 595, 596, 597, 598, } +VMLA_N:2:result_int32x2 [] = { b3a, b3b, } +VMLA_N:3:result_int64x1 [] = { 3333333333333333, } +VMLA_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:5:result_uint16x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLA_N:6:result_uint32x2 [] = { 1684, 1685, } +VMLA_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_N:8:result_float32x2 [] = { 4497deb8 0x1.2fbd7p+10 1214.96, 4497feb8 0x1.2ffd7p+10 1215.96, } +VMLA_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:10:result_int16x8 [] = { 1c29, 1c2a, 1c2b, 1c2c, 1c2d, 1c2e, 1c2f, 1c30, } +VMLA_N:11:result_int32x4 [] = { 21ce, 21cf, 21d0, 21d1, } +VMLA_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:14:result_uint16x8 [] = { 2773, 2774, 2775, 2776, 2777, 2778, 2779, 277a, } +VMLA_N:15:result_uint32x4 [] = { 2d18, 2d19, 2d1a, 2d1b, } +VMLA_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:17:result_float32x4 [] = { 4568087b 0x1.d010f6p+11 3712.53, 4568187b 0x1.d030f6p+11 3713.53, 4568287b 0x1.d050f6p+11 3714.53, 
4568387b 0x1.d070f6p+11 3715.53, } + +VMLS_N output: +VMLS_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:1:result_int16x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLS_N:2:result_int32x2 [] = { fffff4a6, fffff4a7, } +VMLS_N:3:result_int64x1 [] = { 3333333333333333, } +VMLS_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:5:result_uint16x4 [] = { ef01, ef02, ef03, ef04, } +VMLS_N:6:result_uint32x2 [] = { ffffe95c, ffffe95d, } +VMLS_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_N:8:result_float32x2 [] = { c49bdeb8 -0x1.37bd7p+10 -1246.96, c49bbeb8 -0x1.377d7p+10 -1245.96, } +VMLS_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:10:result_int16x8 [] = { ffffe3b7, ffffe3b8, ffffe3b9, ffffe3ba, ffffe3bb, ffffe3bc, ffffe3bd, ffffe3be, } +VMLS_N:11:result_int32x4 [] = { ffffde12, ffffde13, ffffde14, ffffde15, } +VMLS_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:14:result_uint16x8 [] = { d86d, d86e, d86f, d870, d871, d872, d873, d874, } +VMLS_N:15:result_uint32x4 [] = { ffffd2c8, ffffd2c9, ffffd2ca, ffffd2cb, } +VMLS_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:17:result_float32x4 [] = { c56a087b -0x1.d410f6p+11 -3744.53, c569f87b -0x1.d3f0f6p+11 -3743.53, c569e87b -0x1.d3d0f6p+11 -3742.53, c569d87b -0x1.d3b0f6p+11 -3741.53, } + +VSLI_N output: +VSLI_N:0:result_int8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VSLI_N:1:result_int16x4 [] = { ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VSLI_N:2:result_int32x2 [] = { 6, 7, } +VSLI_N:3:result_int64x1 [] = { 64fffffff0, } +VSLI_N:4:result_uint8x8 [] = { 50, 51, 52, 53, 50, 51, 52, 53, } +VSLI_N:5:result_uint16x4 [] = { 7bf0, 7bf1, 7bf2, 7bf3, } +VSLI_N:6:result_uint32x2 [] = { 3ffffff0, 3ffffff1, } +VSLI_N:7:result_uint64x1 [] = { 10, } +VSLI_N:8:result_float32x2 [] = { 
33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSLI_N:9:result_int8x16 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, } +VSLI_N:10:result_int16x8 [] = { ffffff60, ffffff61, ffffff62, ffffff63, ffffff64, ffffff65, ffffff66, ffffff67, } +VSLI_N:11:result_int32x4 [] = { fe2ffff0, fe2ffff1, fe2ffff2, fe2ffff3, } +VSLI_N:12:result_int64x2 [] = { 18fff0, 18fff1, } +VSLI_N:13:result_uint8x16 [] = { 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, } +VSLI_N:14:result_uint16x8 [] = { 3ff0, 3ff1, 3ff2, 3ff3, 3ff4, 3ff5, 3ff6, 3ff7, } +VSLI_N:15:result_uint32x4 [] = { 1bfffff0, 1bfffff1, 1bfffff2, 1bfffff3, } +VSLI_N:16:result_uint64x2 [] = { 7ffffffffffff0, 7ffffffffffff1, } +VSLI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSRI_N output: +VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VSRI_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:2:result_int32x2 [] = { 80000001, 80000001, } +VSRI_N:3:result_int64x1 [] = { ffffffff00000000, } +VSRI_N:4:result_uint8x8 [] = { c5, c5, c5, c5, c5, c5, c5, c5, } +VSRI_N:5:result_uint16x4 [] = { ffc0, ffc0, ffc0, ffc0, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VSRI_N:7:result_uint64x1 [] = { e000000000000000, } +VSRI_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSRI_N:9:result_int8x16 [] = { fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:10:result_int16x8 [] = { fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, } 
+VSRI_N:11:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:12:result_int64x2 [] = { ffff000000000000, ffff000000000000, } +VSRI_N:13:result_uint8x16 [] = { e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, } +VSRI_N:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VSRI_N:15:result_uint32x4 [] = { fffffe00, fffffe00, fffffe00, fffffe00, } +VSRI_N:16:result_uint64x2 [] = { fffffffffffff800, fffffffffffff800, } +VSRI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTST/VTSTQ (signed input) output: +VTST/VTSTQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:3:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:4:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:5:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:6:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:7:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTST/VTSTQ:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:13:result_uint8x16 [] = { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:14:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:15:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:17:result_float32x4 [] 
= { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTST/VTSTQ (unsigned input) output: +VTST/VTSTQ:18:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:19:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:20:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:21:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:22:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:23:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:24:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:25:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTST/VTSTQ:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:31:result_uint8x16 [] = { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:32:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:33:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VADDHN output: +VADDHN:0:result_int8x8 [] = { 32, 32, 32, 32, 32, 32, 32, 32, } +VADDHN:1:result_int16x4 [] = { 32, 32, 32, 32, } +VADDHN:2:result_int32x2 [] = { 18, 18, } +VADDHN:3:result_int64x1 [] = { 3333333333333333, } +VADDHN:4:result_uint8x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VADDHN:5:result_uint16x4 [] = { 37, 37, 37, 37, } 
+VADDHN:6:result_uint32x2 [] = { 3, 3, } +VADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VADDHN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VADDHN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRADDHN output: +VRADDHN:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:1:result_int16x4 [] = { 33, 33, 33, 33, } +VRADDHN:2:result_int32x2 [] = { 19, 19, } +VRADDHN:3:result_int64x1 [] = { 3333333333333333, } +VRADDHN:4:result_uint8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VRADDHN:5:result_uint16x4 [] = { 38, 38, 38, 38, } +VRADDHN:6:result_uint32x2 [] = { 4, 4, } +VRADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VRADDHN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRADDHN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, } +VRADDHN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VADDL output: +VADDL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:2:result_int32x2 [] = { 33333333, 33333333, } +VADDL:3:result_int64x1 [] = { 3333333333333333, } +VADDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDL:7:result_uint64x1 [] = { 3333333333333333, } +VADDL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VADDL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:10:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDL:11:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDL:12:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:14:result_uint16x8 [] = { 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1ea, } +VADDL:15:result_uint32x4 [] = { 1ffe1, 1ffe2, 1ffe3, 1ffe4, } +VADDL:16:result_uint64x2 [] = { 1ffffffe0, 1ffffffe1, } +VADDL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VADDW output: +VADDW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, 
} +VADDW:2:result_int32x2 [] = { 33333333, 33333333, } +VADDW:3:result_int64x1 [] = { 3333333333333333, } +VADDW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDW:7:result_uint64x1 [] = { 3333333333333333, } +VADDW:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VADDW:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:10:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDW:11:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDW:12:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDW:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:14:result_uint16x8 [] = { e3, e4, e5, e6, e7, e8, e9, ea, } +VADDW:15:result_uint32x4 [] = { ffe1, ffe2, ffe3, ffe4, } +VADDW:16:result_uint64x2 [] = { ffffffe0, ffffffe1, } +VADDW:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VHADD/VHADDQ output: +VHADD/VHADDQ:0:result_int8x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VHADD/VHADDQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:4:result_uint8x8 [] = { f1, f2, f2, f3, f3, f4, f4, f5, } +VHADD/VHADDQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff2, } +VHADD/VHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } 
+VHADD/VHADDQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, } +VHADD/VHADDQ:10:result_int16x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:13:result_uint8x16 [] = { f4, f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, } +VHADD/VHADDQ:14:result_uint16x8 [] = { fff1, fff1, fff2, fff2, fff3, fff3, fff4, fff4, } +VHADD/VHADDQ:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRHADD/VRHADDQ output: +VRHADD/VRHADDQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:1:result_int16x4 [] = { fffffff1, fffffff2, fffffff2, fffffff3, } +VRHADD/VRHADDQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:4:result_uint8x8 [] = { f2, f2, f3, f3, f4, f4, f5, f5, } +VRHADD/VRHADDQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff2, } +VRHADD/VRHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRHADD/VRHADDQ:9:result_int8x16 [] = { fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, } +VRHADD/VRHADDQ:10:result_int16x8 [] 
= { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:13:result_uint8x16 [] = { f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, fc, } +VRHADD/VRHADDQ:14:result_uint16x8 [] = { fff1, fff2, fff2, fff3, fff3, fff4, fff4, fff5, } +VRHADD/VRHADDQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VHSUB/VHSUBQ output: +VHSUB/VHSUBQ:0:result_int8x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:1:result_int16x4 [] = { ffffffff, ffffffff, 0, 0, } +VHSUB/VHSUBQ:2:result_int32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:3:result_int64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:4:result_uint8x8 [] = { fe, ff, ff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:5:result_uint16x4 [] = { ffff, 0, 0, 1, } +VHSUB/VHSUBQ:6:result_uint32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:7:result_uint64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VHSUB/VHSUBQ:9:result_int8x16 [] = { fffffffe, fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, } +VHSUB/VHSUBQ:10:result_int16x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:11:result_int32x4 [] = { ffffffff, 0, 0, 1, } +VHSUB/VHSUBQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:13:result_uint8x16 [] = { fb, fc, fc, fd, fd, fe, fe, ff, ff, 0, 0, 1, 1, 2, 2, 3, } +VHSUB/VHSUBQ:14:result_uint16x8 [] = { ffff, ffff, 0, 0, 1, 1, 2, 2, } +VHSUB/VHSUBQ:15:result_uint32x4 [] = { ffffffff, 0, 0, 1, } 
+VHSUB/VHSUBQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSUBL output: +VSUBL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBL:3:result_int64x1 [] = { 3333333333333333, } +VSUBL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:6:result_uint32x2 [] = { 33333333, 33333333, } +VSUBL:7:result_uint64x1 [] = { 3333333333333333, } +VSUBL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSUBL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:10:result_int16x8 [] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBL:11:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBL:12:result_int64x2 [] = { 0, 1, } +VSUBL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:14:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, } +VSUBL:15:result_uint32x4 [] = { ffffffff, 0, 1, 2, } +VSUBL:16:result_uint64x2 [] = { 0, 1, } +VSUBL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSUBW output: +VSUBW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBW:3:result_int64x1 [] = { 3333333333333333, } +VSUBW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:6:result_uint32x2 [] = { 33333333, 33333333, } 
+VSUBW:7:result_uint64x1 [] = { 3333333333333333, } +VSUBW:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSUBW:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:10:result_int16x8 [] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBW:11:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBW:12:result_int64x2 [] = { 0, 1, } +VSUBW:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:14:result_uint16x8 [] = { fefd, fefe, feff, ff00, ff01, ff02, ff03, ff04, } +VSUBW:15:result_uint32x4 [] = { fffeffff, ffff0000, ffff0001, ffff0002, } +VSUBW:16:result_uint64x2 [] = { ffffffff00000000, ffffffff00000001, } +VSUBW:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSUBHN output: +VSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VSUBHN:2:result_int32x2 [] = { 17, 17, } +VSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VSUBHN:6:result_uint32x2 [] = { 2, 2, } +VSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VSUBHN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSUBHN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VSUBHN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSUBHN output: +VRSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VRSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VRSUBHN:2:result_int32x2 [] = { 17, 17, } +VRSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VRSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VRSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VRSUBHN:6:result_uint32x2 [] = { 2, 2, } +VRSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VRSUBHN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSUBHN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMVN/VMVNQ output: +VMVN/VMVNQ:0:result_int8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:1:result_int16x4 [] = { f, e, d, c, } +VMVN/VMVNQ:2:result_int32x2 [] = { f, e, } +VMVN/VMVNQ:3:result_int64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:4:result_uint8x8 
[] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:5:result_uint16x4 [] = { f, e, d, c, } +VMVN/VMVNQ:6:result_uint32x2 [] = { f, e, } +VMVN/VMVNQ:7:result_uint64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMVN/VMVNQ:9:result_int8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:10:result_int16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:11:result_int32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:13:result_uint8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:14:result_uint16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:15:result_uint32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQMOVN overflow output: +VQMOVN:0:vqmovn_s16 Neon overflow 0 +VQMOVN:1:vqmovn_s32 Neon overflow 0 +VQMOVN:2:vqmovn_s64 Neon overflow 0 +VQMOVN:3:vqmovn_u16 Neon overflow 0 +VQMOVN:4:vqmovn_u32 Neon overflow 0 +VQMOVN:5:vqmovn_u64 Neon overflow 0 + +VQMOVN output: +VQMOVN:6:result_int8x8 [] = { 12, 12, 12, 12, 12, 12, 12, 12, } +VQMOVN:7:result_int16x4 [] = { 1278, 1278, 1278, 1278, } +VQMOVN:8:result_int32x2 [] = { 12345678, 12345678, } +VQMOVN:9:result_int64x1 [] = { 3333333333333333, } +VQMOVN:10:result_uint8x8 [] = { 82, 82, 82, 82, 82, 82, 82, 82, } +VQMOVN:11:result_uint16x4 [] = { 8765, 8765, 8765, 8765, } +VQMOVN:12:result_uint32x2 [] = { 87654321, 87654321, } +VQMOVN:13:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQMOVN:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQMOVN:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQMOVN overflow output: +VQMOVN:24:vqmovn_s16 Neon overflow 1 +VQMOVN:25:vqmovn_s32 Neon overflow 1 +VQMOVN:26:vqmovn_s64 Neon overflow 1 +VQMOVN:27:vqmovn_u16 Neon overflow 1 +VQMOVN:28:vqmovn_u32 Neon overflow 1 +VQMOVN:29:vqmovn_u64 Neon overflow 1 + +VQMOVN output: +VQMOVN:30:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQMOVN:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQMOVN:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQMOVN:33:result_int64x1 [] = { 3333333333333333, } +VQMOVN:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQMOVN:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQMOVN:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQMOVN:37:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:38:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQMOVN:39:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:41:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, } +VQMOVN:44:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:47:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQMOVUN overflow output: +VQMOVUN:0:vqmovun_s16 Neon overflow 0 +VQMOVUN:1:vqmovun_s32 Neon overflow 0 +VQMOVUN:2:vqmovun_s64 Neon overflow 0 + +VQMOVUN output: +VQMOVUN:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:5:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:6:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:7:result_uint8x8 [] = { 34, 34, 34, 34, 34, 34, 34, 34, } +VQMOVUN:8:result_uint16x4 [] = { 5678, 5678, 5678, 5678, } +VQMOVUN:9:result_uint32x2 [] = { 12345678, 12345678, } +VQMOVUN:10:result_uint64x1 [] = { 3333333333333333, } +VQMOVUN:11:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQMOVUN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:20:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 
4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQMOVUN (negative input) overflow output: +VQMOVUN:21:vqmovun_s16 Neon overflow 1 +VQMOVUN:22:vqmovun_s32 Neon overflow 1 +VQMOVUN:23:vqmovun_s64 Neon overflow 1 + +VQMOVUN (negative input) output: +VQMOVUN:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:26:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:27:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQMOVUN:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:30:result_uint32x2 [] = { 0, 0, } +VQMOVUN:31:result_uint64x1 [] = { 3333333333333333, } +VQMOVUN:32:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQMOVUN:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:41:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N output: +VRSHR_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHR_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHR_N:3:result_int64x1 [] = { 0, } +VRSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 
3e, } +VRSHR_N:5:result_uint16x4 [] = { 1ffe, 1ffe, 1ffe, 1ffe, } +VRSHR_N:6:result_uint32x2 [] = { 8000000, 8000000, } +VRSHR_N:7:result_uint64x1 [] = { 80000000, } +VRSHR_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:9:result_int8x16 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, 0, } +VRSHR_N:10:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:11:result_int32x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHR_N:12:result_int64x2 [] = { 0, 0, } +VRSHR_N:13:result_uint8x16 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, 40, 40, } +VRSHR_N:14:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1fff, 1fff, 1fff, 1fff, } +VRSHR_N:15:result_uint32x4 [] = { 8000000, 8000000, 8000000, 8000000, } +VRSHR_N:16:result_uint64x2 [] = { 80000000, 80000000, } +VRSHR_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: max shift amount, positive input) output: +VRSHR_N:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:19:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:20:result_int32x2 [] = { 0, 0, } +VRSHR_N:21:result_int64x1 [] = { 0, } +VRSHR_N:22:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:23:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSHR_N:24:result_uint32x2 [] = { 1, 1, } +VRSHR_N:25:result_uint64x1 [] = { 1, } +VRSHR_N:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:29:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHR_N:30:result_int64x2 [] = { 0, 0, } +VRSHR_N:31:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:32:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:33:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHR_N:34:result_uint64x2 [] = { 1, 1, } +VRSHR_N:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VRSHR_N:36:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:37:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHR_N:38:result_int32x2 [] = { 40000000, 40000000, } +VRSHR_N:39:result_int64x1 [] = { 4000000000000000, } +VRSHR_N:40:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:41:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:42:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:43:result_uint64x1 [] = { 8000000000000000, } +VRSHR_N:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:45:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:46:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHR_N:47:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHR_N:48:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHR_N:49:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:50:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:51:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:52:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: shift by 3, positive input) output: +VRSHR_N:54:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 
10, 10, } +VRSHR_N:55:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHR_N:56:result_int32x2 [] = { 10000000, 10000000, } +VRSHR_N:57:result_int64x1 [] = { 1000000000000000, } +VRSHR_N:58:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:59:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:60:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:61:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:63:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHR_N:64:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHR_N:65:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHR_N:66:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHR_N:67:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:68:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:69:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:70:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VRSHR_N:72:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:73:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:74:result_int32x2 [] = { c0000000, c0000000, } +VRSHR_N:75:result_int64x1 [] = { c000000000000000, } +VRSHR_N:76:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:77:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:78:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:79:result_uint64x1 [] = { 8000000000000000, } 
+VRSHR_N:80:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:81:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:82:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:83:result_int32x4 [] = { c0000000, c0000000, c0000000, c0000000, } +VRSHR_N:84:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSHR_N:85:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:86:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:87:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:88:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:89:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: shift by 3, with negative input) output: +VRSHR_N:90:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSHR_N:91:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:92:result_int32x2 [] = { f0000000, f0000000, } +VRSHR_N:93:result_int64x1 [] = { f000000000000000, } +VRSHR_N:94:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:95:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:96:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:97:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:98:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:99:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, 
fffffff0, } +VRSHR_N:100:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:101:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSHR_N:102:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSHR_N:103:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:104:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:105:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:106:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:107:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N output: +VRSRA_N:0:result_int8x8 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VRSRA_N:2:result_int32x2 [] = { fffffffd, fffffffe, } +VRSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VRSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VRSRA_N:5:result_uint16x4 [] = { fffd, fffe, ffff, 0, } +VRSRA_N:6:result_uint32x2 [] = { fffffff4, fffffff5, } +VRSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VRSRA_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:9:result_int8x16 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, } +VRSRA_N:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:11:result_int32x4 [] = { fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:13:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VRSRA_N:14:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, 
} +VRSRA_N:15:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by 1, positive input) output: +VRSRA_N:18:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:19:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSRA_N:20:result_int32x2 [] = { 40000000, 40000000, } +VRSRA_N:21:result_int64x1 [] = { 4000000000000000, } +VRSRA_N:22:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:23:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSRA_N:24:result_uint32x2 [] = { 80000000, 80000000, } +VRSRA_N:25:result_uint64x1 [] = { 8000000000000000, } +VRSRA_N:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:27:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:28:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSRA_N:29:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSRA_N:30:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSRA_N:31:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:32:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSRA_N:33:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSRA_N:34:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSRA_N:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by 3, positive input) output: +VRSRA_N:36:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } 
+VRSRA_N:37:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSRA_N:38:result_int32x2 [] = { 10000000, 10000000, } +VRSRA_N:39:result_int64x1 [] = { 1000000000000000, } +VRSRA_N:40:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:41:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSRA_N:42:result_uint32x2 [] = { 20000000, 20000000, } +VRSRA_N:43:result_uint64x1 [] = { 2000000000000000, } +VRSRA_N:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:45:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSRA_N:46:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSRA_N:47:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSRA_N:48:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSRA_N:49:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:50:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSRA_N:51:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSRA_N:52:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSRA_N:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by max, positive input) output: +VRSRA_N:54:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:55:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:56:result_int32x2 [] = { 0, 0, } +VRSRA_N:57:result_int64x1 [] = { 0, } +VRSRA_N:58:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:59:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:60:result_uint32x2 [] = { 1, 1, } +VRSRA_N:61:result_uint64x1 [] = { 1, } +VRSRA_N:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:63:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:64:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:65:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:66:result_int64x2 [] = { 0, 0, } +VRSRA_N:67:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:68:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:69:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:70:result_uint64x2 [] = { 1, 1, } +VRSRA_N:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by 1, negative input) output: +VRSRA_N:72:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:73:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:74:result_int32x2 [] = { c0000000, c0000000, } +VRSRA_N:75:result_int64x1 [] = { c000000000000000, } +VRSRA_N:76:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:77:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:78:result_uint32x2 [] = { 1, 1, } +VRSRA_N:79:result_uint64x1 [] = { 1, } +VRSRA_N:80:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:81:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:82:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:83:result_int32x4 [] = { c0000000, c0000000, c0000000, c0000000, } +VRSRA_N:84:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSRA_N:85:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:86:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:87:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:88:result_uint64x2 [] = { 1, 1, } +VRSRA_N:89:result_float32x4 [] 
= { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:90:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:91:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:92:result_int32x2 [] = { f0000000, f0000000, } +VRSRA_N:93:result_int64x1 [] = { f000000000000000, } +VRSRA_N:94:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:95:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:96:result_uint32x2 [] = { 1, 1, } +VRSRA_N:97:result_uint64x1 [] = { 1, } +VRSRA_N:98:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:99:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:100:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:101:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSRA_N:102:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSRA_N:103:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:104:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:105:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:106:result_uint64x2 [] = { 1, 1, } +VRSRA_N:107:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:108:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:109:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:110:result_int32x2 [] = { 0, 0, } +VRSRA_N:111:result_int64x1 [] = { 0, } 
+VRSRA_N:112:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:113:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:114:result_uint32x2 [] = { 1, 1, } +VRSRA_N:115:result_uint64x1 [] = { 1, } +VRSRA_N:116:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:117:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:118:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:119:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:120:result_int64x2 [] = { 0, 0, } +VRSRA_N:121:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:122:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:123:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:124:result_uint64x2 [] = { 1, 1, } +VRSRA_N:125:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHLL_N output: +VSHLL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VSHLL_N:3:result_int64x1 [] = { 3333333333333333, } +VSHLL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VSHLL_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHLL_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHLL_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:10:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHLL_N:11:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHLL_N:12:result_int64x2 [] = { ffffffffffffff80, ffffffffffffff88, } +VSHLL_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:14:result_uint16x8 [] = { 3c0, 3c4, 3c8, 3cc, 3d0, 3d4, 3d8, 3dc, } +VSHLL_N:15:result_uint32x4 [] = { fff00, fff10, fff20, fff30, } +VSHLL_N:16:result_uint64x2 [] = { 7ffffff80, 7ffffff88, } +VSHLL_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VPADDL/VPADDLQ output: +VPADDL/VPADDLQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:2:result_int32x2 [] = { ffffffe1, ffffffe5, } +VPADDL/VPADDLQ:3:result_int64x1 [] = { ffffffffffffffe1, } +VPADDL/VPADDLQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:5:result_uint16x4 [] = { 1e1, 1e5, 1e9, 1ed, } +VPADDL/VPADDLQ:6:result_uint32x2 [] = { 1ffe1, 1ffe5, } +VPADDL/VPADDLQ:7:result_uint64x1 [] = { 1ffffffe1, } +VPADDL/VPADDLQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VPADDL/VPADDLQ:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:10:result_int16x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, fffffff1, fffffff5, fffffff9, fffffffd, } +VPADDL/VPADDLQ:11:result_int32x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:12:result_int64x2 [] = { ffffffffffffffe1, ffffffffffffffe5, } +VPADDL/VPADDLQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:14:result_uint16x8 [] = { 1e1, 1e5, 1e9, 1ed, 1f1, 1f5, 1f9, 1fd, } +VPADDL/VPADDLQ:15:result_uint32x4 [] = { 1ffe1, 1ffe5, 1ffe9, 1ffed, } +VPADDL/VPADDLQ:16:result_uint64x2 [] = { 1ffffffe1, 1ffffffe5, } +VPADDL/VPADDLQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 
4.17233e-08, } + +VPADD output: +VPADD:0:result_int8x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADD:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe1, ffffffe5, } +VPADD:2:result_int32x2 [] = { ffffffe1, ffffffe1, } +VPADD:3:result_int64x1 [] = { 3333333333333333, } +VPADD:4:result_uint8x8 [] = { e1, e5, e9, ed, e1, e5, e9, ed, } +VPADD:5:result_uint16x4 [] = { ffe1, ffe5, ffe1, ffe5, } +VPADD:6:result_uint32x2 [] = { ffffffe1, ffffffe1, } +VPADD:7:result_uint64x1 [] = { 3333333333333333, } +VPADD:8:result_float32x2 [] = { c1f80000 -0x1.fp+4 -31, c1f80000 -0x1.fp+4 -31, } +VPADD:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VPADAL/VPADALQ output: +VPADAL/VPADALQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:1:result_int16x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:2:result_int32x2 [] = { ffffffd1, ffffffd6, } +VPADAL/VPADALQ:3:result_int64x1 [] = { ffffffffffffffd1, } +VPADAL/VPADALQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:5:result_uint16x4 [] = { 1d1, 1d6, 1db, 1e0, } +VPADAL/VPADALQ:6:result_uint32x2 [] = { 1ffd1, 1ffd6, } +VPADAL/VPADALQ:7:result_uint64x1 
[] = { 1ffffffd1, } +VPADAL/VPADALQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VPADAL/VPADALQ:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:10:result_int16x8 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, ffffffe5, ffffffea, ffffffef, fffffff4, } +VPADAL/VPADALQ:11:result_int32x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:12:result_int64x2 [] = { ffffffffffffffd1, ffffffffffffffd6, } +VPADAL/VPADALQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:14:result_uint16x8 [] = { 1d1, 1d6, 1db, 1e0, 1e5, 1ea, 1ef, 1f4, } +VPADAL/VPADALQ:15:result_uint32x4 [] = { 1ffd1, 1ffd6, 1ffdb, 1ffe0, } +VPADAL/VPADALQ:16:result_uint64x2 [] = { 1ffffffd1, 1ffffffd6, } +VPADAL/VPADALQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHLU_N/VQSHLUQ_N (negative input) overflow output: +VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon overflow 1 + +VQSHLU_N/VQSHLUQ_N (negative input) output: +VQSHLU_N/VQSHLUQ_N:8:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:9:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:10:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:11:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:12:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:13:result_uint16x4 [] = { 0, 0, 0, 0, } 
+VQSHLU_N/VQSHLUQ_N:14:result_uint32x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:15:result_uint64x1 [] = { 0, } +VQSHLU_N/VQSHLUQ_N:16:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHLU_N/VQSHLUQ_N:17:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:18:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:19:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:21:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:22:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:23:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:24:result_uint64x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:25:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) overflow output: +VQSHLU_N/VQSHLUQ_N:26:vqshlu_n_s8 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:27:vqshlu_n_s16 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:28:vqshlu_n_s32 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:29:vqshlu_n_s64 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:30:vqshluq_n_s8 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:31:vqshluq_n_s16 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:32:vqshluq_n_s32 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:33:vqshluq_n_s64 Neon overflow 0 + +VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) output: +VQSHLU_N/VQSHLUQ_N:34:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:35:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:36:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:37:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:38:result_uint8x8 [] = { fe, fe, fe, fe, fe, fe, 
fe, fe, } +VQSHLU_N/VQSHLUQ_N:39:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:40:result_uint32x2 [] = { fffffffe, fffffffe, } +VQSHLU_N/VQSHLUQ_N:41:result_uint64x1 [] = { fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:42:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHLU_N/VQSHLUQ_N:43:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:44:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:45:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:46:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:47:result_uint8x16 [] = { fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, } +VQSHLU_N/VQSHLUQ_N:48:result_uint16x8 [] = { fffe, fffe, fffe, fffe, fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:49:result_uint32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } +VQSHLU_N/VQSHLUQ_N:50:result_uint64x2 [] = { fffffffffffffffe, fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:51:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) overflow output: +VQSHLU_N/VQSHLUQ_N:52:vqshlu_n_s8 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:53:vqshlu_n_s16 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:54:vqshlu_n_s32 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:55:vqshlu_n_s64 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:56:vqshluq_n_s8 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:57:vqshluq_n_s16 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:58:vqshluq_n_s32 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:59:vqshluq_n_s64 Neon overflow 1 + +VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) output: +VQSHLU_N/VQSHLUQ_N:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:61:result_int16x4 [] = { 3333, 3333, 3333, 
3333, } +VQSHLU_N/VQSHLUQ_N:62:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:63:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:64:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:65:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:66:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:67:result_uint64x1 [] = { ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:68:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHLU_N/VQSHLUQ_N:69:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:70:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:71:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:72:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:73:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:74:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:75:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:76:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:77:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHLU_N/VQSHLUQ_N overflow output: +VQSHLU_N/VQSHLUQ_N:78:vqshlu_n_s8 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:79:vqshlu_n_s16 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:80:vqshlu_n_s32 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:81:vqshlu_n_s64 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:82:vqshluq_n_s8 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:83:vqshluq_n_s16 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:84:vqshluq_n_s32 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:85:vqshluq_n_s64 Neon overflow 0 + +VQSHLU_N/VQSHLUQ_N output: 
+VQSHLU_N/VQSHLUQ_N:86:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:87:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:88:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:89:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:90:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VQSHLU_N/VQSHLUQ_N:91:result_uint16x4 [] = { 8, 8, 8, 8, } +VQSHLU_N/VQSHLUQ_N:92:result_uint32x2 [] = { 18, 18, } +VQSHLU_N/VQSHLUQ_N:93:result_uint64x1 [] = { 40, } +VQSHLU_N/VQSHLUQ_N:94:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHLU_N/VQSHLUQ_N:95:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:96:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:97:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:98:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:99:result_uint8x16 [] = { a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, } +VQSHLU_N/VQSHLUQ_N:100:result_uint16x8 [] = { 180, 180, 180, 180, 180, 180, 180, 180, } +VQSHLU_N/VQSHLUQ_N:101:result_uint32x4 [] = { 380, 380, 380, 380, } +VQSHLU_N/VQSHLUQ_N:102:result_uint64x2 [] = { 800, 800, } +VQSHLU_N/VQSHLUQ_N:103:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCLZ/VCLZQ output: +VCLZ/VCLZQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VCLZ/VCLZQ:1:result_int16x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:2:result_int32x2 [] = { 11, 11, } +VCLZ/VCLZQ:3:result_int64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VCLZ/VCLZQ:6:result_uint32x2 [] = { 5, 5, } +VCLZ/VCLZQ:7:result_uint64x1 [] = { 3333333333333333, } 
+VCLZ/VCLZQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCLZ/VCLZQ:9:result_int8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:10:result_int16x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:11:result_int32x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:13:result_uint8x16 [] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:14:result_uint16x8 [] = { d, d, d, d, d, d, d, d, } +VCLZ/VCLZQ:15:result_uint32x4 [] = { 1f, 1f, 1f, 1f, } +VCLZ/VCLZQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCLS/VCLSQ (positive input) output: +VCLS/VCLSQ:0:result_int8x8 [] = { 6, 6, 6, 6, 6, 6, 6, 6, } +VCLS/VCLSQ:1:result_int16x4 [] = { 2, 2, 2, 2, } +VCLS/VCLSQ:2:result_int32x2 [] = { 19, 19, } +VCLS/VCLSQ:3:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:7:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCLS/VCLSQ:9:result_int8x16 [] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:10:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:11:result_int32x4 [] = { 14, 14, 14, 14, } +VCLS/VCLSQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:15:result_uint32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VCLS/VCLSQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCLS/VCLSQ (positive input) output: +VCLS/VCLSQ:18:result_int8x8 [] = { 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:19:result_int16x4 [] = { 1, 1, 1, 1, } +VCLS/VCLSQ:20:result_int32x2 [] = { 1, 1, } +VCLS/VCLSQ:21:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:22:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:23:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:24:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:25:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCLS/VCLSQ:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLS/VCLSQ:28:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:29:result_int32x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCNT/VCNTQ output: +VCNT/VCNTQ:0:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCNT/VCNTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:3:result_int64x1 [] = { 3333333333333333, } 
+VCNT/VCNTQ:4:result_uint8x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VCNT/VCNTQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:7:result_uint64x1 [] = { 3333333333333333, } +VCNT/VCNTQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCNT/VCNTQ:9:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCNT/VCNTQ:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:13:result_uint8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, } +VCNT/VCNTQ:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRN_N overflow output: +VQSHRN_N:0:vqshrn_n_s16 Neon overflow 0 +VQSHRN_N:1:vqshrn_n_s32 Neon overflow 0 +VQSHRN_N:2:vqshrn_n_s64 Neon overflow 0 +VQSHRN_N:3:vqshrn_n_u16 Neon overflow 1 +VQSHRN_N:4:vqshrn_n_u32 Neon overflow 1 +VQSHRN_N:5:vqshrn_n_u64 Neon overflow 1 + +VQSHRN_N output: +VQSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VQSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } 
+VQSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRN_N:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRN_N (check saturation: shift by 3) overflow output: +VQSHRN_N:24:vqshrn_n_s16 Neon overflow 1 +VQSHRN_N:25:vqshrn_n_s32 Neon overflow 1 +VQSHRN_N:26:vqshrn_n_s64 Neon overflow 1 +VQSHRN_N:27:vqshrn_n_u16 Neon overflow 1 +VQSHRN_N:28:vqshrn_n_u32 Neon overflow 1 +VQSHRN_N:29:vqshrn_n_u64 Neon overflow 1 + +VQSHRN_N (check saturation: shift by 3) output: +VQSHRN_N:30:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:33:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:37:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:38:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } 
+VQSHRN_N:39:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:41:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:44:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:47:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRN_N (check saturation: shift by max) overflow output: +VQSHRN_N:48:vqshrn_n_s16 Neon overflow 0 +VQSHRN_N:49:vqshrn_n_s32 Neon overflow 0 +VQSHRN_N:50:vqshrn_n_s64 Neon overflow 0 +VQSHRN_N:51:vqshrn_n_u16 Neon overflow 0 +VQSHRN_N:52:vqshrn_n_u32 Neon overflow 0 +VQSHRN_N:53:vqshrn_n_u64 Neon overflow 0 + +VQSHRN_N (check saturation: shift by max) output: +VQSHRN_N:54:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:55:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:56:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:57:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:58:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:59:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:60:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:61:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRN_N:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:64:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 
3333, 3333, 3333, } +VQSHRN_N:65:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:68:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:69:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VPMAX output: +VPMAX:0:result_int8x8 [] = { fffffff1, fffffff3, fffffff5, fffffff7, fffffff1, fffffff3, fffffff5, fffffff7, } +VPMAX:1:result_int16x4 [] = { fffffff1, fffffff3, fffffff1, fffffff3, } +VPMAX:2:result_int32x2 [] = { fffffff1, fffffff1, } +VPMAX:3:result_int64x1 [] = { 3333333333333333, } +VPMAX:4:result_uint8x8 [] = { f1, f3, f5, f7, f1, f3, f5, f7, } +VPMAX:5:result_uint16x4 [] = { fff1, fff3, fff1, fff3, } +VPMAX:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VPMAX:7:result_uint64x1 [] = { 3333333333333333, } +VPMAX:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VPMAX:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:17:result_float32x4 [] = { 
33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VPMIN output: +VPMIN:0:result_int8x8 [] = { fffffff0, fffffff2, fffffff4, fffffff6, fffffff0, fffffff2, fffffff4, fffffff6, } +VPMIN:1:result_int16x4 [] = { fffffff0, fffffff2, fffffff0, fffffff2, } +VPMIN:2:result_int32x2 [] = { fffffff0, fffffff0, } +VPMIN:3:result_int64x1 [] = { 3333333333333333, } +VPMIN:4:result_uint8x8 [] = { f0, f2, f4, f6, f0, f2, f4, f6, } +VPMIN:5:result_uint16x4 [] = { fff0, fff2, fff0, fff2, } +VPMIN:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VPMIN:7:result_uint64x1 [] = { 3333333333333333, } +VPMIN:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } +VPMIN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRUN_N (negative input) overflow output: +VQSHRUN_N:0:vqshrun_n_s16 Neon overflow 1 +VQSHRUN_N:1:vqshrun_n_s32 Neon overflow 1 +VQSHRUN_N:2:vqshrun_n_s64 Neon overflow 1 + +VQSHRUN_N (negative input) output: +VQSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } 
+VQSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:11:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRUN_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:20:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRUN_N (check saturation/overflow) overflow output: +VQSHRUN_N:21:vqshrun_n_s16 Neon overflow 1 +VQSHRUN_N:22:vqshrun_n_s32 Neon overflow 1 +VQSHRUN_N:23:vqshrun_n_s64 Neon overflow 1 + +VQSHRUN_N (check saturation/overflow) output: +VQSHRUN_N:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:26:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:27:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:28:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRUN_N:29:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRUN_N:30:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRUN_N:31:result_uint64x1 [] = { 3333333333333333, } 
+VQSHRUN_N:32:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRUN_N:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:41:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRUN_N overflow output: +VQSHRUN_N:42:vqshrun_n_s16 Neon overflow 0 +VQSHRUN_N:43:vqshrun_n_s32 Neon overflow 1 +VQSHRUN_N:44:vqshrun_n_s64 Neon overflow 0 + +VQSHRUN_N output: +VQSHRUN_N:45:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:46:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:47:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:48:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:49:result_uint8x8 [] = { 48, 48, 48, 48, 48, 48, 48, 48, } +VQSHRUN_N:50:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:51:result_uint32x2 [] = { deadbe, deadbe, } +VQSHRUN_N:52:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:53:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRUN_N:54:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:55:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:56:result_int32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VQSHRUN_N:57:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:58:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:59:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:60:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:61:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:62:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N (negative input) overflow output: +VQRSHRUN_N:0:vqrshrun_n_s16 Neon overflow 0 +VQRSHRUN_N:1:vqrshrun_n_s32 Neon overflow 0 +VQRSHRUN_N:2:vqrshrun_n_s64 Neon overflow 1 + +VQRSHRUN_N (negative input) output: +VQRSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:11:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQRSHRUN_N:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:20:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N (check saturation/overflow: shift by 1) overflow output: +VQRSHRUN_N:21:vqrshrun_n_s16 Neon overflow 1 +VQRSHRUN_N:22:vqrshrun_n_s32 Neon overflow 1 +VQRSHRUN_N:23:vqrshrun_n_s64 Neon overflow 1 + +VQRSHRUN_N (check saturation/overflow: shift by 1) output: +VQRSHRUN_N:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:26:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:27:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:28:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRUN_N:29:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRUN_N:30:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRUN_N:31:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:32:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:41:result_float32x4 [] = { 33333333 0x1.666666p-25 
4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N (check saturation/overflow: shift by max, positive input) overflow output: +VQRSHRUN_N:42:vqrshrun_n_s16 Neon overflow 0 +VQRSHRUN_N:43:vqrshrun_n_s32 Neon overflow 0 +VQRSHRUN_N:44:vqrshrun_n_s64 Neon overflow 0 + +VQRSHRUN_N (check saturation/overflow: shift by max, positive input) output: +VQRSHRUN_N:45:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:46:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:47:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:48:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:49:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHRUN_N:50:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHRUN_N:51:result_uint32x2 [] = { 80000000, 80000000, } +VQRSHRUN_N:52:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:53:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:54:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:55:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:56:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:57:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:58:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:59:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:60:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:61:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:62:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N (check saturation/overflow: shift by max, negative 
input) overflow output: +VQRSHRUN_N:63:vqrshrun_n_s16 Neon overflow 1 +VQRSHRUN_N:64:vqrshrun_n_s32 Neon overflow 1 +VQRSHRUN_N:65:vqrshrun_n_s64 Neon overflow 1 + +VQRSHRUN_N (check saturation/overflow: shift by max, negative input) output: +VQRSHRUN_N:66:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:67:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:68:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:69:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:70:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:71:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:72:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:73:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:74:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:75:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:76:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:77:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:78:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:79:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:80:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:81:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:82:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:83:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N overflow output: +VQRSHRUN_N:84:vqrshrun_n_s16 Neon overflow 0 +VQRSHRUN_N:85:vqrshrun_n_s32 Neon overflow 1 +VQRSHRUN_N:86:vqrshrun_n_s64 Neon overflow 0 + +VQRSHRUN_N output: +VQRSHRUN_N:87:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:88:result_int16x4 [] 
= { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:89:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:90:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:91:result_uint8x8 [] = { 49, 49, 49, 49, 49, 49, 49, 49, } +VQRSHRUN_N:92:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:93:result_uint32x2 [] = { deadbf, deadbf, } +VQRSHRUN_N:94:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:95:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:96:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:97:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:98:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:99:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:100:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:101:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:102:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:103:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:104:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VST2_LANE/VST2Q_LANE chunk 0 output: +VST2_LANE/VST2Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:4:result_uint8x8 [] = { f0, f1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:5:result_uint16x4 [] = { fff0, fff1, 0, 0, } +VST2_LANE/VST2Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } 
+VST2_LANE/VST2Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VST2_LANE/VST2Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:10:result_int16x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:11:result_int32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:14:result_uint16x8 [] = { fff0, fff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST2_LANE/VST2Q_LANE chunk 1 output: +VST2_LANE/VST2Q_LANE:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:19:result_int16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:20:result_int32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:24:result_uint32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:26:result_float32x2 [] = { 0 0x0p+0 0, 0 0x0p+0 0, } +VST2_LANE/VST2Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:29:result_int32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, } +VST2_LANE/VST2Q_LANE:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:35:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST3_LANE/VST3Q_LANE chunk 0 output: +VST3_LANE/VST3Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, 0, } +VST3_LANE/VST3Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VST3_LANE/VST3Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:14:result_uint16x8 [] = { fff0, fff1, fff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, 0 0x0p+0 0, } + +VST3_LANE/VST3Q_LANE chunk 1 output: 
+VST3_LANE/VST3Q_LANE:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:19:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:20:result_int32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:24:result_uint32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, 0 0x0p+0 0, } +VST3_LANE/VST3Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:29:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:35:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST3_LANE/VST3Q_LANE chunk 2 output: +VST3_LANE/VST3Q_LANE:36:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:37:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:38:result_int32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:39:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:40:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:41:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:42:result_uint32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:43:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:44:result_float32x2 [] = { 0 0x0p+0 0, 0 
0x0p+0 0, } +VST3_LANE/VST3Q_LANE:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:46:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:47:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:50:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:51:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:53:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST4_LANE/VST4Q_LANE chunk 0 output: +VST4_LANE/VST4Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, f3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VST4_LANE/VST4Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VST4_LANE/VST4Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VST4_LANE/VST4Q_LANE chunk 1 output: +VST4_LANE/VST4Q_LANE:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:19:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:20:result_int32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VST4_LANE/VST4Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:29:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:35:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST4_LANE/VST4Q_LANE chunk 2 output: +VST4_LANE/VST4Q_LANE:36:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VST4_LANE/VST4Q_LANE:37:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:38:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:39:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:40:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:41:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:42:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:43:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:44:result_float32x2 [] = { 0 0x0p+0 0, 0 0x0p+0 0, } +VST4_LANE/VST4Q_LANE:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:46:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:47:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:50:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:51:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:53:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST4_LANE/VST4Q_LANE chunk 3 output: +VST4_LANE/VST4Q_LANE:54:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:55:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:56:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:57:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:58:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:59:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:60:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:61:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:62:result_float32x2 [] = { 0 0x0p+0 0, 0 0x0p+0 0, } +VST4_LANE/VST4Q_LANE:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:64:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:65:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:68:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:69:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:71:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VTBL1 output: +VTBL1:0:result_int8x8 [] = { 0, fffffff2, fffffff2, fffffff2, 0, 0, fffffff2, fffffff2, } +VTBL1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL1:3:result_int64x1 [] = { 3333333333333333, } +VTBL1:4:result_uint8x8 [] = { 0, f3, f3, f3, 0, 0, f3, f3, } +VTBL1:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL1:7:result_uint64x1 [] = { 3333333333333333, } +VTBL1:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBL1:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:17:result_float32x4 [] = { 33333333 0x1.666666p-25 
4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBL2 output: +VTBL2:0:result_int8x8 [] = { fffffff6, fffffff3, fffffff3, fffffff3, 0, 0, fffffff3, fffffff3, } +VTBL2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL2:3:result_int64x1 [] = { 3333333333333333, } +VTBL2:4:result_uint8x8 [] = { f6, f5, f5, f5, 0, 0, f5, f5, } +VTBL2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL2:7:result_uint64x1 [] = { 3333333333333333, } +VTBL2:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBL2:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBL3 output: +VTBL3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 0, fffffff4, fffffff4, } +VTBL3:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL3:3:result_int64x1 [] = { 3333333333333333, } +VTBL3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, 0, f7, f7, } +VTBL3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } 
+VTBL3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL3:7:result_uint64x1 [] = { 3333333333333333, } +VTBL3:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBL3:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBL4 output: +VTBL4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 0, fffffff5, fffffff5, } +VTBL4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL4:3:result_int64x1 [] = { 3333333333333333, } +VTBL4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, 0, f9, f9, } +VTBL4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL4:7:result_uint64x1 [] = { 3333333333333333, } +VTBL4:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBL4:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } 
+VTBL4:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL4:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBX1 output: +VTBX1:0:result_int8x8 [] = { 33, fffffff2, fffffff2, fffffff2, 33, 33, fffffff2, fffffff2, } +VTBX1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX1:3:result_int64x1 [] = { 3333333333333333, } +VTBX1:4:result_uint8x8 [] = { cc, f3, f3, f3, cc, cc, f3, f3, } +VTBX1:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX1:7:result_uint64x1 [] = { 3333333333333333, } +VTBX1:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBX1:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBX2 output: +VTBX2:0:result_int8x8 [] 
= { fffffff6, fffffff3, fffffff3, fffffff3, 33, 33, fffffff3, fffffff3, } +VTBX2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX2:3:result_int64x1 [] = { 3333333333333333, } +VTBX2:4:result_uint8x8 [] = { f6, f5, f5, f5, cc, cc, f5, f5, } +VTBX2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX2:7:result_uint64x1 [] = { 3333333333333333, } +VTBX2:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBX2:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBX3 output: +VTBX3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 33, fffffff4, fffffff4, } +VTBX3:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX3:3:result_int64x1 [] = { 3333333333333333, } +VTBX3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, cc, f7, f7, } +VTBX3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX3:7:result_uint64x1 [] = { 3333333333333333, } +VTBX3:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 
0x1.666666p-25 4.17233e-08, } +VTBX3:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBX4 output: +VTBX4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 33, fffffff5, fffffff5, } +VTBX4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX4:3:result_int64x1 [] = { 3333333333333333, } +VTBX4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, cc, f9, f9, } +VTBX4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX4:7:result_uint64x1 [] = { 3333333333333333, } +VTBX4:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBX4:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VTBX4:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRECPE/VRECPEQ (positive input) output: +VRECPE/VRECPEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRECPE/VRECPEQ:1:result_uint32x4 [] = { bf000000, bf000000, bf000000, bf000000, } +VRECPE/VRECPEQ:2:result_float32x2 [] = { 3f068000 0x1.0dp-1 0.525391, 3f068000 0x1.0dp-1 0.525391, } +VRECPE/VRECPEQ:3:result_float32x4 [] = { 3c030000 0x1.06p-7 0.00799561, 3c030000 0x1.06p-7 0.00799561, 3c030000 0x1.06p-7 0.00799561, 3c030000 0x1.06p-7 0.00799561, } + +VRECPE/VRECPEQ (negative input) output: +VRECPE/VRECPEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRECPE/VRECPEQ:5:result_uint32x4 [] = { ee800000, ee800000, ee800000, ee800000, } +VRECPE/VRECPEQ:6:result_float32x2 [] = { bdcc8000 -0x1.99p-4 -0.0998535, bdcc8000 -0x1.99p-4 -0.0998535, } +VRECPE/VRECPEQ:7:result_float32x4 [] = { bc030000 -0x1.06p-7 -0.00799561, bc030000 -0x1.06p-7 -0.00799561, bc030000 -0x1.06p-7 -0.00799561, bc030000 -0x1.06p-7 -0.00799561, } + +VRSQRTE/VRSQRTEQ output: +VRSQRTE/VRSQRTEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRSQRTE/VRSQRTEQ:1:result_uint32x4 [] = { 9c800000, 9c800000, 9c800000, 9c800000, } +VRSQRTE/VRSQRTEQ:2:result_float32x2 [] = { 3e8e8000 0x1.1dp-2 0.27832, 3e8e8000 0x1.1dp-2 0.27832, } +VRSQRTE/VRSQRTEQ:3:result_float32x4 [] = { 3e700000 0x1.ep-3 0.234375, 3e700000 0x1.ep-3 0.234375, 3e700000 0x1.ep-3 0.234375, 3e700000 0x1.ep-3 0.234375, } + +VRSQRTE/VRSQRTEQ (2) output: +VRSQRTE/VRSQRTEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRSQRTE/VRSQRTEQ:5:result_uint32x4 [] = { ae800000, ae800000, ae800000, ae800000, } + +VRSQRTE/VRSQRTEQ (3) output: +VRSQRTE/VRSQRTEQ:6:result_uint32x2 [] = { b4800000, b4800000, } 
+VRSQRTE/VRSQRTEQ:7:result_uint32x4 [] = { ed000000, ed000000, ed000000, ed000000, } + + +Integer (non-NEON) intrinsics +__clz(0xffffffff) = 0 +__clz(0x7fffffff) = 1 +__clz(0x3fffffff) = 2 +__clz(0x1fffffff) = 3 +__clz(0xfffffff) = 4 +__clz(0x7ffffff) = 5 +__clz(0x3ffffff) = 6 +__clz(0x1ffffff) = 7 +__clz(0xffffff) = 8 +__clz(0x7fffff) = 9 +__clz(0x3fffff) = 10 +__clz(0x1fffff) = 11 +__clz(0xfffff) = 12 +__clz(0x7ffff) = 13 +__clz(0x3ffff) = 14 +__clz(0x1ffff) = 15 +__clz(0xffff) = 16 +__clz(0x7fff) = 17 +__clz(0x3fff) = 18 +__clz(0x1fff) = 19 +__clz(0xfff) = 20 +__clz(0x7ff) = 21 +__clz(0x3ff) = 22 +__clz(0x1ff) = 23 +__clz(0xff) = 24 +__clz(0x7f) = 25 +__clz(0x3f) = 26 +__clz(0x1f) = 27 +__clz(0xf) = 28 +__clz(0x7) = 29 +__clz(0x3) = 30 +__clz(0x1) = 31 +__clz(0) = 32 +__qadd(0x1, 0x2) = 0x3 sat 0 +__qadd(0xffffffff, 0xfffffffe) = 0xfffffffd sat 0 +__qadd(0xffffffff, 0x2) = 0x1 sat 0 +__qadd(0x7000, 0x7000) = 0xe000 sat 0 +__qadd(0x8fff, 0x8fff) = 0x11ffe sat 0 +__qadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +__qadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +__qdbl(0x1) = 0x2 sat 0 +__qdbl(0x70000000) = 0x7fffffff sat 1 +__qdbl(0x8fffffff) = 0x80000000 sat 1 +__qdbl(0xefffffff) = 0xdffffffe sat 0 +__qsub(0x1, 0x2) = 0xffffffff sat 0 +__qsub(0xffffffff, 0xfffffffe) = 0x1 sat 0 +__qsub(0xffffffff, 0x2) = 0xfffffffd sat 0 +__qsub(0x7000, 0xffff9000) = 0xe000 sat 0 +__qsub(0x8fff, 0xffff7001) = 0x11ffe sat 0 +__qsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +__qsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +__qsub(0, 0x80000000) = 0x7fffffff sat 1 +__rbit(0x12345678) = 0x1e6a2c48 +__rev(0x12345678) = 0x78563412 +__ssat(0x12345678, 30) = 0x12345678 sat 0 +__ssat(0x12345678, 19) = 0x3ffff sat 1 +__ssat(0x87654321, 29) = 0xf0000000 sat 1 +__ssat(0x87654321, 12) = 0xfffff800 sat 1 +__ssat(0x87654321, 32) = 0x87654321 sat 0 +__ssat(0x87654321, 1) = 0xffffffff sat 1 +__usat(0x12345678, 30) = 0x12345678 sat 0 +__usat(0x12345678, 19) = 0x7ffff sat 1 +__usat(0x87654321, 
29) = 0 sat 1 +__usat(0x87654321, 12) = 0 sat 1 +__usat(0x87654321, 31) = 0 sat 1 +__usat(0x87654321, 0) = 0 sat 1 + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:0:result_uint32x2 [] = { ffffffff, 0, } +VCAGE/VCAGEQ:1:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGE/VCAGEQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VCALE/VCALEQ:1:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:2:result_uint32x2 [] = { 0, 0, } +VCALE/VCALEQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:0:result_uint32x2 [] = { 0, 0, } +VCAGT/VCAGTQ:1:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGT/VCAGTQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:0:result_uint32x2 [] = { 0, ffffffff, } +VCALT/VCALTQ:1:result_uint32x4 [] = { 0, 0, 0, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:2:result_uint32x2 [] = { 0, 0, } +VCALT/VCALTQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCVT/VCVTQ output: +VCVT/VCVTQ:0:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VCVT/VCVTQ:1:result_float32x2 [] = { 4f800000 0x1p+32 4.29497e+09, 4f800000 0x1p+32 4.29497e+09, } +VCVT/VCVTQ:2:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VCVT/VCVTQ:3:result_float32x4 [] = { 4f800000 0x1p+32 4.29497e+09, 4f800000 0x1p+32 4.29497e+09, 4f800000 0x1p+32 4.29497e+09, 4f800000 0x1p+32 4.29497e+09, } +VCVT/VCVTQ:4:result_int32x2 [] = { fffffff1, 5, } +VCVT/VCVTQ:5:result_uint32x2 [] = { 0, 5, } +VCVT/VCVTQ:6:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, 5, } +VCVT/VCVTQ:7:result_uint32x4 [] = { 0, 0, 0, 5, } + 
+VCVT_N/VCVTQ_N output: +VCVT_N/VCVTQ_N:8:result_float32x2 [] = { c0800000 -0x1p+2 -4, c0700000 -0x1.ep+1 -3.75, } +VCVT_N/VCVTQ_N:9:result_float32x2 [] = { 4c000000 0x1p+25 3.35544e+07, 4c000000 0x1p+25 3.35544e+07, } +VCVT_N/VCVTQ_N:10:result_float32x4 [] = { b2800000 -0x1p-26 -1.49012e-08, b2700000 -0x1.ep-27 -1.39698e-08, b2600000 -0x1.cp-27 -1.30385e-08, b2500000 -0x1.ap-27 -1.21072e-08, } +VCVT_N/VCVTQ_N:11:result_float32x4 [] = { 49800000 0x1p+20 1.04858e+06, 49800000 0x1p+20 1.04858e+06, 49800000 0x1p+20 1.04858e+06, 49800000 0x1p+20 1.04858e+06, } +VCVT_N/VCVTQ_N:12:result_int32x2 [] = { ff0b3333, 54cccd, } +VCVT_N/VCVTQ_N:13:result_uint32x2 [] = { 0, 15, } +VCVT_N/VCVTQ_N:14:result_int32x4 [] = { fffe0000, fffe2000, fffe1667, a999, } +VCVT_N/VCVTQ_N:15:result_uint32x4 [] = { 0, 0, 0, a, } + +VCVT/VCVTQ (check rounding) output: +VCVT/VCVTQ:16:result_int32x2 [] = { a, a, } +VCVT/VCVTQ:17:result_uint32x2 [] = { a, a, } +VCVT/VCVTQ:18:result_int32x4 [] = { 7d, 7d, 7d, 7d, } +VCVT/VCVTQ:19:result_uint32x4 [] = { 7d, 7d, 7d, 7d, } + +VCVT_N/VCVTQ_N (check rounding) output: +VCVT_N/VCVTQ_N:20:result_int32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:21:result_uint32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:22:result_int32x4 [] = { fbccc, fbccc, fbccc, fbccc, } +VCVT_N/VCVTQ_N:23:result_uint32x4 [] = { fbccc, fbccc, fbccc, fbccc, } + +VCVT_N/VCVTQ_N (check saturation) output: +VCVT_N/VCVTQ_N:24:result_int32x2 [] = { 7fffffff, 7fffffff, } +VCVT_N/VCVTQ_N:25:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } + +VRECPS/VRECPSQ output: +VRECPS/VRECPSQ:0:result_float32x2 [] = { c2e19eb7 -0x1.c33d6ep+6 -112.81, c2e19eb7 -0x1.c33d6ep+6 -112.81, } +VRECPS/VRECPSQ:1:result_float32x4 [] = { c1db851f -0x1.b70a3ep+4 -27.44, c1db851f -0x1.b70a3ep+4 -27.44, c1db851f -0x1.b70a3ep+4 -27.44, c1db851f -0x1.b70a3ep+4 -27.44, } + +VRSQRTS/VRSQRTSQ output: +VRSQRTS/VRSQRTSQ:0:result_float32x2 [] = { c2796b84 -0x1.f2d708p+5 -62.355, c2796b84 -0x1.f2d708p+5 -62.355, } 
+VRSQRTS/VRSQRTSQ:1:result_float32x4 [] = { c0e4a3d8 -0x1.c947bp+2 -7.145, c0e4a3d8 -0x1.c947bp+2 -7.145, c0e4a3d8 -0x1.c947bp+2 -7.145, c0e4a3d8 -0x1.c947bp+2 -7.145, } + + +DSP (non-NEON) intrinsics +qadd(0x1, 0x2) = 0x3 sat 0 +qadd(0xffffffff, 0xfffffffe) = 0xfffffffd sat 0 +qadd(0xffffffff, 0x2) = 0x1 sat 0 +qadd(0x7000, 0x7000) = 0xe000 sat 0 +qadd(0x8fff, 0x8fff) = 0x11ffe sat 0 +qadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +qadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +qsub(0x1, 0x2) = 0xffffffff sat 0 +qsub(0xffffffff, 0xfffffffe) = 0x1 sat 0 +qsub(0xffffffff, 0x2) = 0xfffffffd sat 0 +qsub(0x7000, 0xffff9000) = 0xe000 sat 0 +qsub(0x8fff, 0xffff7001) = 0x11ffe sat 0 +qsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +qsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +qsub(0, 0x80000000) = 0x7fffffff sat 1 +qdadd(0x1, 0x2) = 0x5 sat 0 +qdadd(0xffffffff, 0xfffffffe) = 0xfffffffb sat 0 +qdadd(0xffffffff, 0x2) = 0x3 sat 0 +qdadd(0x7000, 0x7000) = 0x15000 sat 0 +qdadd(0x8fff, 0x8fff) = 0x1affd sat 0 +qdadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +qdadd(0, 0x70000000) = 0x7fffffff sat 1 +qdadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +qdadd(0, 0x8fffffff) = 0x80000000 sat 1 +qdsub(0x1, 0x2) = 0xfffffffd sat 0 +qdsub(0xffffffff, 0xfffffffe) = 0x3 sat 0 +qdsub(0xffffffff, 0x2) = 0xfffffffb sat 0 +qdsub(0x7000, 0xffff9000) = 0x15000 sat 0 +qdsub(0x8fff, 0xffff7001) = 0x1affd sat 0 +qdsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +qdsub(0, 0x90000000) = 0x7fffffff sat 1 +qdsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +qdsub(0, 0x70000001) = 0x80000001 sat 1 +smulbb(0x12345678, 0x12345678) = 0x1d34d840 +smulbt(0x12345678, 0x12345678) = 0x6260060 +smultb(0x12345678, 0x12345678) = 0x6260060 +smultt(0x12345678, 0x12345678) = 0x14b5a90 +smulbb(0xf123f456, 0xf123f456) = 0x880ce4 +smulbt(0xf123f456, 0xf123f456) = 0xad5dc2 +smultb(0xf123f456, 0xf123f456) = 0xad5dc2 +smultt(0xf123f456, 0xf123f456) = 0xdceac9 +smlabb(0x12345678, 0x12345678, 0x1020304) = 0x1e36db44 
+smlabt(0x12345678, 0x12345678, 0x1020304) = 0x7280364 +smlatb(0x12345678, 0x12345678, 0x1020304) = 0x7280364 +smlatt(0x12345678, 0x12345678, 0x1020304) = 0x24d5d94 +smlabb(0xf123f456, 0xf123f456, 0x1020304) = 0x18a0fe8 +smlabt(0xf123f456, 0xf123f456, 0x1020304) = 0x1af60c6 +smlatb(0xf123f456, 0xf123f456, 0x1020304) = 0x1af60c6 +smlatt(0xf123f456, 0xf123f456, 0x1020304) = 0x1deedcd +smlalbb(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xb7f1b730 +smlalbt(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xa0e2df50 +smlaltb(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xa0e2df50 +smlaltt(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780x9c083980 +smlalbb(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b44ebd4 +smlalbt(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b6a3cb2 +smlaltb(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b6a3cb2 +smlaltt(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b99c9b9 +smlalbb(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlalbt(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlaltb(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlaltt(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smulwb(0x12345678, 0x12345678) = 0x6261d94 +smulwt(0x12345678, 0x12345678) = 0x14b60b6 +smulwb(0xf123f456, 0xf123f456) = 0xad52a0 +smulwt(0xf123f456, 0xf123f456) = 0xdcdc99 +smlawb(0x12345678, 0x12345678, 0x1020304) = 0x7282098 +smlawt(0x12345678, 0x12345678, 0x1020304) = 0x24d63ba +smlawb(0xf123f456, 0xf123f456, 0x1020304) = 0x1af55a4 +smlawt(0xf123f456, 0xf123f456, 0X1020304) = 0x1dedf9d + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=0 and input Carry=0 +Checking saturate with input Overflow=0 and input Carry=0 +saturate(0x1) = 0x1 overflow 0 carry 0 +saturate(0xffffffff) = 0xffffffff overflow 0 carry 0 
+saturate(0x8000) = 0x7fff overflow 1 carry 0 +saturate(0xffff8000) = 0xffff8000 overflow 0 carry 0 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 0 +add(0x1, 0x1) = 0x2 overflow 0 carry 0 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 0 carry 0 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 0 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 0 +sub(0x1, 0x1) = 0 overflow 0 carry 0 +sub(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +sub(0x4e20, 0x4e20) = 0 overflow 0 carry 0 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 0 carry 0 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 0 +abs_s(0x1) = 0x1 overflow 0 carry 0 +abs_s(0xffffffff) = 0x1 overflow 0 carry 0 +abs_s(0xffff8000) = 0x7fff overflow 0 carry 0 +shl(0x1, 1) = 0x2 overflow 0 carry 0 +shl(0xa, 1) = 0x14 overflow 0 carry 0 +shl(0xfff, 10) = 0x7fff overflow 1 carry 0 +shl(0xfff, 20) = 0x7fff overflow 1 carry 0 +shl(0x1, -1) = 0 overflow 0 carry 0 +shl(0x14, -1) = 0xa overflow 0 carry 0 +shl(0xfff, -10) = 0x3 overflow 0 carry 0 +shl(0xfff, -64) = 0 overflow 0 carry 0 +shr(0x1, -1) = 0x2 overflow 0 carry 0 +shr(0xa, -1) = 0x14 overflow 0 carry 0 +shr(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr(0x1, 1) = 0 overflow 0 carry 0 +shr(0x14, 1) = 0xa overflow 0 carry 0 +shr(0xfff, 10) = 0x3 overflow 0 carry 0 +shr(0xfff, 64) = 0 overflow 0 carry 0 +mult(0x2, 0x2) = 0 overflow 0 carry 0 +mult(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +L_mult(0x2, 0x2) = 0x8 overflow 0 carry 0 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 0 carry 0 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +negate(0x1) = 0xffffffff overflow 0 carry 0 +negate(0xffffffff) = 0x1 overflow 0 carry 0 +negate(0xffff8000) = 0x7fff overflow 0 carry 0 +extract_h(0x1) = 0 overflow 0 carry 0 +extract_h(0xffffffff) = 0xffffffff overflow 0 carry 0 +extract_h(0xffff8000) = 0xffffffff overflow 0 carry 0 +extract_h(0x12345678) 
= 0x1234 overflow 0 carry 0 +extract_l(0x1) = 0x1 overflow 0 carry 0 +extract_l(0xffffffff) = 0xffffffff overflow 0 carry 0 +extract_l(0xffff8000) = 0xffff8000 overflow 0 carry 0 +extract_l(0x43218765) = 0xffff8765 overflow 0 carry 0 +round(0x1) = 0 overflow 0 carry 0 +round(0xffffffff) = 0 overflow 0 carry 0 +round(0xffff8000) = 0 overflow 0 carry 0 +round(0x43218765) = 0x4322 overflow 0 carry 0 +round(0x87654321) = 0xffff8765 overflow 0 carry 0 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 0 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 0 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 0 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 0 +L_add(0x1, 0x2) = 0x3 overflow 0 carry 0 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 0 +L_add(0xffffffff, 0x2) = 0x1 overflow 0 carry 0 +L_add(0x7000, 0x7000) = 0xe000 overflow 0 carry 0 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 0 carry 0 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 0 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 0 +L_sub(0x1, 0x2) = 0xffffffff overflow 0 carry 0 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 0 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 0 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 0 carry 0 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 0 carry 0 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 0 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 0 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 0 +Checking L_add_c with input Overflow=0 and input Carry=0 +L_add_c(0x1, 0x2) = 0x3 overflow 0 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 1 +L_add_c(0xffffffff, 
0x2) = 0x1 overflow 0 carry 1 +L_add_c(0x7000, 0x7000) = 0xe000 overflow 0 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11ffe overflow 0 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000000 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1ffffffe overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8ffffffe overflow 0 carry 1 +L_sub_c(0x1, 0x2) = 0xfffffffe overflow 0 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0 overflow 0 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffc overflow 0 carry 1 +L_sub_c(0x7000, 0x7000) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x8fff, 0x8fff) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x70000000, 0x70000000) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x8fffffff, 0x8fffffff) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x1, 0x80000000) = 0x80000000 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x7fffffff overflow 1 carry 1 +Checking L_macNs with input Overflow=0 and input Carry=0 +L_macNs(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001233 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +Checking L_msuNs with input Overflow=0 and input Carry=0 +L_msuNs(0x1234, 0x2, 0x2) = 0x122b overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1231 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 0 +negate(0xffffffff) = 0x1 overflow 0 carry 0 +negate(0xffff8000) = 0x7fff overflow 0 carry 0 +mult_r(0x2, 0x2) = 0 overflow 0 carry 0 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +norm_s(0x1) = 0xe overflow 0 carry 0 +norm_s(0xffffffff) = 0xf overflow 0 carry 0 +norm_s(0xffff8000) = 0 overflow 0 carry 0 +norm_s(0x2ee0) = 0x1 overflow 0 carry 0 
+norm_l(0x1) = 0x1e overflow 0 carry 0 +norm_l(0xffffffff) = 0x1f overflow 0 carry 0 +norm_l(0xffff8000) = 0x10 overflow 0 carry 0 +norm_l(0x2ee0) = 0x11 overflow 0 carry 0 +norm_l(0x123456) = 0xa overflow 0 carry 0 +norm_l(0xabcdef) = 0x7 overflow 0 carry 0 +L_shl(0x1, 1) = 0x2 overflow 0 carry 0 +L_shl(0xa, 1) = 0x14 overflow 0 carry 0 +L_shl(0xfff, 10) = 0x3ffc00 overflow 0 carry 0 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 0 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 0 carry 0 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 0 +L_shl(0x1, -1) = 0 overflow 0 carry 0 +L_shl(0x14, -1) = 0xa overflow 0 carry 0 +L_shl(0xfff, -10) = 0x3 overflow 0 carry 0 +L_shl(0xfff, -64) = 0 overflow 0 carry 0 +L_shl(0x12345678, -10) = 0x48d15 overflow 0 carry 0 +L_shl(0x12345678, -64) = 0 overflow 0 carry 0 +L_shr(0x1, -1) = 0x2 overflow 0 carry 0 +L_shr(0xa, -1) = 0x14 overflow 0 carry 0 +L_shr(0xfff, -10) = 0x3ffc00 overflow 0 carry 0 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr(0x1, 1) = 0 overflow 0 carry 0 +L_shr(0x14, 1) = 0xa overflow 0 carry 0 +L_shr(0xfff, 10) = 0x3 overflow 0 carry 0 +L_shr(0xfff, 64) = 0 overflow 0 carry 0 +L_shr(0x12345678, 10) = 0x48d15 overflow 0 carry 0 +L_shr(0x12345678, 64) = 0 overflow 0 carry 0 +shr_r(0x1, -1) = 0x2 overflow 0 carry 0 +shr_r(0xa, -1) = 0x14 overflow 0 carry 0 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr_r(0x1, 1) = 0x1 overflow 0 carry 0 +shr_r(0x14, 1) = 0xa overflow 0 carry 0 +shr_r(0xfff, 10) = 0x4 overflow 0 carry 0 +shr_r(0xfff, 64) = 0 overflow 0 carry 0 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 0 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0x123456, 0x244, 0x522) = 0x29 
overflow 0 carry 0 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 0 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 0 carry 0 +L_deposit_h(0x1) = 0x10000 overflow 0 carry 0 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 0 carry 0 +L_deposit_h(0xffff8000) = 0x80000000 overflow 0 carry 0 +L_deposit_h(0x1234) = 0x12340000 overflow 0 carry 0 +L_deposit_l(0x1) = 0x1 overflow 0 carry 0 +L_deposit_l(0xffffffff) = 0xffffffff overflow 0 carry 0 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 0 carry 0 +L_deposit_l(0x1234) = 0x1234 overflow 0 carry 0 +L_shr_r(0x1, -1) = 0x2 overflow 0 carry 0 +L_shr_r(0xa, -1) = 0x14 overflow 0 carry 0 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 0 carry 0 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x1, 1) = 0x1 overflow 0 carry 0 +L_shr_r(0x14, 1) = 0xa overflow 0 carry 0 +L_shr_r(0xfff, 10) = 0x4 overflow 0 carry 0 +L_shr_r(0xfff, 64) = 0 overflow 0 carry 0 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 0 carry 0 +L_shr_r(0x12345678, 64) = 0 overflow 0 carry 0 +L_abs(0x1) = 0x1 overflow 0 carry 0 +L_abs(0xffffffff) = 0x1 overflow 0 carry 0 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 0 +L_sat(0x1) = 0x1 overflow 0 carry 0 +L_sat(0xffffffff) = 0xffffffff overflow 0 carry 0 +L_sat(0xffff8000) = 0xffff8000 overflow 0 carry 0 +L_sat(0x8000) = 0x8000 overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 0 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 0 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=0 and input Carry=1 +Checking saturate with input Overflow=0 and input Carry=1 +saturate(0x1) = 0x1 overflow 0 carry 1 +saturate(0xffffffff) = 0xffffffff 
overflow 0 carry 1 +saturate(0x8000) = 0x7fff overflow 1 carry 1 +saturate(0xffff8000) = 0xffff8000 overflow 0 carry 1 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 1 +add(0x1, 0x1) = 0x2 overflow 0 carry 1 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 0 carry 1 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 1 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 1 +sub(0x1, 0x1) = 0 overflow 0 carry 1 +sub(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +sub(0x4e20, 0x4e20) = 0 overflow 0 carry 1 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 0 carry 1 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 1 +abs_s(0x1) = 0x1 overflow 0 carry 1 +abs_s(0xffffffff) = 0x1 overflow 0 carry 1 +abs_s(0xffff8000) = 0x7fff overflow 0 carry 1 +shl(0x1, 1) = 0x2 overflow 0 carry 1 +shl(0xa, 1) = 0x14 overflow 0 carry 1 +shl(0xfff, 10) = 0x7fff overflow 1 carry 1 +shl(0xfff, 20) = 0x7fff overflow 1 carry 1 +shl(0x1, -1) = 0 overflow 0 carry 1 +shl(0x14, -1) = 0xa overflow 0 carry 1 +shl(0xfff, -10) = 0x3 overflow 0 carry 1 +shl(0xfff, -64) = 0 overflow 0 carry 1 +shr(0x1, -1) = 0x2 overflow 0 carry 1 +shr(0xa, -1) = 0x14 overflow 0 carry 1 +shr(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr(0x1, 1) = 0 overflow 0 carry 1 +shr(0x14, 1) = 0xa overflow 0 carry 1 +shr(0xfff, 10) = 0x3 overflow 0 carry 1 +shr(0xfff, 64) = 0 overflow 0 carry 1 +mult(0x2, 0x2) = 0 overflow 0 carry 1 +mult(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +L_mult(0x2, 0x2) = 0x8 overflow 0 carry 1 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 0 carry 1 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 1 +negate(0xffffffff) = 0x1 overflow 0 carry 1 +negate(0xffff8000) = 0x7fff overflow 0 carry 1 +extract_h(0x1) = 0 overflow 0 carry 1 +extract_h(0xffffffff) = 0xffffffff overflow 0 carry 1 +extract_h(0xffff8000) = 0xffffffff overflow 0 carry 1 
+extract_h(0x12345678) = 0x1234 overflow 0 carry 1 +extract_l(0x1) = 0x1 overflow 0 carry 1 +extract_l(0xffffffff) = 0xffffffff overflow 0 carry 1 +extract_l(0xffff8000) = 0xffff8000 overflow 0 carry 1 +extract_l(0x43218765) = 0xffff8765 overflow 0 carry 1 +round(0x1) = 0 overflow 0 carry 1 +round(0xffffffff) = 0 overflow 0 carry 1 +round(0xffff8000) = 0 overflow 0 carry 1 +round(0x43218765) = 0x4322 overflow 0 carry 1 +round(0x87654321) = 0xffff8765 overflow 0 carry 1 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 1 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 1 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 1 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 1 +L_add(0x1, 0x2) = 0x3 overflow 0 carry 1 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 1 +L_add(0xffffffff, 0x2) = 0x1 overflow 0 carry 1 +L_add(0x7000, 0x7000) = 0xe000 overflow 0 carry 1 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 0 carry 1 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 1 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 1 +L_sub(0x1, 0x2) = 0xffffffff overflow 0 carry 1 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 1 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 1 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 0 carry 1 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 0 carry 1 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 1 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 1 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 1 +Checking L_add_c with input Overflow=0 and input Carry=1 +L_add_c(0x1, 0x2) = 0x4 overflow 0 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffe overflow 0 carry 
1 +L_add_c(0xffffffff, 0x2) = 0x2 overflow 0 carry 1 +L_add_c(0x7000, 0x7000) = 0xe001 overflow 0 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11fff overflow 0 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000001 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1fffffff overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8fffffff overflow 0 carry 1 +L_sub_c(0x1, 0x2) = 0xffffffff overflow 0 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 1 +L_sub_c(0x7000, 0x7000) = 0 overflow 0 carry 1 +L_sub_c(0x8fff, 0x8fff) = 0 overflow 0 carry 1 +L_sub_c(0x70000000, 0x70000000) = 0 overflow 0 carry 1 +L_sub_c(0x8fffffff, 0x8fffffff) = 0 overflow 0 carry 1 +L_sub_c(0x1, 0x80000000) = 0x80000001 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x80000000 overflow 0 carry 1 +Checking L_macNs with input Overflow=0 and input Carry=1 +L_macNs(0x1234, 0x2, 0x2) = 0x123d overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1237 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +Checking L_msuNs with input Overflow=0 and input Carry=1 +L_msuNs(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x80000000 overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 1 +negate(0xffffffff) = 0x1 overflow 0 carry 1 +negate(0xffff8000) = 0x7fff overflow 0 carry 1 +mult_r(0x2, 0x2) = 0 overflow 0 carry 1 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +norm_s(0x1) = 0xe overflow 0 carry 1 +norm_s(0xffffffff) = 0xf overflow 0 carry 1 +norm_s(0xffff8000) = 0 overflow 0 carry 1 +norm_s(0x2ee0) = 0x1 overflow 0 carry 1 +norm_l(0x1) = 0x1e 
overflow 0 carry 1 +norm_l(0xffffffff) = 0x1f overflow 0 carry 1 +norm_l(0xffff8000) = 0x10 overflow 0 carry 1 +norm_l(0x2ee0) = 0x11 overflow 0 carry 1 +norm_l(0x123456) = 0xa overflow 0 carry 1 +norm_l(0xabcdef) = 0x7 overflow 0 carry 1 +L_shl(0x1, 1) = 0x2 overflow 0 carry 1 +L_shl(0xa, 1) = 0x14 overflow 0 carry 1 +L_shl(0xfff, 10) = 0x3ffc00 overflow 0 carry 1 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 1 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 0 carry 1 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 1 +L_shl(0x1, -1) = 0 overflow 0 carry 1 +L_shl(0x14, -1) = 0xa overflow 0 carry 1 +L_shl(0xfff, -10) = 0x3 overflow 0 carry 1 +L_shl(0xfff, -64) = 0 overflow 0 carry 1 +L_shl(0x12345678, -10) = 0x48d15 overflow 0 carry 1 +L_shl(0x12345678, -64) = 0 overflow 0 carry 1 +L_shr(0x1, -1) = 0x2 overflow 0 carry 1 +L_shr(0xa, -1) = 0x14 overflow 0 carry 1 +L_shr(0xfff, -10) = 0x3ffc00 overflow 0 carry 1 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr(0x1, 1) = 0 overflow 0 carry 1 +L_shr(0x14, 1) = 0xa overflow 0 carry 1 +L_shr(0xfff, 10) = 0x3 overflow 0 carry 1 +L_shr(0xfff, 64) = 0 overflow 0 carry 1 +L_shr(0x12345678, 10) = 0x48d15 overflow 0 carry 1 +L_shr(0x12345678, 64) = 0 overflow 0 carry 1 +shr_r(0x1, -1) = 0x2 overflow 0 carry 1 +shr_r(0xa, -1) = 0x14 overflow 0 carry 1 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr_r(0x1, 1) = 0x1 overflow 0 carry 1 +shr_r(0x14, 1) = 0xa overflow 0 carry 1 +shr_r(0xfff, 10) = 0x4 overflow 0 carry 1 +shr_r(0xfff, 64) = 0 overflow 0 carry 1 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 1 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0x123456, 0x244, 0x522) = 0x29 overflow 0 carry 1 
+msu_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 1 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 0 carry 1 +L_deposit_h(0x1) = 0x10000 overflow 0 carry 1 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 0 carry 1 +L_deposit_h(0xffff8000) = 0x80000000 overflow 0 carry 1 +L_deposit_h(0x1234) = 0x12340000 overflow 0 carry 1 +L_deposit_l(0x1) = 0x1 overflow 0 carry 1 +L_deposit_l(0xffffffff) = 0xffffffff overflow 0 carry 1 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 0 carry 1 +L_deposit_l(0x1234) = 0x1234 overflow 0 carry 1 +L_shr_r(0x1, -1) = 0x2 overflow 0 carry 1 +L_shr_r(0xa, -1) = 0x14 overflow 0 carry 1 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 0 carry 1 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x1, 1) = 0x1 overflow 0 carry 1 +L_shr_r(0x14, 1) = 0xa overflow 0 carry 1 +L_shr_r(0xfff, 10) = 0x4 overflow 0 carry 1 +L_shr_r(0xfff, 64) = 0 overflow 0 carry 1 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 0 carry 1 +L_shr_r(0x12345678, 64) = 0 overflow 0 carry 1 +L_abs(0x1) = 0x1 overflow 0 carry 1 +L_abs(0xffffffff) = 0x1 overflow 0 carry 1 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 1 +L_sat(0x1) = 0x1 overflow 0 carry 1 +L_sat(0xffffffff) = 0xffffffff overflow 0 carry 1 +L_sat(0xffff8000) = 0xffff8000 overflow 0 carry 1 +L_sat(0x8000) = 0x8000 overflow 0 carry 1 +div_s(0x1, 0x1) = 0x7fff overflow 0 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 1 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=1 and input Carry=0 +Checking saturate with input Overflow=1 and input Carry=0 +saturate(0x1) = 0x1 overflow 1 carry 0 +saturate(0xffffffff) = 0xffffffff overflow 1 carry 0 
+saturate(0x8000) = 0x7fff overflow 1 carry 0 +saturate(0xffff8000) = 0xffff8000 overflow 1 carry 0 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 0 +add(0x1, 0x1) = 0x2 overflow 1 carry 0 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 1 carry 0 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 0 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 0 +sub(0x1, 0x1) = 0 overflow 1 carry 0 +sub(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +sub(0x4e20, 0x4e20) = 0 overflow 1 carry 0 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 1 carry 0 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 0 +abs_s(0x1) = 0x1 overflow 1 carry 0 +abs_s(0xffffffff) = 0x1 overflow 1 carry 0 +abs_s(0xffff8000) = 0x7fff overflow 1 carry 0 +shl(0x1, 1) = 0x2 overflow 1 carry 0 +shl(0xa, 1) = 0x14 overflow 1 carry 0 +shl(0xfff, 10) = 0x7fff overflow 1 carry 0 +shl(0xfff, 20) = 0x7fff overflow 1 carry 0 +shl(0x1, -1) = 0 overflow 1 carry 0 +shl(0x14, -1) = 0xa overflow 1 carry 0 +shl(0xfff, -10) = 0x3 overflow 1 carry 0 +shl(0xfff, -64) = 0 overflow 1 carry 0 +shr(0x1, -1) = 0x2 overflow 1 carry 0 +shr(0xa, -1) = 0x14 overflow 1 carry 0 +shr(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr(0x1, 1) = 0 overflow 1 carry 0 +shr(0x14, 1) = 0xa overflow 1 carry 0 +shr(0xfff, 10) = 0x3 overflow 1 carry 0 +shr(0xfff, 64) = 0 overflow 1 carry 0 +mult(0x2, 0x2) = 0 overflow 1 carry 0 +mult(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +L_mult(0x2, 0x2) = 0x8 overflow 1 carry 0 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 1 carry 0 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +negate(0x1) = 0xffffffff overflow 1 carry 0 +negate(0xffffffff) = 0x1 overflow 1 carry 0 +negate(0xffff8000) = 0x7fff overflow 1 carry 0 +extract_h(0x1) = 0 overflow 1 carry 0 +extract_h(0xffffffff) = 0xffffffff overflow 1 carry 0 +extract_h(0xffff8000) = 0xffffffff overflow 1 carry 0 +extract_h(0x12345678) 
= 0x1234 overflow 1 carry 0 +extract_l(0x1) = 0x1 overflow 1 carry 0 +extract_l(0xffffffff) = 0xffffffff overflow 1 carry 0 +extract_l(0xffff8000) = 0xffff8000 overflow 1 carry 0 +extract_l(0x43218765) = 0xffff8765 overflow 1 carry 0 +round(0x1) = 0 overflow 1 carry 0 +round(0xffffffff) = 0 overflow 1 carry 0 +round(0xffff8000) = 0 overflow 1 carry 0 +round(0x43218765) = 0x4322 overflow 1 carry 0 +round(0x87654321) = 0xffff8765 overflow 1 carry 0 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 1 carry 0 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 1 carry 0 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 0 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 1 carry 0 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 1 carry 0 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 0 +L_add(0x1, 0x2) = 0x3 overflow 1 carry 0 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 0 +L_add(0xffffffff, 0x2) = 0x1 overflow 1 carry 0 +L_add(0x7000, 0x7000) = 0xe000 overflow 1 carry 0 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 1 carry 0 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 0 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 0 +L_sub(0x1, 0x2) = 0xffffffff overflow 1 carry 0 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 0 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 0 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 1 carry 0 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 1 carry 0 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 0 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 0 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 0 +Checking L_add_c with input Overflow=1 and input Carry=0 +L_add_c(0x1, 0x2) = 0x3 overflow 1 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 1 +L_add_c(0xffffffff, 
0x2) = 0x1 overflow 1 carry 1 +L_add_c(0x7000, 0x7000) = 0xe000 overflow 1 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11ffe overflow 1 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000000 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1ffffffe overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8ffffffe overflow 1 carry 1 +L_sub_c(0x1, 0x2) = 0xfffffffe overflow 1 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0 overflow 1 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffc overflow 1 carry 1 +L_sub_c(0x7000, 0x7000) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x8fff, 0x8fff) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x70000000, 0x70000000) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x8fffffff, 0x8fffffff) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x1, 0x80000000) = 0x80000000 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x7fffffff overflow 1 carry 1 +Checking L_macNs with input Overflow=1 and input Carry=0 +L_macNs(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001233 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +Checking L_msuNs with input Overflow=1 and input Carry=0 +L_msuNs(0x1234, 0x2, 0x2) = 0x122b overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1231 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 0 +negate(0xffffffff) = 0x1 overflow 1 carry 0 +negate(0xffff8000) = 0x7fff overflow 1 carry 0 +mult_r(0x2, 0x2) = 0 overflow 1 carry 0 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +norm_s(0x1) = 0xe overflow 1 carry 0 +norm_s(0xffffffff) = 0xf overflow 1 carry 0 +norm_s(0xffff8000) = 0 overflow 1 carry 0 +norm_s(0x2ee0) = 0x1 overflow 1 carry 0 
+norm_l(0x1) = 0x1e overflow 1 carry 0 +norm_l(0xffffffff) = 0x1f overflow 1 carry 0 +norm_l(0xffff8000) = 0x10 overflow 1 carry 0 +norm_l(0x2ee0) = 0x11 overflow 1 carry 0 +norm_l(0x123456) = 0xa overflow 1 carry 0 +norm_l(0xabcdef) = 0x7 overflow 1 carry 0 +L_shl(0x1, 1) = 0x2 overflow 1 carry 0 +L_shl(0xa, 1) = 0x14 overflow 1 carry 0 +L_shl(0xfff, 10) = 0x3ffc00 overflow 1 carry 0 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 0 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 1 carry 0 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 0 +L_shl(0x1, -1) = 0 overflow 1 carry 0 +L_shl(0x14, -1) = 0xa overflow 1 carry 0 +L_shl(0xfff, -10) = 0x3 overflow 1 carry 0 +L_shl(0xfff, -64) = 0 overflow 1 carry 0 +L_shl(0x12345678, -10) = 0x48d15 overflow 1 carry 0 +L_shl(0x12345678, -64) = 0 overflow 1 carry 0 +L_shr(0x1, -1) = 0x2 overflow 1 carry 0 +L_shr(0xa, -1) = 0x14 overflow 1 carry 0 +L_shr(0xfff, -10) = 0x3ffc00 overflow 1 carry 0 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr(0x1, 1) = 0 overflow 1 carry 0 +L_shr(0x14, 1) = 0xa overflow 1 carry 0 +L_shr(0xfff, 10) = 0x3 overflow 1 carry 0 +L_shr(0xfff, 64) = 0 overflow 1 carry 0 +L_shr(0x12345678, 10) = 0x48d15 overflow 1 carry 0 +L_shr(0x12345678, 64) = 0 overflow 1 carry 0 +shr_r(0x1, -1) = 0x2 overflow 1 carry 0 +shr_r(0xa, -1) = 0x14 overflow 1 carry 0 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr_r(0x1, 1) = 0x1 overflow 1 carry 0 +shr_r(0x14, 1) = 0xa overflow 1 carry 0 +shr_r(0xfff, 10) = 0x4 overflow 1 carry 0 +shr_r(0xfff, 64) = 0 overflow 1 carry 0 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 0 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0x123456, 0x244, 0x522) = 0x29 
overflow 1 carry 0 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 0 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 1 carry 0 +L_deposit_h(0x1) = 0x10000 overflow 1 carry 0 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 1 carry 0 +L_deposit_h(0xffff8000) = 0x80000000 overflow 1 carry 0 +L_deposit_h(0x1234) = 0x12340000 overflow 1 carry 0 +L_deposit_l(0x1) = 0x1 overflow 1 carry 0 +L_deposit_l(0xffffffff) = 0xffffffff overflow 1 carry 0 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 1 carry 0 +L_deposit_l(0x1234) = 0x1234 overflow 1 carry 0 +L_shr_r(0x1, -1) = 0x2 overflow 1 carry 0 +L_shr_r(0xa, -1) = 0x14 overflow 1 carry 0 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 1 carry 0 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x1, 1) = 0x1 overflow 1 carry 0 +L_shr_r(0x14, 1) = 0xa overflow 1 carry 0 +L_shr_r(0xfff, 10) = 0x4 overflow 1 carry 0 +L_shr_r(0xfff, 64) = 0 overflow 1 carry 0 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 1 carry 0 +L_shr_r(0x12345678, 64) = 0 overflow 1 carry 0 +L_abs(0x1) = 0x1 overflow 1 carry 0 +L_abs(0xffffffff) = 0x1 overflow 1 carry 0 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 0 +L_sat(0x1) = 0x7fffffff overflow 0 carry 0 +L_sat(0xffffffff) = 0x7fffffff overflow 0 carry 0 +L_sat(0xffff8000) = 0x7fffffff overflow 0 carry 0 +L_sat(0x8000) = 0x7fffffff overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 1 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 0 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=1 and input Carry=1 +Checking saturate with input Overflow=1 and input Carry=1 +saturate(0x1) = 0x1 overflow 1 carry 1 +saturate(0xffffffff) = 
0xffffffff overflow 1 carry 1 +saturate(0x8000) = 0x7fff overflow 1 carry 1 +saturate(0xffff8000) = 0xffff8000 overflow 1 carry 1 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 1 +add(0x1, 0x1) = 0x2 overflow 1 carry 1 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 1 carry 1 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 1 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 1 +sub(0x1, 0x1) = 0 overflow 1 carry 1 +sub(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +sub(0x4e20, 0x4e20) = 0 overflow 1 carry 1 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 1 carry 1 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 1 +abs_s(0x1) = 0x1 overflow 1 carry 1 +abs_s(0xffffffff) = 0x1 overflow 1 carry 1 +abs_s(0xffff8000) = 0x7fff overflow 1 carry 1 +shl(0x1, 1) = 0x2 overflow 1 carry 1 +shl(0xa, 1) = 0x14 overflow 1 carry 1 +shl(0xfff, 10) = 0x7fff overflow 1 carry 1 +shl(0xfff, 20) = 0x7fff overflow 1 carry 1 +shl(0x1, -1) = 0 overflow 1 carry 1 +shl(0x14, -1) = 0xa overflow 1 carry 1 +shl(0xfff, -10) = 0x3 overflow 1 carry 1 +shl(0xfff, -64) = 0 overflow 1 carry 1 +shr(0x1, -1) = 0x2 overflow 1 carry 1 +shr(0xa, -1) = 0x14 overflow 1 carry 1 +shr(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr(0x1, 1) = 0 overflow 1 carry 1 +shr(0x14, 1) = 0xa overflow 1 carry 1 +shr(0xfff, 10) = 0x3 overflow 1 carry 1 +shr(0xfff, 64) = 0 overflow 1 carry 1 +mult(0x2, 0x2) = 0 overflow 1 carry 1 +mult(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +L_mult(0x2, 0x2) = 0x8 overflow 1 carry 1 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 1 carry 1 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 1 +negate(0xffffffff) = 0x1 overflow 1 carry 1 +negate(0xffff8000) = 0x7fff overflow 1 carry 1 +extract_h(0x1) = 0 overflow 1 carry 1 +extract_h(0xffffffff) = 0xffffffff overflow 1 carry 1 +extract_h(0xffff8000) = 0xffffffff overflow 1 
carry 1 +extract_h(0x12345678) = 0x1234 overflow 1 carry 1 +extract_l(0x1) = 0x1 overflow 1 carry 1 +extract_l(0xffffffff) = 0xffffffff overflow 1 carry 1 +extract_l(0xffff8000) = 0xffff8000 overflow 1 carry 1 +extract_l(0x43218765) = 0xffff8765 overflow 1 carry 1 +round(0x1) = 0 overflow 1 carry 1 +round(0xffffffff) = 0 overflow 1 carry 1 +round(0xffff8000) = 0 overflow 1 carry 1 +round(0x43218765) = 0x4322 overflow 1 carry 1 +round(0x87654321) = 0xffff8765 overflow 1 carry 1 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 1 carry 1 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 1 carry 1 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 1 carry 1 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 1 carry 1 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 1 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 1 +L_add(0x1, 0x2) = 0x3 overflow 1 carry 1 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 1 +L_add(0xffffffff, 0x2) = 0x1 overflow 1 carry 1 +L_add(0x7000, 0x7000) = 0xe000 overflow 1 carry 1 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 1 carry 1 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 1 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 1 +L_sub(0x1, 0x2) = 0xffffffff overflow 1 carry 1 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 1 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 1 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 1 carry 1 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 1 carry 1 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 1 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 1 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 1 +Checking L_add_c with input Overflow=1 and input Carry=1 +L_add_c(0x1, 0x2) = 0x4 overflow 1 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffe overflow 
1 carry 1 +L_add_c(0xffffffff, 0x2) = 0x2 overflow 1 carry 1 +L_add_c(0x7000, 0x7000) = 0xe001 overflow 1 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11fff overflow 1 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000001 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1fffffff overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8fffffff overflow 1 carry 1 +L_sub_c(0x1, 0x2) = 0xffffffff overflow 1 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 1 +L_sub_c(0x7000, 0x7000) = 0 overflow 1 carry 1 +L_sub_c(0x8fff, 0x8fff) = 0 overflow 1 carry 1 +L_sub_c(0x70000000, 0x70000000) = 0 overflow 1 carry 1 +L_sub_c(0x8fffffff, 0x8fffffff) = 0 overflow 1 carry 1 +L_sub_c(0x1, 0x80000000) = 0x80000001 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x80000000 overflow 1 carry 1 +Checking L_macNs with input Overflow=1 and input Carry=1 +L_macNs(0x1234, 0x2, 0x2) = 0x123d overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1237 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +Checking L_msuNs with input Overflow=1 and input Carry=1 +L_msuNs(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x80000000 overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 1 +negate(0xffffffff) = 0x1 overflow 1 carry 1 +negate(0xffff8000) = 0x7fff overflow 1 carry 1 +mult_r(0x2, 0x2) = 0 overflow 1 carry 1 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +norm_s(0x1) = 0xe overflow 1 carry 1 +norm_s(0xffffffff) = 0xf overflow 1 carry 1 +norm_s(0xffff8000) = 0 overflow 1 carry 1 +norm_s(0x2ee0) = 0x1 overflow 1 carry 1 +norm_l(0x1) 
= 0x1e overflow 1 carry 1 +norm_l(0xffffffff) = 0x1f overflow 1 carry 1 +norm_l(0xffff8000) = 0x10 overflow 1 carry 1 +norm_l(0x2ee0) = 0x11 overflow 1 carry 1 +norm_l(0x123456) = 0xa overflow 1 carry 1 +norm_l(0xabcdef) = 0x7 overflow 1 carry 1 +L_shl(0x1, 1) = 0x2 overflow 1 carry 1 +L_shl(0xa, 1) = 0x14 overflow 1 carry 1 +L_shl(0xfff, 10) = 0x3ffc00 overflow 1 carry 1 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 1 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 1 carry 1 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 1 +L_shl(0x1, -1) = 0 overflow 1 carry 1 +L_shl(0x14, -1) = 0xa overflow 1 carry 1 +L_shl(0xfff, -10) = 0x3 overflow 1 carry 1 +L_shl(0xfff, -64) = 0 overflow 1 carry 1 +L_shl(0x12345678, -10) = 0x48d15 overflow 1 carry 1 +L_shl(0x12345678, -64) = 0 overflow 1 carry 1 +L_shr(0x1, -1) = 0x2 overflow 1 carry 1 +L_shr(0xa, -1) = 0x14 overflow 1 carry 1 +L_shr(0xfff, -10) = 0x3ffc00 overflow 1 carry 1 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr(0x1, 1) = 0 overflow 1 carry 1 +L_shr(0x14, 1) = 0xa overflow 1 carry 1 +L_shr(0xfff, 10) = 0x3 overflow 1 carry 1 +L_shr(0xfff, 64) = 0 overflow 1 carry 1 +L_shr(0x12345678, 10) = 0x48d15 overflow 1 carry 1 +L_shr(0x12345678, 64) = 0 overflow 1 carry 1 +shr_r(0x1, -1) = 0x2 overflow 1 carry 1 +shr_r(0xa, -1) = 0x14 overflow 1 carry 1 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr_r(0x1, 1) = 0x1 overflow 1 carry 1 +shr_r(0x14, 1) = 0xa overflow 1 carry 1 +shr_r(0xfff, 10) = 0x4 overflow 1 carry 1 +shr_r(0xfff, 64) = 0 overflow 1 carry 1 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 1 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0x123456, 0x244, 0x522) = 0x29 overflow 1 
carry 1 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 1 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 1 carry 1 +L_deposit_h(0x1) = 0x10000 overflow 1 carry 1 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 1 carry 1 +L_deposit_h(0xffff8000) = 0x80000000 overflow 1 carry 1 +L_deposit_h(0x1234) = 0x12340000 overflow 1 carry 1 +L_deposit_l(0x1) = 0x1 overflow 1 carry 1 +L_deposit_l(0xffffffff) = 0xffffffff overflow 1 carry 1 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 1 carry 1 +L_deposit_l(0x1234) = 0x1234 overflow 1 carry 1 +L_shr_r(0x1, -1) = 0x2 overflow 1 carry 1 +L_shr_r(0xa, -1) = 0x14 overflow 1 carry 1 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 1 carry 1 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x1, 1) = 0x1 overflow 1 carry 1 +L_shr_r(0x14, 1) = 0xa overflow 1 carry 1 +L_shr_r(0xfff, 10) = 0x4 overflow 1 carry 1 +L_shr_r(0xfff, 64) = 0 overflow 1 carry 1 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 1 carry 1 +L_shr_r(0x12345678, 64) = 0 overflow 1 carry 1 +L_abs(0x1) = 0x1 overflow 1 carry 1 +L_abs(0xffffffff) = 0x1 overflow 1 carry 1 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 1 +L_sat(0x1) = 0x80000000 overflow 0 carry 0 +L_sat(0xffffffff) = 0x80000000 overflow 0 carry 0 +L_sat(0xffff8000) = 0x80000000 overflow 0 carry 0 +L_sat(0x8000) = 0x80000000 overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 1 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 1 diff --git a/ref_dsp.c b/ref_dsp.c new file mode 100644 index 0000000..85de61e --- /dev/null +++ b/ref_dsp.c @@ -0,0 +1,411 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <stdio.h> +#include <stdint.h> + +#ifdef __arm__ +#include <armdsp.h> +#else +#include "stm-armdsp.h" +#endif +#include <dspfns.h> /* For Overflow */ + +extern FILE* ref_file; + +void exec_dsp (void) +{ + int32_t svar1, svar2, sacc, sres; + int32_t lo, hi; + + + fprintf(ref_file, "\n\nDSP (non-NEON) intrinsics\n"); + + /* qadd */ + /* int32_t qadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0x7000; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + /* qsub */ + /* int32_t qsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = 
qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x80000000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + + /* qdadd */ + /* int32_t qdadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0x7000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, 
svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x70000000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + /* qdsub */ + /* int32_t qdsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x90000000; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x70000001; + Overflow = 
0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + + /* smulbb, smulbt, smultb, smultt */ + /* int32_t smulbb(int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smulbb(svar1, svar2); + fprintf(ref_file, "smulbb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulbt(svar1, svar2); + fprintf(ref_file, "smulbt(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultb(svar1, svar2); + fprintf(ref_file, "smultb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultt(svar1, svar2); + fprintf(ref_file, "smultt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smulbb(svar1, svar2); + fprintf(ref_file, "smulbb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulbt(svar1, svar2); + fprintf(ref_file, "smulbt(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultb(svar1, svar2); + fprintf(ref_file, "smultb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultt(svar1, svar2); + fprintf(ref_file, "smultt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + + /* smlabb, smlabt, smlatb, smlatt */ + /* int32_t smlabb(int32_t val1, int32_t val2, int32_t acc); */ + sacc = 0x01020304; + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smlabb(svar1, svar2, sacc); + fprintf(ref_file, "smlabb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlabt(svar1, svar2, sacc); + fprintf(ref_file, "smlabt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatb(svar1, svar2, sacc); + fprintf(ref_file, "smlatb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatt(svar1, svar2, sacc); + fprintf(ref_file, "smlatt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smlabb(svar1, svar2, sacc); + fprintf(ref_file, "smlabb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlabt(svar1, svar2, sacc); + fprintf(ref_file, "smlabt(%#x, %#x, %#x) = %#x\n", 
svar1, svar2, sacc, sres); + sres = smlatb(svar1, svar2, sacc); + fprintf(ref_file, "smlatb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatt(svar1, svar2, sacc); + fprintf(ref_file, "smlatt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + + /* smlalbb, smlalbt, smlaltb, smlaltt */ + /* int32_t smlalbb(int32_t *lo, int32_t *hi, int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, 
"%#x%#x\n", hi, lo); + + svar1 = 0x7FFF7FFF; + svar2 = 0x7FFF7FFF; + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlalbt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + + + /* smulwb, smulwt */ + /* int32_t smulwb(int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smulwb(svar1, svar2); + fprintf(ref_file, "smulwb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulwt(svar1, svar2); + fprintf(ref_file, "smulwt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smulwb(svar1, svar2); + fprintf(ref_file, "smulwb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulwt(svar1, svar2); + fprintf(ref_file, "smulwt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + + /* smlawb, smlawt */ + /* int32_t smlawb(int32_t val1, int32_t val2, int32_t acc); */ + sacc = 0x01020304; + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smlawb(svar1, svar2, sacc); + fprintf(ref_file, "smlawb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlawt(svar1, svar2, sacc); + fprintf(ref_file, "smlawt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smlawb(svar1, svar2, sacc); + fprintf(ref_file, "smlawb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlawt(svar1, svar2, sacc); 
+ fprintf(ref_file, "smlawt(%#x, %#x, %#X) = %#x\n", svar1, svar2, sacc, sres); + +} diff --git a/ref_dspfns.c b/ref_dspfns.c new file mode 100644 index 0000000..5680779 --- /dev/null +++ b/ref_dspfns.c @@ -0,0 +1,1493 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <stdio.h> +#include <stdint.h> + +#ifdef __arm__ +#include <dspfns.h> +#else +#include <dspfns.h> +#endif + +extern FILE* ref_file; + +#define __xSTR(X) #X +#define __STR(X) __xSTR(X) + +#define FN(X) X, __STR(X) + +typedef int32_t func32_32_32_ptr(int32_t, int32_t); +typedef int16_t func16_32_ptr(int32_t); +typedef int32_t func32_32_16_16_ptr(int32_t, int16_t, int16_t); + +void test_16_fn_32(func16_32_ptr func, char* func_name, + int init_Overflow, int init_Carry) +{ + int32_t svar32_a; + int16_t svar16_a; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32769; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); +} + +void test_32_fn_32_32(func32_32_32_ptr func, char* func_name, + int init_Overflow, int init_Carry) +{ + int32_t svar32_a, svar32_b, svar32_c; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar32_a = 1; + 
svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0xFFFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + 
func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); +} + +void test_32_fn_32_16_16(func32_32_16_16_ptr func, char* func_name, + int init_Overflow, int init_Carry) +{ + int32_t svar32_a, svar32_b; + int16_t svar16_a, svar16_b; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = 0; + Carry = init_Carry; + svar32_a = -1; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); +} + +void exec_dspfns1 (int init_Overflow, int init_Carry) +{ + int32_t svar32_a, svar32_b, svar32_c; + int16_t svar16_a, svar16_b, svar16_c; + + + fprintf(ref_file, "\n\nDSP FNS (non-NEON/ITU) intrinsics with input Overflow=%d and input Carry=%d\n", init_Overflow, init_Carry); + + /* saturate */ + /* int16_t saturate(int32_t x) */ + test_16_fn_32(FN(saturate), init_Overflow, init_Carry); + + + /* add */ + /* int16_t add(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + 
svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -20000; + svar16_b = -20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* sub */ + /* int16_t sub(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -20000; + svar16_b = -20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0; + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = 
sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* abs_s */ + /* int16_t abs_s(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + + /* shl */ + /* int16_t shl(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, 
svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* shr */ + /* int16_t shr(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, 
Carry); + + svar16_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* mult */ + /* int16_t mult(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* L_mult */ + /* int32_t L_mult(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar32_a, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d 
carry %d\n", svar16_a, svar16_b, svar32_a, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar32_a, Overflow, Carry); + + + /* negate */ + /* int16_t negate(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + + /* extract_h */ + /* int16_t extract_h(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x12345678; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* extract_l */ + /* int16_t extract_l(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + 
svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x43218765; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* round */ + /* int16_t round(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x43218765; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x87654321; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* L_mac */ + /* int32_t L_mac(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + 
Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = -1; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + + /* L_msu */ + /* int32_t L_msu(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + svar32_a = 1; + 
Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + + /* L_add */ + /* int32_t L_add(int32_t val1, int32_t val2); */ + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, 
svar32_b, svar32_c, Overflow, Carry); + + /* L_sub */ + /* int32_t L_sub(int32_t val1, int32_t val2); */ + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0xFFFF9000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0xFFFF7001; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x90000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x70000001; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0; + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + 
fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + + /* L_add_c */ + /* int32_t L_add_c(int32_t val1, int32_t val2); */ + test_32_fn_32_32(FN(L_add_c), init_Overflow, init_Carry); + + + + /* L_sub_c */ + /* int32_t L_sub_c(int32_t val1, int32_t val2); */ +#undef MYFN +#define MYFN L_sub_c + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = 
MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x1; + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFFFFFFF; + svar32_b = 0x7FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + + /* L_macNs */ + /* int32_t L_macNs(int32_t acc, int16_t x, int16_t y) */ + test_32_fn_32_16_16(FN(L_macNs), init_Overflow, init_Carry); + + /* L_msuNs */ + /* int32_t L_msuNs(int32_t acc, int16_t x, int16_t y) */ + test_32_fn_32_16_16(FN(L_msuNs), init_Overflow, init_Carry); + + + /* negate */ + /* int32_t negate(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* mult_r */ + /* int16_t mult_r(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = 
init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* norm_s */ + /* int16_t norm_s(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 12000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* norm_l */ + /* int16_t norm_l(int16_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 12000; + Overflow = init_Overflow; + 
Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0xABCDEF; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* L_shl */ + /* int32_t L_shl(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, 
"L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* L_shr */ + /* int32_t L_shr(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + 
fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x 
overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* shr_r */ + /* int16_t shr_r(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, 
%d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* mac_r */ + /* int16_t mac_r(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = -1; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x244; + svar16_b = 0x522; + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry 
%d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* msu_r */ + /* int32_t msu_r(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x321; + svar16_b = 0x243; + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* L_deposit_h */ + /* int32_t L_deposit_h(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", 
svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + + /* L_deposit_l */ + /* int32_t L_deposit_l(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + + /* L_shr_r */ + /* int32_t L_shr_r(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = 
init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 10; + Overflow = 
init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* L_abs */ + /* int32_t L_abs(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* L_sat */ + /* int32_t L_sat(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = 32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* div_s */ + /* int16_t 
div_s(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + /* NOTE(review): this third div_s case repeats the previous
   div_s(10000, 20000) test verbatim — looks like a copy-paste that was
   meant to use different operands; kept as-is because the committed
   reference output (ref-rvct.txt) depends on it. */ + svar16_a = 10000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + +} + +/* Driver: runs the DSP-intrinsics test list (exec_dspfns1) once for each
   of the four combinations of its two arguments — presumably the initial
   Overflow/Carry seeds, given how init_Overflow/init_Carry are used above
   — so every test is exercised under all four flag-initialization states. */ +void exec_dspfns(void) +{ + /* Only Overflow is cleared here, not Carry — NOTE(review): confirm
     intentional; each individual test above re-seeds both flags from
     init_Overflow/init_Carry before calling the intrinsic anyway. */ + Overflow = 0; + + exec_dspfns1(0, 0); + exec_dspfns1(0, 1); + exec_dspfns1(1, 0); + exec_dspfns1(1, 1); +} diff --git a/ref_integer.c b/ref_integer.c new file mode 100644 index 0000000..0b6151a --- /dev/null +++ b/ref_integer.c @@ -0,0 +1,279 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#include <stdio.h> +#include <stdint.h> + +#ifndef __arm__ +#error Target not supported +#endif +#include <dspfns.h> /* For Overflow */ + +extern FILE* ref_file; + +void exec_integer (void) +{ + int i; + uint32_t uvar, ures; + int32_t svar1, svar2, sres; + + uint8_t clz; + + fprintf(ref_file, "\n\nInteger (non-NEON) intrinsics\n"); + + /* __clz */ + /* uint8_t __clz(uint32_t val); */ + uvar = 0xFFFFFFFF; + for(i=0; i<=32; i++) { + clz = __clz(uvar); + fprintf(ref_file, "__clz(%#x) = %d\n", (unsigned int)uvar, clz); + uvar >>= 1; + } + + /* __qadd */ + /* int32_t __qadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x7000; + svar2 = 0x7000; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = 
__qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + /* __qdbl */ + /* int32_t __qdbl(int32_t val); */ + svar1 = 1; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0xEFFFFFFF; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + /* __qsub */ + /* int32_t __qsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + 
svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0; + svar2 = 0x80000000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + /* __rbit */ + /* uint32_t __rbit(uint32_t val); */ + uvar = 0x12345678; + ures = __rbit(uvar); + fprintf(ref_file, "__rbit(%#x) = %#x\n", + (unsigned int)uvar, (unsigned int)ures); + + /* __rev */ + /* uint32_t __rev(uint32_t val); */ + uvar = 0x12345678; + ures = __rev(uvar); + fprintf(ref_file, "__rev(%#x) = %#x\n", + (unsigned int)uvar, (unsigned int)ures); + + /* __ssat */ + /* int32_t __ssat(int32_t val, uint32_t sat); */ + svar1 = 0x12345678; + Overflow = 0; + sres = __ssat(svar1, 30); + fprintf(ref_file, "__ssat(%#x, 30) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x12345678; + Overflow = 0; + sres = __ssat(svar1, 19); + fprintf(ref_file, "__ssat(%#x, 19) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 29); + fprintf(ref_file, "__ssat(%#x, 29) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 12); + fprintf(ref_file, "__ssat(%#x, 12) = %#x sat %d\n", + (unsigned int)svar1, (unsigned 
int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 32); + fprintf(ref_file, "__ssat(%#x, 32) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 1); + fprintf(ref_file, "__ssat(%#x, 1) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + /* __usat */ + /* int32_t __usat(uint32_t val, uint32_t sat); */ + svar1 = 0x12345678; + Overflow = 0; + sres = __usat(svar1, 30); + fprintf(ref_file, "__usat(%#x, 30) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x12345678; + Overflow = 0; + sres = __usat(svar1, 19); + fprintf(ref_file, "__usat(%#x, 19) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 29); + fprintf(ref_file, "__usat(%#x, 29) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 12); + fprintf(ref_file, "__usat(%#x, 12) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 31); + fprintf(ref_file, "__usat(%#x, 31) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 0); + fprintf(ref_file, "__usat(%#x, 0) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); +} diff --git a/ref_v_binary_op.c b/ref_v_binary_op.c new file mode 100644 index 0000000..c09e710 --- /dev/null +++ b/ref_v_binary_op.c @@ -0,0 +1,88 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for binary operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. 
*/ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + TEST_VDUP(vector2, , int, s, 8, 8, 2); + TEST_VDUP(vector2, , int, s, 16, 4, -4); + TEST_VDUP(vector2, , int, s, 32, 2, 3); + TEST_VDUP(vector2, , int, s, 64, 1, 100); + TEST_VDUP(vector2, , uint, u, 8, 8, 20); + TEST_VDUP(vector2, , uint, u, 16, 4, 30); + TEST_VDUP(vector2, , uint, u, 32, 2, 40); + TEST_VDUP(vector2, , uint, u, 64, 1, 2); + TEST_VDUP(vector2, q, int, s, 8, 16, -10); + TEST_VDUP(vector2, q, int, s, 16, 8, -20); + TEST_VDUP(vector2, q, int, s, 32, 4, -30); + TEST_VDUP(vector2, q, int, s, 64, 2, 24); + TEST_VDUP(vector2, q, uint, u, 8, 16, 12); + TEST_VDUP(vector2, q, uint, u, 16, 8, 3); + TEST_VDUP(vector2, q, uint, u, 32, 4, 55); + TEST_VDUP(vector2, q, uint, u, 64, 2, 3); + + /* Apply a binary operator named INSN_NAME */ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_BINARY_OP, INSN_NAME); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_binary_sat_op.c b/ref_v_binary_sat_op.c new file mode 100644 index 0000000..5608104 --- /dev/null +++ b/ref_v_binary_sat_op.c @@ -0,0 +1,108 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated 
documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for binary saturating operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector1,vector2), then store the result. 
*/ +#define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + +#define TEST_BINARY_SAT_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector1" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + 
TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_comp_f_op.c b/ref_v_comp_f_op.c new file mode 100644 index 0000000..9ec7e32 --- /dev/null +++ b/ref_v_comp_f_op.c @@ -0,0 +1,87 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + int i; + + /* Basic test: y=vcomp(x,x), then store the result. */ +#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N)) + +#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ + TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used for vector + comparison */ + TEST_VDUP(vector2, , float, f, 32, 2, -16.0); + TEST_VDUP(vector2, q, float, f, 32, 4, -14.0); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + TEST_VDUP(vector2, , float, f, 32, 2, -10.0); + TEST_VDUP(vector2, q, float, f, 32, 4, 10.0); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_v_comp_op.c b/ref_v_comp_op.c new file mode 100644 index 0000000..6def17d --- /dev/null +++ b/ref_v_comp_op.c @@ -0,0 +1,178 @@ +/* + 
+Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + int i; + + /* Basic test: y=vcomp(x,x), then store the result. 
*/ +#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N)) + +#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ + TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + /* No need for 64 bits elements */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, int, 8, 16); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, uint, 8, 16); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* There is no 64 bits variant, we can't use the generic initializer */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + 
TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used for vector + comparison */ + TEST_VDUP(vector2, , int, s, 8, 8, -10); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + TEST_VDUP(vector2, , int, s, 32, 2, -16); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xF3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xFFF2); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF1); + TEST_VDUP(vector2, , float, f, 32, 2, -15.0); + + TEST_VDUP(vector2, q, int, s, 8, 16, -4); + TEST_VDUP(vector2, q, int, s, 16, 8, -10); + TEST_VDUP(vector2, q, int, s, 32, 4, -14); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xF4); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xFFF6); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF2); + TEST_VDUP(vector2, q, float, f, 32, 4, -14.0); + + /* The same result buffers are used multiple times, so output them + before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , int, s, uint, 8, 8); + TEST_VCOMP(INSN_NAME, , int, s, uint, 16, 4); + TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2); + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, , uint, u, uint, 8, 8); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 16, 4); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2); + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + 
TEST_VCOMP(INSN_NAME, q, int, s, uint, 8, 16); + TEST_VCOMP(INSN_NAME, q, int, s, uint, 16, 8); + TEST_VCOMP(INSN_NAME, q, int, s, uint, 32, 4); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 8, 16); + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 16, 8); + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 32, 4); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + /* Extra tests to have 100% coverage on all the variants */ + TEST_VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VDUP(vector2, , int, s, 32, 2, -15); + TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VDUP(vector2, , float, f, 32, 2, -16.0); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); +} diff --git a/ref_v_unary_op.c b/ref_v_unary_op.c new file mode 100644 index 0000000..63d68e7 --- /dev/null +++ b/ref_v_unary_op.c @@ -0,0 +1,91 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, 
buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_unary_sat_op.c b/ref_v_unary_sat_op.c new file mode 100644 index 0000000..e854068 --- /dev/null +++ b/ref_v_unary_sat_op.c @@ -0,0 +1,95 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +/* Template file for unary saturating operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + +#define TEST_UNARY_SAT_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, 
s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_vaba.c b/ref_vaba.c new file mode 100644 index 0000000..bcbd68f --- /dev/null +++ b/ref_vaba.c @@ -0,0 +1,125 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VABA/VABAQ" +void exec_vaba (void) +{ + /* Basic test: v4=vaba(v1,v2,v3), then store the result. 
*/ +#define TEST_VABA(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_VABA_VAR(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, int, 8, 16); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 8, 16); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4) + + DECL_VABA_VAR(vector1); + DECL_VABA_VAR(vector2); + DECL_VABA_VAR(vector3); + DECL_VABA_VAR(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector1, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, 1); + TEST_VDUP(vector2, , int, s, 16, 4, -13); + TEST_VDUP(vector2, , int, s, 32, 2, 8); + TEST_VDUP(vector2, , uint, u, 8, 8, 1); + TEST_VDUP(vector2, , uint, u, 16, 4, 13); + TEST_VDUP(vector2, , uint, u, 32, 2, 8); + TEST_VDUP(vector2, q, int, s, 8, 16, 10); + TEST_VDUP(vector2, q, int, s, 16, 8, -12); + 
TEST_VDUP(vector2, q, int, s, 32, 4, 32); + TEST_VDUP(vector2, q, uint, u, 8, 16, 10); + TEST_VDUP(vector2, q, uint, u, 16, 8, 12); + TEST_VDUP(vector2, q, uint, u, 32, 4, 32); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector3, , int, s, 8, 8, -5); + TEST_VDUP(vector3, , int, s, 16, 4, 25); + TEST_VDUP(vector3, , int, s, 32, 2, -40); + TEST_VDUP(vector3, , uint, u, 8, 8, 100); + TEST_VDUP(vector3, , uint, u, 16, 4, 2340); + TEST_VDUP(vector3, , uint, u, 32, 2, 0xffffffff); + TEST_VDUP(vector3, q, int, s, 8, 16, -100); + TEST_VDUP(vector3, q, int, s, 16, 8, -3000); + TEST_VDUP(vector3, q, int, s, 32, 4, 10000); + TEST_VDUP(vector3, q, uint, u, 8, 16, 2); + TEST_VDUP(vector3, q, uint, u, 16, 8, 3); + TEST_VDUP(vector3, q, uint, u, 32, 4, 4); + + TEST_VABA(, int, s, 8, 8); + TEST_VABA(, int, s, 16, 4); + TEST_VABA(, int, s, 32, 2); + TEST_VABA(, uint, u, 8, 8); + TEST_VABA(, uint, u, 16, 4); + TEST_VABA(, uint, u, 32, 2); + TEST_VABA(q, int, s, 8, 16); + TEST_VABA(q, int, s, 16, 8); + TEST_VABA(q, int, s, 32, 4); + TEST_VABA(q, uint, u, 8, 16); + TEST_VABA(q, uint, u, 16, 8); + TEST_VABA(q, uint, u, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vabal.c b/ref_vabal.c new file mode 100644 index 0000000..274901b --- /dev/null +++ b/ref_vabal.c @@ -0,0 +1,103 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VABAL" +void exec_vabal (void) +{ + /* Basic test: v4=vabal(v1,v2,v3), then store the result. */ +#define TEST_VABAL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vabal_##T2##W(VECT_VAR(vector1, T1, W2, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_VABAL_VAR_LONG(VAR) \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, int, 64, 2); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, uint, 64, 2) + +#define DECL_VABAL_VAR_SHORT(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2) + + DECL_VABAL_VAR_LONG(vector1); + DECL_VABAL_VAR_SHORT(vector2); + DECL_VABAL_VAR_SHORT(vector3); + DECL_VABAL_VAR_LONG(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + 
TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 64, 2); + + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, 1); + TEST_VDUP(vector2, , int, s, 16, 4, -13); + TEST_VDUP(vector2, , int, s, 32, 2, 8); + TEST_VDUP(vector2, , uint, u, 8, 8, 1); + TEST_VDUP(vector2, , uint, u, 16, 4, 13); + TEST_VDUP(vector2, , uint, u, 32, 2, 8); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector3, , int, s, 8, 8, -5); + TEST_VDUP(vector3, , int, s, 16, 4, 25); + TEST_VDUP(vector3, , int, s, 32, 2, -40); + TEST_VDUP(vector3, , uint, u, 8, 8, 100); + TEST_VDUP(vector3, , uint, u, 16, 4, 2340); + TEST_VDUP(vector3, , uint, u, 32, 2, 0xffffffff); + + TEST_VABAL(int, s, 8, 16, 8); + TEST_VABAL(int, s, 16, 32, 4); + TEST_VABAL(int, s, 32, 64, 2); + TEST_VABAL(uint, u, 8, 16, 8); + TEST_VABAL(uint, u, 16, 32, 4); + TEST_VABAL(uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vabd.c b/ref_vabd.c new file mode 100644 index 0000000..5b1420c --- /dev/null +++ b/ref_vabd.c @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VABD/VABDQ" +void exec_vabd (void) +{ + /* Basic test: v4=vabd(v1,v2), then store the result. */ +#define TEST_VABD(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vabd##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_VABD_VAR(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 8, 16); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 8, 16); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + DECL_VABD_VAR(vector1); + DECL_VABD_VAR(vector2); + DECL_VABD_VAR(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, 
int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, 1); + TEST_VDUP(vector2, , int, s, 16, 4, -13); + TEST_VDUP(vector2, , int, s, 32, 2, 8); + TEST_VDUP(vector2, , uint, u, 8, 8, 1); + TEST_VDUP(vector2, , uint, u, 16, 4, 13); + TEST_VDUP(vector2, , uint, u, 32, 2, 8); + TEST_VDUP(vector2, , float, f, 32, 2, 8.3); + TEST_VDUP(vector2, q, int, s, 8, 16, 10); + TEST_VDUP(vector2, q, int, s, 16, 8, -12); + TEST_VDUP(vector2, q, int, s, 32, 4, 32); + TEST_VDUP(vector2, q, uint, u, 8, 16, 10); + TEST_VDUP(vector2, q, uint, u, 16, 8, 12); + TEST_VDUP(vector2, q, uint, u, 32, 4, 32); + TEST_VDUP(vector2, q, float, f, 32, 4, 32.12); + + TEST_VABD(, int, s, 8, 8); + TEST_VABD(, int, s, 16, 4); + TEST_VABD(, int, s, 32, 2); + TEST_VABD(, uint, u, 8, 8); + TEST_VABD(, uint, u, 16, 4); + TEST_VABD(, uint, u, 32, 2); + TEST_VABD(, float, f, 32, 2); + TEST_VABD(q, int, s, 8, 16); + TEST_VABD(q, int, s, 16, 8); + TEST_VABD(q, int, s, 32, 4); + TEST_VABD(q, uint, u, 8, 16); + TEST_VABD(q, uint, u, 16, 8); + TEST_VABD(q, uint, u, 32, 4); + TEST_VABD(q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vabdl.c b/ref_vabdl.c new file mode 100644 index 0000000..a5d0d7f --- /dev/null +++ b/ref_vabdl.c @@ -0,0 +1,93 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following 
conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VABDL" +void exec_vabdl (void) +{ + /* Basic test: v4=vabdl(v1,v2), then store the result. */ +#define TEST_VABDL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vabdl_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_VABDL_VAR_LONG(VAR) \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, int, 64, 2); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, uint, 64, 2) + +#define DECL_VABDL_VAR_SHORT(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2) + + DECL_VABDL_VAR_SHORT(vector1); + DECL_VABDL_VAR_SHORT(vector2); + DECL_VABDL_VAR_LONG(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 
2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, 1); + TEST_VDUP(vector2, , int, s, 16, 4, -13); + TEST_VDUP(vector2, , int, s, 32, 2, 8); + TEST_VDUP(vector2, , uint, u, 8, 8, 1); + TEST_VDUP(vector2, , uint, u, 16, 4, 13); + TEST_VDUP(vector2, , uint, u, 32, 2, 8); + + TEST_VABDL(int, s, 8, 16, 8); + TEST_VABDL(int, s, 16, 32, 4); + TEST_VABDL(int, s, 32, 64, 2); + TEST_VABDL(uint, u, 8, 16, 8); + TEST_VABDL(uint, u, 16, 32, 4); + TEST_VABDL(uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vabs.c b/ref_vabs.c new file mode 100644 index 0000000..d8f1e2d --- /dev/null +++ b/ref_vabs.c @@ -0,0 +1,54 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vabs +#define TEST_MSG "VABS/VABSQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vabs_f32(void); +#define EXTRA_TESTS exec_vabs_f32 + +#include "ref_v_unary_op.c" + +void exec_vabs_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + TEST_VDUP(vector, , float, f, 32, 2, -2.3); + TEST_VDUP(vector, q, float, f, 32, 4, 3.4); + + TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vadd.c b/ref_vadd.c new file mode 100644 index 0000000..29d96c6 --- /dev/null +++ b/ref_vadd.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vadd +#define TEST_MSG "VADD/VADDQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vadd_f32(void); +#define EXTRA_TESTS exec_vadd_f32 + +#include "ref_v_binary_op.c" + +void exec_vadd_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + TEST_VDUP(vector, , float, f, 32, 2, 2.3); + TEST_VDUP(vector, q, float, f, 32, 4, 3.4); + + TEST_VDUP(vector2, , float, f, 32, 2, 4.5); + TEST_VDUP(vector2, q, float, f, 32, 4, 5.6); + + TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vaddhn.c b/ref_vaddhn.c new file mode 100644 index 0000000..284396a --- /dev/null +++ b/ref_vaddhn.c @@ -0,0 +1,86 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#include <stdint.h> + +#ifndef INSN_NAME +#define INSN_NAME vaddhn +#define TEST_MSG "VADDHN" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result. */ +#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + +#define TEST_VADDHN(INSN, T1, T2, W, W2, N) \ + TEST_VADDHN1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector1); + DECL_VARIABLE_128BITS_VARIANTS(vector2); + + clean_results (); + + /* Fill input vector1 and vector2 with arbitrary values */ + TEST_VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1)); + TEST_VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1)); + TEST_VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1)); + TEST_VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1)); + TEST_VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1)); + TEST_VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1)); + + TEST_VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX); + TEST_VDUP(vector2, q, int, s, 32, 4, 
(uint32_t)UINT16_MAX); + TEST_VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX); + TEST_VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX); + TEST_VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX); + TEST_VDUP(vector2, q, uint, u, 64, 2, (uint64_t)UINT32_MAX); + + TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8); + TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4); + TEST_VADDHN(INSN_NAME, int, s, 64, 32, 2); + TEST_VADDHN(INSN_NAME, uint, u, 16, 8, 8); + TEST_VADDHN(INSN_NAME, uint, u, 32, 16, 4); + TEST_VADDHN(INSN_NAME, uint, u, 64, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vaddl.c b/ref_vaddl.c new file mode 100644 index 0000000..74fce77 --- /dev/null +++ b/ref_vaddl.c @@ -0,0 +1,104 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vaddl +#define TEST_MSG "VADDL" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=vaddl(x,x), then store the result. */ +#define TEST_VADDL1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VADDL(INSN, T1, T2, W, W2, N) \ + TEST_VADDL1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, -13); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + TEST_VDUP(vector2, 
, int, s, 32, 2, -16); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xf3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xfff1); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xfffffff0); + + TEST_VADDL(INSN_NAME, int, s, 8, 16, 8); + TEST_VADDL(INSN_NAME, int, s, 16, 32, 4); + TEST_VADDL(INSN_NAME, int, s, 32, 64, 2); + TEST_VADDL(INSN_NAME, uint, u, 8, 16, 8); + TEST_VADDL(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDL(INSN_NAME, uint, u, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vaddw.c b/ref_vaddw.c new file mode 100644 index 0000000..324e843 --- /dev/null +++ b/ref_vaddw.c @@ -0,0 +1,104 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vaddw +#define TEST_MSG "VADDW" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=vaddw(x,x), then store the result. */ +#define TEST_VADDW1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W2, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VADDW(INSN, T1, T2, W, W2, N) \ + TEST_VADDW1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, -13); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + 
TEST_VDUP(vector2, , int, s, 32, 2, -16); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xf3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xfff1); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xfffffff0); + + TEST_VADDW(INSN_NAME, int, s, 8, 16, 8); + TEST_VADDW(INSN_NAME, int, s, 16, 32, 4); + TEST_VADDW(INSN_NAME, int, s, 32, 64, 2); + TEST_VADDW(INSN_NAME, uint, u, 8, 16, 8); + TEST_VADDW(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDW(INSN_NAME, uint, u, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vand.c b/ref_vand.c new file mode 100644 index 0000000..57e9013 --- /dev/null +++ b/ref_vand.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vand +#define TEST_MSG "VAND/VANDQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vbic.c b/ref_vbic.c new file mode 100644 index 0000000..0d033ac --- /dev/null +++ b/ref_vbic.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vbic +#define TEST_MSG "VBIC/VBICQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vbsl.c b/ref_vbsl.c new file mode 100644 index 0000000..3bd8933 --- /dev/null +++ b/ref_vbsl.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VBSL/VBSLQ" +void exec_vbsl (void) +{ + /* Basic test: y=vbsl(unsigned_vec,x,x), then store the result. 
*/ +#define TEST_VBSL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vbsl##Q##_##T2##W(VECT_VAR(vector_first, T3, W, N), \ + VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_UNSIGNED_VARIANTS(vector_first); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used for vector + comparison. As we want different values for each type variant, we + can't use generic initialization macros. */ + TEST_VDUP(vector2, , int, s, 8, 8, -10); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + TEST_VDUP(vector2, , int, s, 32, 2, -30); + TEST_VDUP(vector2, , int, s, 64, 1, -33); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xF3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xFFF2); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); + TEST_VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3); + TEST_VDUP(vector2, , float, f, 32, 2, -30.3); + + TEST_VDUP(vector2, q, int, s, 8, 16, -10); + TEST_VDUP(vector2, q, int, s, 16, 8, -14); + TEST_VDUP(vector2, q, int, s, 32, 4, -30); + TEST_VDUP(vector2, q, int, s, 64, 2, -33); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xF3); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xFFF2); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF0); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3); + TEST_VDUP(vector2, q, float, f, 32, 4, -30.4); + + TEST_VDUP(vector_first, , uint, u, 8, 8, 0xF4); + TEST_VDUP(vector_first, , uint, u, 16, 4, 0xFFF6); + TEST_VDUP(vector_first, , uint, u, 32, 2, 0xFFFFFFF2); + TEST_VDUP(vector_first, , uint, u, 64, 1, 0xFFFFFFF2); + TEST_VDUP(vector_first, 
q, uint, u, 8, 16, 0xF4); + TEST_VDUP(vector_first, q, uint, u, 16, 8, 0xFFF6); + TEST_VDUP(vector_first, q, uint, u, 32, 4, 0xFFFFFFF2); + TEST_VDUP(vector_first, q, uint, u, 64, 2, 0xFFFFFFF2); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VBSL, uint); + TEST_VBSL(uint, , float, f, 32, 2); + TEST_VBSL(uint, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcage.c b/ref_vcage.c new file mode 100644 index 0000000..b9919f9 --- /dev/null +++ b/ref_vcage.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcage +#define TEST_MSG "VCAGE/VCAGEQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcagt.c b/ref_vcagt.c new file mode 100644 index 0000000..edb6fa0 --- /dev/null +++ b/ref_vcagt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcagt +#define TEST_MSG "VCAGT/VCAGTQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcale.c b/ref_vcale.c new file mode 100644 index 0000000..b221f79 --- /dev/null +++ b/ref_vcale.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcale +#define TEST_MSG "VCALE/VCALEQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcalt.c b/ref_vcalt.c new file mode 100644 index 0000000..189a9ae --- /dev/null +++ b/ref_vcalt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcalt +#define TEST_MSG "VCALT/VCALTQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vceq.c b/ref_vceq.c new file mode 100644 index 0000000..100f201 --- /dev/null +++ b/ref_vceq.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vceq +#define TEST_MSG "VCEQ/VCEQQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcge.c b/ref_vcge.c new file mode 100644 index 0000000..9e1fdea --- /dev/null +++ b/ref_vcge.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcge +#define TEST_MSG "VCGE/VCGEQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcgt.c b/ref_vcgt.c new file mode 100644 index 0000000..afd8fe5 --- /dev/null +++ b/ref_vcgt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcgt +#define TEST_MSG "VCGT/VCGTQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcle.c b/ref_vcle.c new file mode 100644 index 0000000..3da8d1a --- /dev/null +++ b/ref_vcle.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcle +#define TEST_MSG "VCLE/VCLEQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcls.c b/ref_vcls.c new file mode 100644 index 0000000..5c96097 --- /dev/null +++ b/ref_vcls.c @@ -0,0 +1,107 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcls +#define TEST_MSG "VCLS/VCLSQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, , int, s, 8, 8, 0x1); + TEST_VDUP(vector, , int, s, 16, 4, 0x1234); + TEST_VDUP(vector, , int, s, 32, 2, 0x34); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x678); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex2 (TEST_MSG, " (positive input)"); + + + /* Fill input vector with arbitrary values (negative) */ + TEST_VDUP(vector, , int, s, 8, 8, 0xFF); + TEST_VDUP(vector, , int, s, 16, 4, 0xC234); + TEST_VDUP(vector, , int, s, 32, 2, 0xDEAD0034); + TEST_VDUP(vector, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector, q, int, s, 16, 8, 0xE234); + TEST_VDUP(vector, q, int, s, 32, 4, 0xBEEF0678); + + /* Apply a unary operator 
named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex2 (TEST_MSG, " (negative input)"); +} diff --git a/ref_vclt.c b/ref_vclt.c new file mode 100644 index 0000000..ce974d9 --- /dev/null +++ b/ref_vclt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vclt +#define TEST_MSG "VCLT/VCLTQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vclz.c b/ref_vclz.c new file mode 100644 index 0000000..ff57226 --- /dev/null +++ b/ref_vclz.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vclz +#define TEST_MSG "VCLZ/VCLZQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, , int, s, 8, 8, 0x84); + TEST_VDUP(vector, , int, s, 16, 4, 0x1234); + TEST_VDUP(vector, , int, s, 32, 2, 0x5678); + TEST_VDUP(vector, , uint, u, 8, 8, 0x34); + TEST_VDUP(vector, , uint, u, 16, 4, 0x8234); + TEST_VDUP(vector, , uint, u, 32, 2, 0x7654321); + TEST_VDUP(vector, q, int, s, 8, 16, 0x34); + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x12345678); + TEST_VDUP(vector, q, uint, u, 8, 16, 0x13); + TEST_VDUP(vector, q, uint, u, 16, 8, 0x4); + TEST_VDUP(vector, q, uint, u, 
32, 4, 0x1); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4); + TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcnt.c b/ref_vcnt.c new file mode 100644 index 0000000..c680620 --- /dev/null +++ b/ref_vcnt.c @@ -0,0 +1,80 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcnt +#define TEST_MSG "VCNT/VCNTQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, uint, 8, 16); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, uint, 8, 16); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, , int, s, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 8, 8, 0x34); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xBD); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcombine.c b/ref_vcombine.c new file mode 100644 index 0000000..0aa3cbf --- /dev/null +++ b/ref_vcombine.c @@ -0,0 +1,77 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in 
the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VCOMBINE" +void exec_vcombine (void) +{ + /* Basic test: vec128=vcombine(vec64_a, vec64_b), then store the result. 
*/ +#define TEST_VCOMBINE(T1, T2, W, N, N2) \ + VECT_VAR(vector128, T1, W, N2) = \ + vcombine_##T2##W(VECT_VAR(vector64_a, T1, W, N), \ + VECT_VAR(vector64_b, T1, W, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vector128, T1, W, N2)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64_a); + DECL_VARIABLE_64BITS_VARIANTS(vector64_b); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_64BITS_VARIANTS_2_5(TEST_VLOAD, vector64_a, buffer); + + TEST_VLOAD(vector64_a, buffer, , float, f, 32, 2); + + TEST_VDUP(vector64_b, , int, s, 8, 8, 0x11); + TEST_VDUP(vector64_b, , int, s, 16, 4, 0x22); + TEST_VDUP(vector64_b, , int, s, 32, 2, 0x33); + TEST_VDUP(vector64_b, , int, s, 64, 1, 0x44); + TEST_VDUP(vector64_b, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector64_b, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector64_b, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector64_b, , uint, u, 64, 1, 0x88); + TEST_VDUP(vector64_b, , float, f, 32, 2, 3.3); + + clean_results (); + + TEST_VCOMBINE(int, s, 8, 8, 16); + TEST_VCOMBINE(int, s, 16, 4, 8); + TEST_VCOMBINE(int, s, 32, 2, 4); + TEST_VCOMBINE(int, s, 64, 1, 2); + TEST_VCOMBINE(uint, u, 8, 8, 16); + TEST_VCOMBINE(uint, u, 16, 4, 8); + TEST_VCOMBINE(uint, u, 32, 2, 4); + TEST_VCOMBINE(uint, u, 64, 1, 2); + TEST_VCOMBINE(float, f, 32, 2, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcreate.c b/ref_vcreate.c new file mode 100644 index 0000000..9f4ee4b --- /dev/null +++ b/ref_vcreate.c @@ -0,0 +1,99 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to 
whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcreate +#define TEST_MSG "VCREATE" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=vcreate(x), then store the result. 
*/ +#define TEST_VCREATE(T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = vcreate_##T2##W(VECT_VAR(val, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + +#define DECL_VAL(VAR, T1, W, N) \ + uint64_t VECT_VAR(VAR, T1, W, N) + + DECL_VAL(val, int, 8, 8); + DECL_VAL(val, int, 16, 4); + DECL_VAL(val, int, 32, 2); + DECL_VAL(val, int, 64, 1); + DECL_VAL(val, float, 32, 2); + DECL_VAL(val, uint, 8, 8); + DECL_VAL(val, uint, 16, 4); + DECL_VAL(val, uint, 32, 2); + DECL_VAL(val, uint, 64, 1); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + + clean_results (); + + /* Initialize input values arbitrarily */ + VECT_VAR(val, int, 8, 8) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 16, 4) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 32, 2) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 64, 1) = 0x123456789abcdef0LL; + VECT_VAR(val, float, 32, 2) = 0x123456789abcdef0LL; + VECT_VAR(val, uint, 8, 8) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 16, 4) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 32, 2) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 64, 1) = 0x123456789abcdef0ULL; + + TEST_VCREATE(int, s, 8, 8); + TEST_VCREATE(int, s, 16, 4); + TEST_VCREATE(int, s, 32, 2); + TEST_VCREATE(float, f, 32, 2); + TEST_VCREATE(int, s, 64, 1); + TEST_VCREATE(uint, u, 8, 8); + TEST_VCREATE(uint, u, 16, 4); + TEST_VCREATE(uint, u, 32, 2); + TEST_VCREATE(uint, u, 64, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcvt.c b/ref_vcvt.c new file mode 100644 index 0000000..45553d0 --- /dev/null +++ b/ref_vcvt.c @@ -0,0 +1,160 @@ +/* + 
+Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VCVT/VCVTQ" +void exec_vcvt (void) +{ + int i; + + /* Basic test: y=vcvt(x), then store the result. 
*/ +#define TEST_VCVT(Q, T1, T2, W, N, TS1, TS2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VCVT_FP(Q, T1, T2, W, N, TS1, TS2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VCVT_N(Q, T1, T2, W, N, TS1, TS2, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VCVT_N_FP(Q, T1, T2, W, N, TS1, TS2, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Make sure some elements have a fractional part, to exercise + integer conversions */ + TEST_VSET_LANE(vector, , float, f, 32, 2, 0, -15.3); + TEST_VSET_LANE(vector, , float, f, 32, 2, 1, 5.3); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, -15.3); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, 5.3); + + /* The same result buffers are used multiple times, so we output + them before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + + /* 
vcvt_f32_xx */ + TEST_VCVT_FP(, float, f, 32, 2, int, s); + TEST_VCVT_FP(, float, f, 32, 2, uint, u); + + /* vcvtq_f32_xx */ + TEST_VCVT_FP(q, float, f, 32, 4, int, s); + TEST_VCVT_FP(q, float, f, 32, 4, uint, u); + + /* vcvt_xx_f32 */ + TEST_VCVT(, int, s, 32, 2, float, f); + TEST_VCVT(, uint, u, 32, 2, float, f); + + /* vcvtq_xx_f32 */ + TEST_VCVT(q, int, s, 32, 4, float, f); + TEST_VCVT(q, uint, u, 32, 4, float, f); + + /* The same result buffers are used multiple times, so output them + before overwriting them */ +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + + /* vcvt_n_f32_xx */ + TEST_VCVT_N_FP(, float, f, 32, 2, int, s, 2); + TEST_VCVT_N_FP(, float, f, 32, 2, uint, u, 7); + + /* vcvtq_n_f32_xx */ + TEST_VCVT_N_FP(q, float, f, 32, 4, int, s, 30); + TEST_VCVT_N_FP(q, float, f, 32, 4, uint, u, 12); + + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 20); + TEST_VCVT_N(, uint, u, 32, 2, float, f, 2); + + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 13); + TEST_VCVT_N(q, uint, u, 32, 4, float, f, 1); + + /* Check rounding */ +#undef TEST_MSG +#define TEST_MSG "VCVT/VCVTQ" + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check rounding)"); + TEST_VDUP(vector, , float, f, 32, 2, 10.4); + TEST_VDUP(vector, q, float, f, 32, 4, 125.9); + /* vcvt_xx_f32 */ + TEST_VCVT(, int, s, 32, 2, float, f); + TEST_VCVT(, uint, u, 32, 2, float, f); + /* vcvtq_xx_f32 */ + TEST_VCVT(q, int, s, 32, 4, float, f); + TEST_VCVT(q, uint, u, 32, 4, float, f); + +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check rounding)"); + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 20); + TEST_VCVT_N(, uint, u, 32, 2, float, f, 20); + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 13); + TEST_VCVT_N(q, uint, u, 32, 4, float, f, 13); + +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", 
TEST_MSG " (check saturation)"); + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 31); + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 31); +} diff --git a/ref_vdup.c b/ref_vdup.c new file mode 100644 index 0000000..42085ab --- /dev/null +++ b/ref_vdup.c @@ -0,0 +1,109 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VDUP/VDUPQ" +void exec_vdup (void) +{ + int i; + + /* Basic test: vec=vdup(x), then store the result. */ +#undef TEST_VDUP +#define TEST_VDUP(Q, T1, T2, W, N) \ + VECT_VAR(vector, T1, W, N) = \ + vdup##Q##_n_##T2##W(VECT_VAR(buffer, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* Basic test: vec=vmov(x), then store the result. 
*/ +#define TEST_VMOV(Q, T1, T2, W, N) \ + VECT_VAR(vector, T1, W, N) = \ + vmov##Q##_n_##T2##W(VECT_VAR(buffer, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + for (i=0; i< 3; i++) { + clean_results (); + + TEST_VDUP(, int, s, 8, 8); + TEST_VDUP(, int, s, 16, 4); + TEST_VDUP(, int, s, 32, 2); + TEST_VDUP(, int, s, 64, 1); + TEST_VDUP(, uint, u, 8, 8); + TEST_VDUP(, uint, u, 16, 4); + TEST_VDUP(, uint, u, 32, 2); + TEST_VDUP(, uint, u, 64, 1); + TEST_VDUP(, float, f, 32, 2); + + TEST_VDUP(q, int, s, 8, 16); + TEST_VDUP(q, int, s, 16, 8); + TEST_VDUP(q, int, s, 32, 4); + TEST_VDUP(q, int, s, 64, 2); + TEST_VDUP(q, uint, u, 8, 16); + TEST_VDUP(q, uint, u, 16, 8); + TEST_VDUP(q, uint, u, 32, 4); + TEST_VDUP(q, uint, u, 64, 2); + TEST_VDUP(q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); + } + +#undef TEST_MSG +#define TEST_MSG "VMOV/VMOVQ" + for (i=0; i< 3; i++) { + clean_results (); + + TEST_VMOV(, int, s, 8, 8); + TEST_VMOV(, int, s, 16, 4); + TEST_VMOV(, int, s, 32, 2); + TEST_VMOV(, int, s, 64, 1); + TEST_VMOV(, uint, u, 8, 8); + TEST_VMOV(, uint, u, 16, 4); + TEST_VMOV(, uint, u, 32, 2); + TEST_VMOV(, uint, u, 64, 1); + TEST_VMOV(, float, f, 32, 2); + + TEST_VMOV(q, int, s, 8, 16); + TEST_VMOV(q, int, s, 16, 8); + TEST_VMOV(q, int, s, 32, 4); + TEST_VMOV(q, int, s, 64, 2); + TEST_VMOV(q, uint, u, 8, 16); + TEST_VMOV(q, uint, u, 16, 8); + TEST_VMOV(q, uint, u, 32, 4); + TEST_VMOV(q, uint, u, 64, 2); + TEST_VMOV(q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); + } +} diff --git a/ref_vdup_lane.c b/ref_vdup_lane.c new file mode 100644 index 0000000..e93969d --- /dev/null +++ b/ref_vdup_lane.c @@ -0,0 +1,77 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this 
software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VDUP_LANE/VDUP_LANEQ" +void exec_vdup_lane (void) +{ + /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. 
*/ +#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vdup##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N2), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + /* Input vector can only have 64 bits */ + DECL_VARIABLE_64BITS_VARIANTS(vector); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_64BITS_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + + /* Choose lane arbitrarily */ + TEST_VDUP_LANE(, int, s, 8, 8, 8, 1); + TEST_VDUP_LANE(, int, s, 16, 4, 4, 2); + TEST_VDUP_LANE(, int, s, 32, 2, 2, 1); + TEST_VDUP_LANE(, int, s, 64, 1, 1, 0); + TEST_VDUP_LANE(, uint, u, 8, 8, 8, 7); + TEST_VDUP_LANE(, uint, u, 16, 4, 4, 3); + TEST_VDUP_LANE(, uint, u, 32, 2, 2, 1); + TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); + TEST_VDUP_LANE(, float, f, 32, 2, 2, 1); + + TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2); + TEST_VDUP_LANE(q, int, s, 16, 8, 4, 3); + TEST_VDUP_LANE(q, int, s, 32, 4, 2, 1); + TEST_VDUP_LANE(q, int, s, 64, 2, 1, 0); + TEST_VDUP_LANE(q, uint, u, 8, 16, 8, 5); + TEST_VDUP_LANE(q, uint, u, 16, 8, 4, 1); + TEST_VDUP_LANE(q, uint, u, 32, 4, 2, 0); + TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); + TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_veor.c b/ref_veor.c new file mode 100644 index 0000000..50226ff --- /dev/null +++ b/ref_veor.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the 
Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME veor +#define TEST_MSG "VEOR/VEORQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vext.c b/ref_vext.c new file mode 100644 index 0000000..a14a5ac --- /dev/null +++ b/ref_vext.c @@ -0,0 +1,100 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VEXT/VEXTQ" +void exec_vext (void) +{ + /* vector_res = vext(vector1,vector2,offset), then store the result. */ +#define TEST_VEXT(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vext##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + TEST_VDUP(vector2, , float, f, 32, 2, 33.6); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + 
TEST_VDUP(vector2, q, float, f, 32, 4, 33.2); + + /* Choose arbitrary extract offsets */ + TEST_VEXT(, int, s, 8, 8, 7); + TEST_VEXT(, int, s, 16, 4, 3); + TEST_VEXT(, int, s, 32, 2, 1); + TEST_VEXT(, int, s, 64, 1, 0); + TEST_VEXT(, uint, u, 8, 8, 6); + TEST_VEXT(, uint, u, 16, 4, 2); + TEST_VEXT(, uint, u, 32, 2, 1); + TEST_VEXT(, uint, u, 64, 1, 0); + TEST_VEXT(, float, f, 32, 2, 1); + + TEST_VEXT(q, int, s, 8, 16, 14); + TEST_VEXT(q, int, s, 16, 8, 7); + TEST_VEXT(q, int, s, 32, 4, 3); + TEST_VEXT(q, int, s, 64, 2, 1); + TEST_VEXT(q, uint, u, 8, 16, 12); + TEST_VEXT(q, uint, u, 16, 8, 6); + TEST_VEXT(q, uint, u, 32, 4, 3); + TEST_VEXT(q, uint, u, 64, 2, 1); + TEST_VEXT(q, float, f, 32, 4, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vget_high.c b/ref_vget_high.c new file mode 100644 index 0000000..47ff9ee --- /dev/null +++ b/ref_vget_high.c @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VGET_HIGH" +void exec_vget_high (void) +{ + /* Basic test: vec64=vget_high(vec128), then store the result. */ +#define TEST_VGET_HIGH(T1, T2, W, N, N2) \ + VECT_VAR(vector64, T1, W, N) = \ + vget_high_##T2##W(VECT_VAR(vector128, T1, W, N2)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_128BITS_VARIANTS_2_5(TEST_VLOAD, vector128, buffer); + TEST_VLOAD(vector128, buffer, q, float, f, 32, 4); + + clean_results (); + + TEST_VGET_HIGH(int, s, 8, 8, 16); + TEST_VGET_HIGH(int, s, 16, 4, 8); + TEST_VGET_HIGH(int, s, 32, 2, 4); + TEST_VGET_HIGH(int, s, 64, 1, 2); + TEST_VGET_HIGH(uint, u, 8, 8, 16); + TEST_VGET_HIGH(uint, u, 16, 4, 8); + TEST_VGET_HIGH(uint, u, 32, 2, 4); + TEST_VGET_HIGH(uint, u, 64, 1, 2); + TEST_VGET_HIGH(float, f, 32, 2, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vget_lane.c b/ref_vget_lane.c new file mode 100644 index 0000000..e603f33 --- /dev/null +++ b/ref_vget_lane.c @@ -0,0 +1,93 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons 
to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vget_lane (void) +{ + /* vec=vget_lane(vec, lane), then store the result. */ +#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \ + VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)); \ + fprintf(ref_file, "%" PRIx##W ", ", VAR(var, T1, W)) + + /* Special variant for floating-point */ +#define TEST_VGET_LANE_F(Q, T1, T2, W, N, L) \ + VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)); \ + fprintf(ref_file, "%" PRIx##W ", ", *((uint##W##_t*)&VAR(var, T1, W))) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + /* Scalar variables */ + VAR_DECL(var, int, 8); + VAR_DECL(var, int, 16); + VAR_DECL(var, int, 32); + VAR_DECL(var, int, 64); + VAR_DECL(var, uint, 8); + VAR_DECL(var, uint, 16); + VAR_DECL(var, uint, 32); + VAR_DECL(var, uint, 64); + VAR_DECL(var, float, 32); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); 
+ TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + fprintf(ref_file, "\n%s output:\n", "VGET_LANE/VGETQ_LANE"); + + /* Choose lane arbitrarily */ + TEST_VGET_LANE(, int, s, 8, 8, 7); + TEST_VGET_LANE(, int, s, 16, 4, 3); + TEST_VGET_LANE(, int, s, 32, 2, 1); + TEST_VGET_LANE(, int, s, 64, 1, 0); + TEST_VGET_LANE(, uint, u, 8, 8, 6); + TEST_VGET_LANE(, uint, u, 16, 4, 2); + TEST_VGET_LANE(, uint, u, 32, 2, 1); + TEST_VGET_LANE(, uint, u, 64, 1, 0); + TEST_VGET_LANE_F(, float, f, 32, 2, 1); + + TEST_VGET_LANE(q, int, s, 8, 16, 15); + TEST_VGET_LANE(q, int, s, 16, 8, 5); + TEST_VGET_LANE(q, int, s, 32, 4, 3); + TEST_VGET_LANE(q, int, s, 64, 2, 1); + TEST_VGET_LANE(q, uint, u, 8, 16, 14); + TEST_VGET_LANE(q, uint, u, 16, 8, 6); + TEST_VGET_LANE(q, uint, u, 32, 4, 2); + TEST_VGET_LANE(q, uint, u, 64, 2, 1); + TEST_VGET_LANE_F(q, float, f, 32, 4, 3); + + fprintf(ref_file, "\n"); +} diff --git a/ref_vget_low.c b/ref_vget_low.c new file mode 100644 index 0000000..8a7170f --- /dev/null +++ b/ref_vget_low.c @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VGET_LOW" +void exec_vget_low (void) +{ + /* Basic test: vec64=vget_low(vec128), then store the result. */ +#define TEST_VGET_LOW(T1, T2, W, N, N2) \ + VECT_VAR(vector64, T1, W, N) = \ + vget_low_##T2##W(VECT_VAR(vector128, T1, W, N2)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_128BITS_VARIANTS_2_5(TEST_VLOAD, vector128, buffer); + TEST_VLOAD(vector128, buffer, q, float, f, 32, 4); + + clean_results (); + + TEST_VGET_LOW(int, s, 8, 8, 16); + TEST_VGET_LOW(int, s, 16, 4, 8); + TEST_VGET_LOW(int, s, 32, 2, 4); + TEST_VGET_LOW(int, s, 64, 1, 2); + TEST_VGET_LOW(uint, u, 8, 8, 16); + TEST_VGET_LOW(uint, u, 16, 4, 8); + TEST_VGET_LOW(uint, u, 32, 2, 4); + TEST_VGET_LOW(uint, u, 64, 1, 2); + TEST_VGET_LOW(float, f, 32, 2, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vhadd.c b/ref_vhadd.c new file mode 100644 index 0000000..f7ab21b --- /dev/null +++ b/ref_vhadd.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is 
+furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vhadd +#define TEST_MSG "VHADD/VHADDQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vhsub.c b/ref_vhsub.c new file mode 100644 index 0000000..859df59 --- /dev/null +++ b/ref_vhsub.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vhsub +#define TEST_MSG "VHSUB/VHSUBQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vld1.c b/ref_vld1.c new file mode 100644 index 0000000..ac4d91c --- /dev/null +++ b/ref_vld1.c @@ -0,0 +1,55 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1/VLD1Q" +void exec_vld1 (void) +{ + /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector) */ + /* This test actually tests vdl1 and vst1 at the same time */ +#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer); + + TEST_VLD1(vector, buffer, , float, f, 32, 2); + TEST_VLD1(vector, buffer, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vld1_dup.c b/ref_vld1_dup.c new file mode 100644 index 0000000..7dabecd --- /dev/null +++ b/ref_vld1_dup.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1_DUP/VLD1_DUPQ" +void exec_vld1_dup (void) +{ + int i; + + /* Fill vector with buffer item #i */ +#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = \ + vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + /* Try to read different places from the input buffer */ + for (i=0; i<3; i++) { + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer); + + TEST_VLD1_DUP(vector, buffer, , float, f, 32, 2); + TEST_VLD1_DUP(vector, buffer, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); + } +} diff --git a/ref_vld1_lane.c b/ref_vld1_lane.c new file mode 100644 index 0000000..7af7d03 --- /dev/null +++ b/ref_vld1_lane.c @@ -0,0 +1,102 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1_LANE/VLD1_LANEQ" +void exec_vld1_lane (void) +{ + /* Fill vector_src with 0xAA, then load 1 lane */ +#define TEST_VLD1_LANE(Q, T1, T2, W, N, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, W/8*N); \ + VECT_VAR(vector_src, T1, W, N) = \ + vld1##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + VECT_VAR(vector, T1, W, N) = \ + vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_VAR(vector_src, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_src); + + ARRAY(buffer_src, int, 8, 8); + ARRAY(buffer_src, int, 16, 4); + ARRAY(buffer_src, int, 32, 2); + ARRAY(buffer_src, int, 64, 1); + ARRAY(buffer_src, uint, 8, 8); + ARRAY(buffer_src, uint, 16, 4); + ARRAY(buffer_src, uint, 32, 2); + ARRAY(buffer_src, uint, 64, 1); + ARRAY(buffer_src, float, 32, 2); + + ARRAY(buffer_src, int, 8, 16); + ARRAY(buffer_src, int, 16, 8); + ARRAY(buffer_src, int, 32, 4); + ARRAY(buffer_src, int, 64, 2); + ARRAY(buffer_src, uint, 8, 16); + ARRAY(buffer_src, uint, 16, 8); + ARRAY(buffer_src, uint, 32, 4); + ARRAY(buffer_src, uint, 64, 2); + ARRAY(buffer_src, float, 32, 4); + + clean_results (); + + /* Choose lane arbitrarily */ + TEST_VLD1_LANE(, int, s, 8, 8, 6); + 
TEST_VLD1_LANE(, int, s, 16, 4, 3); + TEST_VLD1_LANE(, int, s, 32, 2, 1); + TEST_VLD1_LANE(, int, s, 64, 1, 0); + TEST_VLD1_LANE(, uint, u, 8, 8, 7); + TEST_VLD1_LANE(, uint, u, 16, 4, 3); + TEST_VLD1_LANE(, uint, u, 32, 2, 1); + TEST_VLD1_LANE(, uint, u, 64, 1, 0); + TEST_VLD1_LANE(, float, f, 32, 2, 1); + + TEST_VLD1_LANE(q, int, s, 8, 16, 15); + TEST_VLD1_LANE(q, int, s, 16, 8, 5); + TEST_VLD1_LANE(q, int, s, 32, 4, 2); + TEST_VLD1_LANE(q, int, s, 64, 2, 1); + TEST_VLD1_LANE(q, uint, u, 8, 16, 12); + TEST_VLD1_LANE(q, uint, u, 16, 8, 6); + TEST_VLD1_LANE(q, uint, u, 32, 4, 2); + TEST_VLD1_LANE(q, uint, u, 64, 2, 0); + TEST_VLD1_LANE(q, float, f, 32, 4, 2); + +#ifndef __CC_ARM + /* Check runtime assertions. With RVCT, the check is performed at + compile-time */ + // TEST_VLD1_LANE(, int, s, 64, 1, 1); +#endif + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vldX.c b/ref_vldX.c new file mode 100644 index 0000000..e52131e --- /dev/null +++ b/ref_vldX.c @@ -0,0 +1,157 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ +#define TEST_VLDX(Q, T1, T2, W, N, X) \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant */ +#define DECL_ALL_VLDX(X) \ + DECL_VLDX(int, 8, 8, X); \ + DECL_VLDX(int, 16, 4, X); \ + DECL_VLDX(int, 32, 2, X); \ + DECL_VLDX(int, 64, 1, X); \ + DECL_VLDX(uint, 8, 8, X); \ + DECL_VLDX(uint, 16, 4, X); \ + DECL_VLDX(uint, 32, 2, X); \ + DECL_VLDX(uint, 64, 1, X); \ + 
DECL_VLDX(float, 32, 2, X); \ + DECL_VLDX(int, 8, 16, X); \ + DECL_VLDX(int, 16, 8, X); \ + DECL_VLDX(int, 32, 4, X); \ + DECL_VLDX(uint, 8, 16, X); \ + DECL_VLDX(uint, 16, 8, X); \ + DECL_VLDX(uint, 32, 4, X); \ + DECL_VLDX(float, 32, 4, X) + +#define TEST_ALL_VLDX(X) \ + TEST_VLDX(, int, s, 8, 8, X); \ + TEST_VLDX(, int, s, 16, 4, X); \ + TEST_VLDX(, int, s, 32, 2, X); \ + TEST_VLDX(, int, s, 64, 1, X); \ + TEST_VLDX(, uint, u, 8, 8, X); \ + TEST_VLDX(, uint, u, 16, 4, X); \ + TEST_VLDX(, uint, u, 32, 2, X); \ + TEST_VLDX(, uint, u, 64, 1, X); \ + TEST_VLDX(, float, f, 32, 2, X); \ + TEST_VLDX(q, int, s, 8, 16, X); \ + TEST_VLDX(q, int, s, 16, 8, X); \ + TEST_VLDX(q, int, s, 32, 4, X); \ + TEST_VLDX(q, uint, u, 8, 16, X); \ + TEST_VLDX(q, uint, u, 16, 8, X); \ + TEST_VLDX(q, uint, u, 32, 4, X); \ + TEST_VLDX(q, float, f, 32, 4, X) + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 16, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + + DECL_ALL_VLDX(2); + DECL_ALL_VLDX(3); + DECL_ALL_VLDX(4); + + /* Check vld2/vld2q */ + clean_results (); +#define TEST_MSG "VLD2/VLD2Q" + TEST_ALL_VLDX(2); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3/vld3q */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3/VLD3Q" + TEST_ALL_VLDX(3); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + 
TEST_ALL_EXTRA_CHUNKS(3, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vld4/vld4q */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4/VLD4Q" + TEST_ALL_VLDX(4); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vldX_dup.c b/ref_vldX_dup.c new file mode 100644 index 0000000..a65441e --- /dev/null +++ b/ref_vldX_dup.c @@ -0,0 +1,136 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX_dup (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX_DUP(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ + /* Fill vector with buffer item #i */ +#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_dup_##T2##W(VECT_VAR(buffer, T1, W, N)); \ + \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_ALL_VLDX_DUP(X) \ + DECL_VLDX_DUP(int, 8, 8, X); \ + DECL_VLDX_DUP(int, 16, 4, X); \ + DECL_VLDX_DUP(int, 32, 2, X); \ + DECL_VLDX_DUP(int, 64, 1, X); \ + DECL_VLDX_DUP(uint, 8, 8, X); \ + DECL_VLDX_DUP(uint, 16, 4, X); \ + DECL_VLDX_DUP(uint, 32, 2, X); \ + DECL_VLDX_DUP(uint, 64, 1, X); \ + DECL_VLDX_DUP(float, 32, 2, X) + + +#define TEST_ALL_VLDX_DUP(X) \ + TEST_VLDX_DUP(, int, s, 8, 8, X); \ + TEST_VLDX_DUP(, int, s, 16, 4, X); \ + TEST_VLDX_DUP(, int, s, 32, 2, X); \ + TEST_VLDX_DUP(, int, s, 64, 1, X); \ + TEST_VLDX_DUP(, uint, 
u, 8, 8, X); \ + TEST_VLDX_DUP(, uint, u, 16, 4, X); \ + TEST_VLDX_DUP(, uint, u, 32, 2, X); \ + TEST_VLDX_DUP(, uint, u, 64, 1, X); \ + TEST_VLDX_DUP(, float, f, 32, 2, X) + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y) + + + DECL_ALL_VLDX_DUP(2); + DECL_ALL_VLDX_DUP(3); + DECL_ALL_VLDX_DUP(4); + + /* Check vld2_dup/vld2q_dup */ + clean_results (); +#define TEST_MSG "VLD2_DUP/VLD2Q_DUP" + TEST_ALL_VLDX_DUP(2); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(2, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3_dup/vld3q_dup */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3_DUP/VLD3Q_DUP" + TEST_ALL_VLDX_DUP(3); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(3, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vld4_dup/vld4q_dup */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4_DUP/VLD4Q_DUP" + TEST_ALL_VLDX_DUP(4); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(4, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vldX_lane.c b/ref_vldX_lane.c new file mode 100644 index 0000000..4fa4f57 --- /dev/null +++ b/ref_vldX_lane.c @@ -0,0 +1,170 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 
"Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX_lane (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX_LANE(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. 
*/ +#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ + sizeof(VECT_VAR(buffer_src, T1, W, N))); \ + \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ + vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ + L); \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))) + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant */ +#define DECL_ALL_VLDX_LANE(X) \ + DECL_VLDX_LANE(int, 8, 8, X); \ + DECL_VLDX_LANE(int, 16, 4, X); \ + DECL_VLDX_LANE(int, 32, 2, X); \ + DECL_VLDX_LANE(uint, 8, 8, X); \ + DECL_VLDX_LANE(uint, 16, 4, X); \ + DECL_VLDX_LANE(uint, 32, 2, X); \ + DECL_VLDX_LANE(int, 16, 8, X); \ + DECL_VLDX_LANE(int, 32, 4, X); \ + DECL_VLDX_LANE(uint, 16, 8, X); \ + DECL_VLDX_LANE(uint, 32, 4, X); \ + DECL_VLDX_LANE(float, 32, 2, X); \ + DECL_VLDX_LANE(float, 32, 4, X) + +#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] + + /* Use the same lanes regardless of the size of the array (X), for + simplicity */ +#define TEST_ALL_VLDX_LANE(X) \ + TEST_VLDX_LANE(, int, s, 8, 8, X, 7); \ + TEST_VLDX_LANE(, int, s, 16, 4, X, 2); \ + TEST_VLDX_LANE(, int, s, 32, 2, X, 0); \ + TEST_VLDX_LANE(, uint, u, 8, 8, X, 4); \ + TEST_VLDX_LANE(, uint, u, 16, 4, X, 3); \ + TEST_VLDX_LANE(, uint, u, 32, 2, X, 1); \ + TEST_VLDX_LANE(q, int, s, 16, 8, X, 6); \ + TEST_VLDX_LANE(q, int, s, 32, 
4, X, 2); \ + TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \ + TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \ + TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \ + TEST_VLDX_LANE(q, float, f, 32, 4, X, 2) + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + + /* Declare the temporary buffers / variables */ + DECL_ALL_VLDX_LANE(2); + DECL_ALL_VLDX_LANE(3); + DECL_ALL_VLDX_LANE(4); + + /* Define dummy input arrays, large enough for x4 vectors */ + DUMMY_ARRAY(buffer_src, int, 8, 8, 4); + DUMMY_ARRAY(buffer_src, int, 16, 4, 4); + DUMMY_ARRAY(buffer_src, int, 32, 2, 4); + DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); + DUMMY_ARRAY(buffer_src, int, 16, 8, 4); + DUMMY_ARRAY(buffer_src, int, 32, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); + DUMMY_ARRAY(buffer_src, float, 32, 2, 4); + DUMMY_ARRAY(buffer_src, float, 32, 4, 4); + + /* Check vld2_lane/vld2q_lane */ + clean_results (); +#define TEST_MSG "VLD2_LANE/VLD2Q_LANE" + TEST_ALL_VLDX_LANE(2); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(2, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3_lane/vld3q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3_LANE/VLD3Q_LANE" + TEST_ALL_VLDX_LANE(3); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(3, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + + 
/* Check vld4_lane/vld4q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4_LANE/VLD4Q_LANE" + TEST_ALL_VLDX_LANE(4); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(4, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vmax.c b/ref_vmax.c new file mode 100644 index 0000000..a2a6b60 --- /dev/null +++ b/ref_vmax.c @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmax +#define TEST_MSG "VMAX/VMAXQ" +#endif + +/* Can't use the standard ref_v_binary_op.c template because vmax has + no 64 bits variant */ +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. */ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); +#ifndef NO_FLOAT_VARIANT + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); +#endif + + /* Choose init value arbitrarily, will be used as comparison value */ + TEST_VDUP(vector2, , int, s, 8, 8, -13); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + TEST_VDUP(vector2, , int, s, 32, 2, -16); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xf3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xfff1); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xfffffff0); + TEST_VDUP(vector2, q, int, s, 8, 16, -12); + TEST_VDUP(vector2, q, int, s, 16, 8, -13); + TEST_VDUP(vector2, q, int, s, 32, 4, -15); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xf9); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xfff2); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1); +#ifndef NO_FLOAT_VARIANT + TEST_VDUP(vector2, , float, f, 32, 2, -15.5); + 
TEST_VDUP(vector2, q, float, f, 32, 4, -14.5); +#endif + +#ifndef NO_FLOAT_VARIANT +#define FLOAT_VARIANT(MACRO, VAR) \ + MACRO(VAR, , float, f, 32, 2); \ + MACRO(VAR, q, float, f, 32, 4) +#else +#define FLOAT_VARIANT(MACRO, VAR) +#endif + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR) \ + MACRO(VAR, , int, s, 8, 8); \ + MACRO(VAR, , int, s, 16, 4); \ + MACRO(VAR, , int, s, 32, 2); \ + MACRO(VAR, , uint, u, 8, 8); \ + MACRO(VAR, , uint, u, 16, 4); \ + MACRO(VAR, , uint, u, 32, 2); \ + MACRO(VAR, q, int, s, 8, 16); \ + MACRO(VAR, q, int, s, 16, 8); \ + MACRO(VAR, q, int, s, 32, 4); \ + MACRO(VAR, q, uint, u, 8, 16); \ + MACRO(VAR, q, uint, u, 16, 8); \ + MACRO(VAR, q, uint, u, 32, 4); \ + FLOAT_VARIANT(MACRO, VAR) + + /* Apply a binary operator named INSN_NAME */ + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmin.c b/ref_vmin.c new file mode 100644 index 0000000..496e3ae --- /dev/null +++ b/ref_vmin.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vmin +#define TEST_MSG "VMIN/VMINQ" + +#include "ref_vmax.c" diff --git a/ref_vmla.c b/ref_vmla.c new file mode 100644 index 0000000..2b2a42b --- /dev/null +++ b/ref_vmla.c @@ -0,0 +1,144 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + DECL_VARIABLE(vector2, T, W, N); \ + DECL_VARIABLE(vector3, T, W, N); \ + DECL_VARIABLE(vector_res, T, W, N) + + /* vector_res = OP(vector, vector3, vector4), + then store the result. */ +#define TEST_VMLX1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX(INSN, Q, T1, T2, W, N) \ + TEST_VMLX1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX(int, 8, 8); + DECL_VMLX(int, 16, 4); + DECL_VMLX(int, 32, 2); + DECL_VMLX(uint, 8, 8); + DECL_VMLX(uint, 16, 4); + DECL_VMLX(uint, 32, 2); + DECL_VMLX(float, 32, 2); + DECL_VMLX(int, 8, 16); + DECL_VMLX(int, 16, 8); + DECL_VMLX(int, 32, 4); + DECL_VMLX(uint, 8, 16); + DECL_VMLX(uint, 16, 8); + DECL_VMLX(uint, 32, 4); + DECL_VMLX(float, 32, 4); + + clean_results (); + + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + 
TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x44); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x66); + TEST_VDUP(vector2, , float, f, 32, 2, 33.1); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x77); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x88); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x99); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xAA); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xBB); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xCC); + TEST_VDUP(vector2, q, float, f, 32, 4, 99.2); + + TEST_VDUP(vector3, , int, s, 8, 8, 0xFF); + TEST_VDUP(vector3, , int, s, 16, 4, 0xEE); + TEST_VDUP(vector3, , int, s, 32, 2, 0xDD); + TEST_VDUP(vector3, , uint, u, 8, 8, 0xCC); + TEST_VDUP(vector3, , uint, u, 16, 4, 0xBB); + TEST_VDUP(vector3, , uint, u, 32, 2, 0xAA); + TEST_VDUP(vector3, , float, f, 32, 2, 10.23); + TEST_VDUP(vector3, q, int, s, 8, 16, 0x99); + TEST_VDUP(vector3, q, int, s, 16, 8, 0x88); + TEST_VDUP(vector3, q, int, s, 32, 4, 0x77); + TEST_VDUP(vector3, q, uint, u, 8, 16, 0x66); + TEST_VDUP(vector3, q, uint, u, 16, 8, 0x55); + TEST_VDUP(vector3, q, uint, u, 32, 4, 0x44); + TEST_VDUP(vector3, q, float, f, 32, 4, 77.8); + + TEST_VMLX(INSN_NAME, , int, s, 8, 8); + TEST_VMLX(INSN_NAME, , int, s, 16, 4); + TEST_VMLX(INSN_NAME, , int, s, 32, 2); + TEST_VMLX(INSN_NAME, , uint, u, 8, 8); + TEST_VMLX(INSN_NAME, , uint, u, 16, 4); + TEST_VMLX(INSN_NAME, , uint, u, 32, 2); + TEST_VMLX(INSN_NAME, , float, f, 32, 2); + TEST_VMLX(INSN_NAME, q, int, s, 8, 16); + TEST_VMLX(INSN_NAME, q, int, s, 16, 8); + TEST_VMLX(INSN_NAME, q, int, s, 32, 4); + TEST_VMLX(INSN_NAME, q, uint, u, 8, 16); + TEST_VMLX(INSN_NAME, q, uint, u, 16, 8); + TEST_VMLX(INSN_NAME, q, uint, u, 32, 4); + TEST_VMLX(INSN_NAME, q, float, f, 32, 4); + + 
dump_results_hex (TEST_MSG); +} diff --git a/ref_vmla_lane.c b/ref_vmla_lane.c new file mode 100644 index 0000000..b79d44c --- /dev/null +++ b/ref_vmla_lane.c @@ -0,0 +1,125 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX_LANE(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmlx_lane(vector, vector2, vector3, lane), + then store the result. */ +#define TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_LANE(INSN, Q, T1, T2, W, N, N2, V) \ + TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX_LANE(vector); + DECL_VMLX_LANE(vector2); + DECL_VMLX_LANE(vector_res); + + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector3, float, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 
4); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x55); + TEST_VDUP(vector2, , float, f, 32, 2, 55.3); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x55); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x55); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x55); + TEST_VDUP(vector2, q, float, f, 32, 4, 55.8); + + TEST_VDUP(vector3, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0xBB); + TEST_VDUP(vector3, , uint, u, 16, 4, 0xBB); + TEST_VDUP(vector3, , uint, u, 32, 2, 0xBB); + TEST_VDUP(vector3, , float, f, 32, 2, 11.34); + + /* Choose lane arbitrarily */ + TEST_VMLX_LANE(INSN_NAME, , int, s, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , int, s, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , float, f, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 16, 8, 4, 3); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 16, 8, 4, 2); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, float, f, 32, 4, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmla_n.c b/ref_vmla_n.c new file mode 100644 index 0000000..edcfc5e --- /dev/null +++ b/ref_vmla_n.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the 
Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX_N(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, float, 32, 4); \ + DECL_VARIABLE(VAR, uint, 32, 4) + + /* vector_res = vmlx_n(vector, vector2, val), + then store the result. 
*/ +#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX_N(vector); + DECL_VMLX_N(vector2); + DECL_VMLX_N(vector_res); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x55); + TEST_VDUP(vector2, , float, f, 32, 2, 55.2); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x55); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x55); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x55); + TEST_VDUP(vector2, q, float, f, 32, 4, 55.9); + + /* Choose multiplier arbitrarily */ + TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 0x11); + TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 0x22); + TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 0x33); + TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 0x44); + TEST_VMLX_N(INSN_NAME, , float, f, 32, 2, 22.3); + TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 0x55); + TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 0x66); + TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 0x77); + TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 0x88); + 
TEST_VMLX_N(INSN_NAME, q, float, f, 32, 4, 66.7); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmlal.c b/ref_vmlal.c new file mode 100644 index 0000000..9c00fb7 --- /dev/null +++ b/ref_vmlal.c @@ -0,0 +1,119 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal +#define TEST_MSG "VMLAL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. 
*/ +#define TEST_VMLXL1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VMLXL1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector3, int, 8, 8); + DECL_VARIABLE(vector4, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector3, uint, 8, 8); + DECL_VARIABLE(vector4, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 8); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + TEST_VDUP(vector3, , int, s, 8, 8, 0x55); + TEST_VDUP(vector4, , int, s, 8, 8, 0xBB); + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector4, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); + 
TEST_VDUP(vector3, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector4, , uint, u, 8, 8, 0xBB); + TEST_VDUP(vector3, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector4, , uint, u, 16, 4, 0xBB); + TEST_VDUP(vector3, , uint, u, 32, 2, 0x55); + TEST_VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL(INSN_NAME, int, s, 16, 8, 8); + TEST_VMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VMLXL(INSN_NAME, int, s, 64, 32, 2); + TEST_VMLXL(INSN_NAME, uint, u, 16, 8, 8); + TEST_VMLXL(INSN_NAME, uint, u, 32, 16, 4); + TEST_VMLXL(INSN_NAME, uint, u, 64, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmlal_lane.c b/ref_vmlal_lane.c new file mode 100644 index 0000000..afa8e6b --- /dev/null +++ b/ref_vmlal_lane.c @@ -0,0 +1,101 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal_lane +#define TEST_MSG "VMLAL_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vmlxl_lane(vector, vector3, vector4, lane), + then store the result. */ +#define TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_LANE(INSN, T1, T2, W, W2, N, V) \ + TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector4, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); + TEST_VDUP(vector3, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector4, , 
uint, u, 16, 4, 0xBB); + TEST_VDUP(vector3, , uint, u, 32, 2, 0x55); + TEST_VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 2); + TEST_VMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 1); + TEST_VMLXL_LANE(INSN_NAME, uint, u, 32, 16, 4, 2); + TEST_VMLXL_LANE(INSN_NAME, uint, u, 64, 32, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmlal_n.c b/ref_vmlal_n.c new file mode 100644 index 0000000..da981f7 --- /dev/null +++ b/ref_vmlal_n.c @@ -0,0 +1,92 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal_n +#define TEST_MSG "VMLAL_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vmlxl_n(vector, vector2, val), + then store the result. */ +#define TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_N(INSN, T1, T2, W, W2, N, V) \ + TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x55); + + /* Choose multiplier arbitrarily */ + TEST_VMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x11); + TEST_VMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x22); + TEST_VMLXL_N(INSN_NAME, uint, u, 32, 16, 4, 0x33); + TEST_VMLXL_N(INSN_NAME, uint, u, 64, 32, 2, 0x33); + + dump_results_hex (TEST_MSG); +} 
diff --git a/ref_vmls.c b/ref_vmls.c new file mode 100644 index 0000000..9eacdb2 --- /dev/null +++ b/ref_vmls.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS" + +#include "ref_vmla.c" diff --git a/ref_vmls_lane.c b/ref_vmls_lane.c new file mode 100644 index 0000000..68cce02 --- /dev/null +++ b/ref_vmls_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS_LANE" + +#include "ref_vmla_lane.c" diff --git a/ref_vmls_n.c b/ref_vmls_n.c new file mode 100644 index 0000000..050a4b9 --- /dev/null +++ b/ref_vmls_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS_N" + +#include "ref_vmla_n.c" diff --git a/ref_vmlsl.c b/ref_vmlsl.c new file mode 100644 index 0000000..22b11aa --- /dev/null +++ b/ref_vmlsl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl +#define TEST_MSG "VMLSL" + +#include "ref_vmlal.c" diff --git a/ref_vmlsl_lane.c b/ref_vmlsl_lane.c new file mode 100644 index 0000000..a972b93 --- /dev/null +++ b/ref_vmlsl_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl_lane +#define TEST_MSG "VMLSL_LANE" + +#include "ref_vmlal_lane.c" diff --git a/ref_vmlsl_n.c b/ref_vmlsl_n.c new file mode 100644 index 0000000..b7f7000 --- /dev/null +++ b/ref_vmlsl_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl_n +#define TEST_MSG "VMLSL_N" + +#include "ref_vmlal_n.c" diff --git a/ref_vmovl.c b/ref_vmovl.c new file mode 100644 index 0000000..fa9f599 --- /dev/null +++ b/ref_vmovl.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMOVL" +void exec_vmovl (void) +{ + /* Basic test: vec128=vmovl(vec64), then store the result. 
*/ +#define TEST_VMOVL(T1, T2, W, W2, N) \ + VECT_VAR(vector128, T1, W2, N) = \ + vmovl_##T2##W(VECT_VAR(vector64, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector128, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_64BITS_VARIANTS_2_5(TEST_VLOAD, vector64, buffer); + + clean_results (); + + TEST_VMOVL(int, s, 8, 16, 8); + TEST_VMOVL(int, s, 16, 32, 4); + TEST_VMOVL(int, s, 32, 64, 2); + TEST_VMOVL(uint, u, 8, 16, 8); + TEST_VMOVL(uint, u, 16, 32, 4); + TEST_VMOVL(uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmovn.c b/ref_vmovn.c new file mode 100644 index 0000000..ff00e75 --- /dev/null +++ b/ref_vmovn.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMOVN" +void exec_vmovn (void) +{ + /* Basic test: vec64=vmovn(vec128), then store the result. */ +#define TEST_VMOVN(T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = \ + vmovn_##T2##W(VECT_VAR(vector128, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_128BITS_VARIANTS_2_5(TEST_VLOAD, vector128, buffer); + + clean_results (); + + TEST_VMOVN(int, s, 16, 8, 8); + TEST_VMOVN(int, s, 32, 16, 4); + TEST_VMOVN(int, s, 64, 32, 2); + TEST_VMOVN(uint, u, 16, 8, 8); + TEST_VMOVN(uint, u, 32, 16, 4); + TEST_VMOVN(uint, u, 64, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmul.c b/ref_vmul.c new file mode 100644 index 0000000..7e77338 --- /dev/null +++ b/ref_vmul.c @@ -0,0 +1,127 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmul +#define TEST_MSG "VMUL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMUL(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + DECL_VARIABLE(vector2, T, W, N); \ + DECL_VARIABLE(vector_res, T, W, N) + + /* vector_res = OP(vector, vector3, vector4), + then store the result. */ +#define TEST_VMUL1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMUL(INSN, Q, T1, T2, W, N) \ + TEST_VMUL1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(int, 8, 8); + DECL_VMUL(int, 16, 4); + DECL_VMUL(int, 32, 2); + DECL_VMUL(uint, 8, 8); + DECL_VMUL(uint, 16, 4); + DECL_VMUL(uint, 32, 2); + DECL_VMUL(float, 32, 2); + DECL_VMUL(int, 8, 16); + DECL_VMUL(int, 16, 8); + DECL_VMUL(int, 32, 4); + DECL_VMUL(uint, 8, 16); + DECL_VMUL(uint, 16, 8); + DECL_VMUL(uint, 32, 4); + DECL_VMUL(float, 32, 4); + + clean_results (); + + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, int, s, 8, 16); + 
TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x44); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x66); + TEST_VDUP(vector2, , float, f, 32, 2, 33.3); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x77); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x88); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x99); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xAA); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xBB); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xCC); + TEST_VDUP(vector2, q, float, f, 32, 4, 99.6); + + TEST_VMUL(INSN_NAME, , int, s, 8, 8); + TEST_VMUL(INSN_NAME, , int, s, 16, 4); + TEST_VMUL(INSN_NAME, , int, s, 32, 2); + TEST_VMUL(INSN_NAME, , uint, u, 8, 8); + TEST_VMUL(INSN_NAME, , uint, u, 16, 4); + TEST_VMUL(INSN_NAME, , uint, u, 32, 2); + TEST_VMUL(INSN_NAME, , float, f, 32, 2); + TEST_VMUL(INSN_NAME, q, int, s, 8, 16); + TEST_VMUL(INSN_NAME, q, int, s, 16, 8); + TEST_VMUL(INSN_NAME, q, int, s, 32, 4); + TEST_VMUL(INSN_NAME, q, uint, u, 8, 16); + TEST_VMUL(INSN_NAME, q, uint, u, 16, 8); + TEST_VMUL(INSN_NAME, q, uint, u, 32, 4); + TEST_VMUL(INSN_NAME, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmul_lane.c b/ref_vmul_lane.c new file mode 100644 index 0000000..650c9d9 --- /dev/null +++ b/ref_vmul_lane.c @@ -0,0 +1,105 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, 
including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMUL_LANE" +void exec_vmul_lane (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMUL_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vmul##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + + clean_results (); + + /* Initialize vector from pre-initialized values */ + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x4); + TEST_VDUP(vector2, , int, s, 32, 2, 0x22); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x444); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x532); + TEST_VDUP(vector2, , float, f, 32, 2, 22.8); + + /* Choose lane arbitrarily */ + TEST_VMUL_LANE(, int, s, 16, 4, 4, 2); + TEST_VMUL_LANE(, int, s, 32, 2, 2, 1); + TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2); + TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1); + TEST_VMUL_LANE(, float, f, 32, 2, 2, 1); + TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2); + TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0); + TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2); + TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1); + TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); 
+} diff --git a/ref_vmul_n.c b/ref_vmul_n.c new file mode 100644 index 0000000..628862d --- /dev/null +++ b/ref_vmul_n.c @@ -0,0 +1,91 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMUL_N" +void exec_vmul_n (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_n(vector,val), then store the result. 
*/ +#define TEST_VMUL_N(Q, T1, T2, W, N, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vmul##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + clean_results (); + + /* Initialize vector from pre-initialized values */ + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose multiplier arbitrarily */ + TEST_VMUL_N(, int, s, 16, 4, 0x11); + TEST_VMUL_N(, int, s, 32, 2, 0x22); + TEST_VMUL_N(, uint, u, 16, 4, 0x33); + TEST_VMUL_N(, uint, u, 32, 2, 0x44); + TEST_VMUL_N(, float, f, 32, 2, 22.3); + TEST_VMUL_N(q, int, s, 16, 8, 0x55); + TEST_VMUL_N(q, int, s, 32, 4, 0x66); + TEST_VMUL_N(q, uint, u, 16, 8, 0x77); + TEST_VMUL_N(q, uint, u, 32, 4, 0x88); + TEST_VMUL_N(q, float, f, 32, 4, 88.9); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull.c b/ref_vmull.c new file mode 100644 index 0000000..e61d8e6 --- /dev/null +++ b/ref_vmull.c @@ -0,0 +1,77 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMULL" +void exec_vmull (void) +{ + /* Basic test: y=vmull(x,x), then store the result. */ +#define TEST_VMULL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vmull_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + 
TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + + TEST_VMULL(int, s, 8, 16, 8); + TEST_VMULL(int, s, 16, 32, 4); + TEST_VMULL(int, s, 32, 64, 2); + TEST_VMULL(uint, u, 8, 16, 8); + TEST_VMULL(uint, u, 16, 32, 4); + TEST_VMULL(uint, u, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull_lane.c b/ref_vmull_lane.c new file mode 100644 index 0000000..23434e1 --- /dev/null +++ b/ref_vmull_lane.c @@ -0,0 +1,84 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMULL_LANE" +void exec_vmull_lane (void) +{ + /* vector_res = vmull_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMULL_LANE(T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vmull##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x1000); + TEST_VDUP(vector, , uint, u, 16, 4, 0x1000); + TEST_VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x4); + TEST_VDUP(vector2, , int, s, 32, 2, 0x2); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x4); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x2); + + /* Choose lane arbitrarily */ + TEST_VMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VMULL_LANE(int, s, 32, 64, 2, 1); + TEST_VMULL_LANE(uint, u, 16, 32, 4, 2); + TEST_VMULL_LANE(uint, u, 32, 64, 2, 1); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull_n.c b/ref_vmull_n.c new file mode 100644 index 0000000..8713d35 --- /dev/null +++ b/ref_vmull_n.c @@ -0,0 +1,81 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without 
restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vmull +#define TEST_MSG "VMULL_N" +void exec_vmull_n (void) +{ + int i; + + /* vector_res = vmull_n(vector,val), then store the result. 
*/ +#define TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VMULL_N(INSN, T1, T2, W, W2, N, L) \ + TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x1000); + TEST_VDUP(vector, , uint, u, 16, 4, 0x1000); + TEST_VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Choose multiplier arbitrarily */ + TEST_VMULL_N(INSN_NAME, int, s, 16, 32, 4, 0x11); + TEST_VMULL_N(INSN_NAME, int, s, 32, 64, 2, 0x22); + TEST_VMULL_N(INSN_NAME, uint, u, 16, 32, 4, 0x33); + TEST_VMULL_N(INSN_NAME, uint, u, 32, 64, 2, 0x44); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); +} diff --git a/ref_vmvn.c b/ref_vmvn.c new file mode 100644 index 0000000..75b750e --- /dev/null +++ b/ref_vmvn.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons 
to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vmvn +#define TEST_MSG "VMVN/VMVNQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + 
TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4); + TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vneg.c b/ref_vneg.c new file mode 100644 index 0000000..4e9e68f --- /dev/null +++ b/ref_vneg.c @@ -0,0 +1,54 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vneg +#define TEST_MSG "VNEG/VNEGQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vneg_f32(void); +#define EXTRA_TESTS exec_vneg_f32 + +#include "ref_v_unary_op.c" + +void exec_vneg_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + TEST_VDUP(vector, , float, f, 32, 2, 2.3); + TEST_VDUP(vector, q, float, f, 32, 4, 3.4); + + TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vorn.c b/ref_vorn.c new file mode 100644 index 0000000..df9823e --- /dev/null +++ b/ref_vorn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vorn +#define TEST_MSG "VORN/VORNQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vorr.c b/ref_vorr.c new file mode 100644 index 0000000..0762e10 --- /dev/null +++ b/ref_vorr.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vorr +#define TEST_MSG "VORR/VORRQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vpadal.c b/ref_vpadal.c new file mode 100644 index 0000000..8ca3e07 --- /dev/null +++ b/ref_vpadal.c @@ -0,0 +1,140 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vpadal +#define TEST_MSG "VPADAL/VPADALQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W2, N2), VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)) + +#define TEST_VPADAL(INSN, Q, T1, T2, W, N, W2, N2) \ + TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 64, 1); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, uint, 64, 1); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, int, 8, 16); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, uint, 8, 16); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + 
TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , int, s, 64, 1); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 64, 1); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Initialize input "vector2" from "buffer" */ + TEST_VLOAD(vector2, buffer, , int, s, 8, 8); + TEST_VLOAD(vector2, buffer, , int, s, 16, 4); + TEST_VLOAD(vector2, buffer, , int, s, 32, 2); + TEST_VLOAD(vector2, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector2, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector2, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector2, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector2, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector2, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector2, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector2, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector2, buffer, q, uint, u, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_VPADAL(INSN_NAME, , int, s, 8, 8, 16, 4); + TEST_VPADAL(INSN_NAME, , int, s, 16, 4, 32, 2); + TEST_VPADAL(INSN_NAME, , int, s, 32, 2, 64 ,1); + TEST_VPADAL(INSN_NAME, , uint, u, 8, 8, 16, 4); + TEST_VPADAL(INSN_NAME, , uint, u, 16, 4, 32, 2); + TEST_VPADAL(INSN_NAME, , uint, u, 32, 2, 64, 1); + TEST_VPADAL(INSN_NAME, q, int, s, 8, 16, 16, 8); + TEST_VPADAL(INSN_NAME, q, int, s, 16, 8, 32, 4); + TEST_VPADAL(INSN_NAME, q, int, s, 32, 4, 64 ,2); + TEST_VPADAL(INSN_NAME, q, uint, u, 8, 16, 16, 8); + TEST_VPADAL(INSN_NAME, q, uint, u, 16, 8, 32, 4); + TEST_VPADAL(INSN_NAME, q, uint, u, 32, 4, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpadd.c b/ref_vpadd.c new file mode 100644 index 0000000..ebd3127 --- 
/dev/null +++ b/ref_vpadd.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vpadd +#define TEST_MSG "VPADD" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_VPADD1(INSN, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VPADD(INSN, T1, T2, W, N) \ + TEST_VPADD1(INSN, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + + /* Apply a unary operator named INSN_NAME */ + TEST_VPADD(INSN_NAME, int, s, 8, 8); + TEST_VPADD(INSN_NAME, int, s, 16, 4); + TEST_VPADD(INSN_NAME, int, s, 32, 2); + TEST_VPADD(INSN_NAME, uint, u, 8, 8); + TEST_VPADD(INSN_NAME, uint, u, 16, 4); + TEST_VPADD(INSN_NAME, uint, u, 32, 2); + TEST_VPADD(INSN_NAME, float, f, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpaddl.c b/ref_vpaddl.c new file mode 100644 index 0000000..43ad68a --- /dev/null +++ b/ref_vpaddl.c @@ -0,0 +1,113 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vpaddl +#define TEST_MSG "VPADDL/VPADDLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)) + +#define TEST_VPADDL(INSN, Q, T1, T2, W, N, W2, N2) \ + TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector, buffer, q, uint, 
u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_VPADDL(INSN_NAME, , int, s, 8, 8, 16, 4); + TEST_VPADDL(INSN_NAME, , int, s, 16, 4, 32, 2); + TEST_VPADDL(INSN_NAME, , int, s, 32, 2, 64, 1); + TEST_VPADDL(INSN_NAME, , uint, u, 8, 8, 16, 4); + TEST_VPADDL(INSN_NAME, , uint, u, 16, 4, 32, 2); + TEST_VPADDL(INSN_NAME, , uint, u, 32, 2, 64, 1); + TEST_VPADDL(INSN_NAME, q, int, s, 8, 16, 16, 8); + TEST_VPADDL(INSN_NAME, q, int, s, 16, 8, 32, 4); + TEST_VPADDL(INSN_NAME, q, int, s, 32, 4, 64, 2); + TEST_VPADDL(INSN_NAME, q, uint, u, 8, 16, 16, 8); + TEST_VPADDL(INSN_NAME, q, uint, u, 16, 8, 32, 4); + TEST_VPADDL(INSN_NAME, q, uint, u, 32, 4, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpmax.c b/ref_vpmax.c new file mode 100644 index 0000000..bac8d20 --- /dev/null +++ b/ref_vpmax.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vpmax +#define TEST_MSG "VPMAX" + +#include "ref_vpadd.c" diff --git a/ref_vpmin.c b/ref_vpmin.c new file mode 100644 index 0000000..cf0a044 --- /dev/null +++ b/ref_vpmin.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vpmin +#define TEST_MSG "VPMIN" + +#include "ref_vpadd.c" diff --git a/ref_vqabs.c b/ref_vqabs.c new file mode 100644 index 0000000..4a4d04e --- /dev/null +++ b/ref_vqabs.c @@ -0,0 +1,73 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqabs +#define TEST_MSG "VQABS/VQABSQ" + +/* Extra tests for functions requiring corner cases tests */ +void vqabs_extra(void); +#define EXTRA_TESTS vqabs_extra + +#include "ref_v_unary_sat_op.c" + +void vqabs_extra() +{ + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" with max negative values to check + saturation */ + TEST_VDUP(vector, , int, s, 8, 8, 0x80); + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqadd.c b/ref_vqadd.c new file mode 100644 index 0000000..6c9b508 --- /dev/null +++ b/ref_vqadd.c @@ -0,0 +1,153 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, 
including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqadd +#define TEST_MSG "VQADD/VQADDQ" + +/* Extra tests for functions requiring types larger than 64 bits to + compute saturation */ +void vqadd_64(void); +#define EXTRA_TESTS vqadd_64 + +#include "ref_v_binary_sat_op.c" + +void vqadd_64(void) +{ + int i; + + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + /* Initialize input "vector1" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + + TEST_VDUP(vector2, , int, s, 64, 1, 0x0); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x0); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x0); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x0); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + fprintf(ref_file, "\n64 bits saturation:\n"); + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + 
DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + TEST_VDUP(vector2, , int, s, 64, 1, 0x8000000000000003LL); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + + TEST_VDUP(vector1, q, int, s, 64, 2, 0x4000000000000000LL); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x4000000000000000LL); + + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x22); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* To improve coverage, check saturation with less than 64 bits too */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x81); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8001); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000001); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x81); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8001); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000001); + + fprintf(ref_file, "\nless than 64 bits saturation:\n"); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , 
int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + DUMP(TEST_MSG, int, 8, 8, PRIx8); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 8, 16, PRIx8); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + TEST_VDUP(vector1, , uint, u, 8, 8, 0xF0); + TEST_VDUP(vector1, , uint, u, 16, 4, 0xFFF0); + TEST_VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFF0); + TEST_VDUP(vector1, q, uint, u, 8, 16, 0xF0); + TEST_VDUP(vector1, q, uint, u, 16, 8, 0xFFF0); + TEST_VDUP(vector1, q, uint, u, 32, 4, 0xFFFFFFF0); + + TEST_VDUP(vector2, , uint, u, 8, 8, 0x20); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x20); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x20); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x20); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x20); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x20); + + fprintf(ref_file, "\n%s less than 64 bits saturation overflow output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_vqdmlal.c b/ref_vqdmlal.c new file mode 100644 index 0000000..06d9bdc --- /dev/null +++ b/ref_vqdmlal.c @@ -0,0 +1,97 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the 
Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal +#define TEST_MSG "VQDMLAL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. 
*/ +#define TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2)) + +#define TEST_VQDMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector4, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2); + dump_results_hex (TEST_MSG); + + + TEST_VDUP(vector3, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector4, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector4, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmlal_lane.c b/ref_vqdmlal_lane.c new file mode 100644 index 0000000..6c13460 --- /dev/null +++ b/ref_vqdmlal_lane.c @@ -0,0 +1,103 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by 
Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal_lane +#define TEST_MSG "VQDMLAL_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vqdmlxl_lane(vector, vector3, vector4, lane), + then store the result. 
*/ +#define TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2)) + +#define TEST_VQDMLXL_LANE(INSN, T1, T2, W, W2, N, V) \ + TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector4, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex (TEST_MSG); + + TEST_VDUP(vector3, , int, s, 16, 4, 0); + TEST_VDUP(vector3, , int, s, 32, 2, 0); + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (mul with input=0)"); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex2 (TEST_MSG, " (mul with input=0)"); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector4, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector4, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + 
TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmlal_n.c b/ref_vqdmlal_n.c new file mode 100644 index 0000000..98c180e --- /dev/null +++ b/ref_vqdmlal_n.c @@ -0,0 +1,91 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal_n +#define TEST_MSG "VQDMLAL_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vqdmlxl_n(vector, vector3, val), + then store the result. 
*/ +#define TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2)) + +#define TEST_VQDMLXL_N(INSN, T1, T2, W, W2, N, V) \ + TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + + /* Choose val arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x22); + TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x33); + + dump_results_hex (TEST_MSG); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x8000); + TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmlsl.c b/ref_vqdmlsl.c new file mode 100644 index 0000000..ceb0b6b --- /dev/null +++ b/ref_vqdmlsl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including 
without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqdmlsl +#define TEST_MSG "VQDMLSL" + +#include "ref_vqdmlal.c" diff --git a/ref_vqdmlsl_lane.c b/ref_vqdmlsl_lane.c new file mode 100644 index 0000000..7724d98 --- /dev/null +++ b/ref_vqdmlsl_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqdmlsl_lane +#define TEST_MSG "VQDMLSL_LANE" + +#include "ref_vqdmlal_lane.c" diff --git a/ref_vqdmlsl_n.c b/ref_vqdmlsl_n.c new file mode 100644 index 0000000..c6f8818 --- /dev/null +++ b/ref_vqdmlsl_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqdmlsl_n +#define TEST_MSG "VQDMLSL_N" + +#include "ref_vqdmlal_n.c" diff --git a/ref_vqdmulh.c b/ref_vqdmulh.c new file mode 100644 index 0000000..84903c5 --- /dev/null +++ b/ref_vqdmulh.c @@ -0,0 +1,114 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqdmulh(vector,vector2,lane), then store the result. 
*/ +#define TEST_VQDMULH2(INSN, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULH1(INSN, Q, T1, T2, W, N) \ + TEST_VQDMULH2(INSN, Q, T1, T2, W, N) + +#define TEST_VQDMULH(Q, T1, T2, W, N) \ + TEST_VQDMULH1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x33); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x22); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULH(, int, s, 16, 4); + TEST_VQDMULH(, int, s, 32, 2); + TEST_VQDMULH(q, int, s, 16, 8); + TEST_VQDMULH(q, int, s, 32, 4); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 
0x80000000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULH(, int, s, 16, 4); + TEST_VQDMULH(, int, s, 32, 2); + TEST_VQDMULH(q, int, s, 16, 8); + TEST_VQDMULH(q, int, s, 32, 4); + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqdmulh_lane.c b/ref_vqdmulh_lane.c new file mode 100644 index 0000000..feda86a --- /dev/null +++ b/ref_vqdmulh_lane.c @@ -0,0 +1,115 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH_LANE" +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result. */ +#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) + +#define TEST_VQDMULH_LANE(Q, T1, T2, W, N, N2, L) \ + TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + /* vector2: vqdmulh_lane and vqdmulhq_lane have a 2nd argument with + the same number of elements, so we need only one variable of each + type. 
*/ + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 3); + TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 2); + TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 1); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmulh_n.c b/ref_vqdmulh_n.c new file mode 100644 index 0000000..785d304 --- /dev/null +++ b/ref_vqdmulh_n.c @@ -0,0 +1,108 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to 
permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH_N" +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmulh_n(vector,val), then store the result. 
*/ +#define TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L) \ + TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L) + +#define TEST_VQDMULH_N(Q, T1, T2, W, N, L) \ + TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x100023); + TEST_VDUP(vector, q, int, s, 16, 8, 0x1000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x100045); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULH_N(, int, s, 16, 4, 0xCF); + TEST_VQDMULH_N(, int, s, 32, 2, 0x2344); + TEST_VQDMULH_N(q, int, s, 16, 8, 0x80); + TEST_VQDMULH_N(q, int, s, 32, 4, 0x5422); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMULH_N(, int, s, 
16, 4, 0x8000); + TEST_VQDMULH_N(, int, s, 32, 2, 0x80000000); + TEST_VQDMULH_N(q, int, s, 16, 8, 0x8000); + TEST_VQDMULH_N(q, int, s, 32, 4, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmull.c b/ref_vqdmull.c new file mode 100644 index 0000000..d19794d --- /dev/null +++ b/ref_vqdmull.c @@ -0,0 +1,92 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL" +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqdmull(x,x), then store the result. 
*/ +#define TEST_VQDMULL2(INSN, T1, T2, W, W2, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULL1(INSN, T1, T2, W, W2, N) \ + TEST_VQDMULL2(INSN, T1, T2, W, W2, N) + +#define TEST_VQDMULL(T1, T2, W, W2, N) \ + TEST_VQDMULL1(INSN, T1, T2, W, W2, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector2, buffer, , int, s, 16, 4); + TEST_VLOAD(vector2, buffer, , int, s, 32, 2); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULL(int, s, 16, 32, 4); + TEST_VQDMULL(int, s, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMULL(int, s, 16, 32, 4); + TEST_VQDMULL(int, s, 32, 64, 2); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmull_lane.c b/ref_vqdmull_lane.c new file mode 100644 index 0000000..0e67f54 --- /dev/null +++ b/ref_vqdmull_lane.c @@ -0,0 +1,105 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL_LANE" +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_lane_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L) \ + TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) + +#define TEST_VQDMULL_LANE(T1, T2, W, W2, N, L) \ + TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x4); + TEST_VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1); + + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (check mul overflow)"); + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " (check mul overflow)"); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + 
DUMP(TEST_MSG, int, 64, 2, PRIx64); +} diff --git a/ref_vqdmull_n.c b/ref_vqdmull_n.c new file mode 100644 index 0000000..13ce1a2 --- /dev/null +++ b/ref_vqdmull_n.c @@ -0,0 +1,101 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL_N" +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmull_n(vector,val), then store the result. 
*/ +#define TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L) \ + TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) + +#define TEST_VQDMULL_N(T1, T2, W, W2, N, L) \ + TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x4); + TEST_VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULL_N(int, s, 16, 32, 4, 0x22); + TEST_VQDMULL_N(int, s, 32, 64, 2, 0x55); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMULL_N(int, s, 16, 32, 4, 0x8000); + TEST_VQDMULL_N(int, s, 32, 64, 2, 0x80000000); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check mul overflow)"); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); +} diff --git a/ref_vqmovn.c b/ref_vqmovn.c new file mode 100644 index 0000000..c70c034 --- /dev/null +++ 
b/ref_vqmovn.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vqmovn +#define TEST_MSG "VQMOVN" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2)) + +#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N) \ + TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x12); + TEST_VDUP(vector, q, int, s, 32, 4, 0x1278); + TEST_VDUP(vector, q, int, s, 64, 2, 0x12345678); + TEST_VDUP(vector, q, uint, u, 16, 8, 0x82); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x8765); + TEST_VDUP(vector, q, uint, u, 64, 2, 0x87654321); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); + + + /* Fill input vector with arbitrary values which cause an overflow */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x12345678); + TEST_VDUP(vector, q, int, s, 64, 2, 
0x1234567890ABLL); + TEST_VDUP(vector, q, uint, u, 16, 8, 0x8234); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x87654321); + TEST_VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqmovun.c b/ref_vqmovun.c new file mode 100644 index 0000000..729d6c4 --- /dev/null +++ b/ref_vqmovun.c @@ -0,0 +1,93 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vqmovun +#define TEST_MSG "VQMOVUN" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_s##W2)) + +#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N) \ + TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x34); + TEST_VDUP(vector, q, int, s, 32, 4, 0x5678); + TEST_VDUP(vector, q, int, s, 64, 2, 0x12345678); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); + + /* Fill input vector with negative values */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x8234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x87654321); + TEST_VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + 
TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex2 (TEST_MSG, " (negative input)"); +} diff --git a/ref_vqneg.c b/ref_vqneg.c new file mode 100644 index 0000000..4ae0af8 --- /dev/null +++ b/ref_vqneg.c @@ -0,0 +1,73 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqneg +#define TEST_MSG "VQNEG/VQNEGQ" + +/* Extra tests for functions requiring corner cases tests */ +void vqneg_extra(void); +#define EXTRA_TESTS vqneg_extra + +#include "ref_v_unary_sat_op.c" + +void vqneg_extra() +{ + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" with max negative values to check + saturation */ + TEST_VDUP(vector, , int, s, 8, 8, 0x80); + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqrdmulh.c b/ref_vqrdmulh.c new file mode 100644 index 0000000..3c9ea4d --- /dev/null +++ b/ref_vqrdmulh.c @@ -0,0 +1,134 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without 
restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqrdmulh(vector,vector2), then store the result. 
*/ +#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N) \ + TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) + +#define TEST_VQRDMULH(Q, T1, T2, W, N) \ + TEST_VQRDMULH1(INSN, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x5555); + TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x33); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x22); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, 
int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8001); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000001); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8001); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000001); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check rounding overflow)"); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); +} diff --git a/ref_vqrdmulh_lane.c b/ref_vqrdmulh_lane.c new file mode 100644 index 0000000..d26eda4 --- /dev/null +++ b/ref_vqrdmulh_lane.c @@ -0,0 +1,133 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be 
included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH_LANE" + +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */ +#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) + +#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L) \ + TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + /* vector2: vqrdmulh_lane and 
vqrdmulhq_lane have a 2nd argument with + the same number of elements, so we need only one variable of each + type. */ + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8001); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000001); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check rounding overflow)"); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + 
TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); +} diff --git a/ref_vqrdmulh_n.c b/ref_vqrdmulh_n.c new file mode 100644 index 0000000..c780f21 --- /dev/null +++ b/ref_vqrdmulh_n.c @@ -0,0 +1,122 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH_N" + +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqrdmulh_n(vector,val), then store the result. 
*/ +#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L) \ + TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) + +#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L) \ + TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQRDMULH_N(, int, s, 16, 4, 
0x8000); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check rounding overflow)"); + TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001); + dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); +} diff --git a/ref_vqrshl.c b/ref_vqrshl.c new file mode 100644 index 0000000..b266e71 --- /dev/null +++ b/ref_vqrshl.c @@ -0,0 +1,199 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshl +#define TEST_MSG "VQRSHL/VQRSHLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v3=vqrshl(v1,v2), then store the result. */ +#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N) \ + TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N) + +#define TEST_VQRSHL(T3, Q, T1, T2, W, N) \ + TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0); + TEST_VDUP(vector, , int, s, 16, 4, 0); + TEST_VDUP(vector, , int, s, 32, 2, 0); + TEST_VDUP(vector, , int, s, 64, 1, 0); + TEST_VDUP(vector, , uint, u, 8, 8, 0); + TEST_VDUP(vector, , uint, u, 16, 4, 0); + TEST_VDUP(vector, , uint, u, 32, 2, 0); + TEST_VDUP(vector, , uint, u, 64, 1, 0); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 
0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift amount */ + /* Use values equal or one-less-than the type width to check + behaviour on limits */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 7); + TEST_VDUP(vector_shift, , int, s, 16, 4, 15); + TEST_VDUP(vector_shift, , int, s, 32, 2, 31); + TEST_VDUP(vector_shift, , int, s, 64, 1, 63); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (with input = 0)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (input 0 and negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 1); + TEST_VDUP(vector_shift, , int, s, 16, 4, 3); + TEST_VDUP(vector_shift, , int, s, 32, 2, 8); + TEST_VDUP(vector_shift, , int, s, 64, 1, 3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 10); + TEST_VDUP(vector_shift, q, int, 
s, 16, 8, 12); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 31); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex (TEST_MSG); + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -2); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use -1 shift amount to check overflow with round_const */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -1); + TEST_VDUP(vector_shift, , int, s, 32, 2, -1); + 
TEST_VDUP(vector_shift, , int, s, 64, 1, -1); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -1); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -1); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -1); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -1); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (checking overflow: shift by -1)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by -1)"); + + + /* Use -3 shift amount to check overflow with round_const */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -3); + TEST_VDUP(vector_shift, , int, s, 16, 4, -3); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -3); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -3); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -3); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -3); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (checking overflow: shift by -3)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by -3)"); +} diff --git a/ref_vqrshrn_n.c b/ref_vqrshrn_n.c new file mode 100644 index 0000000..f8eb4ec --- /dev/null +++ b/ref_vqrshrn_n.c @@ -0,0 +1,133 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshrn_n +#define TEST_MSG "VQRSHRN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqrshrn_n(x,v), then store the result. */ +#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V) \ + TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + 
clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 1); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 2); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Another set of tests */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* shift by 3 to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation: shift by 3)"); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 3); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 3); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 3); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3); + TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by 3)"); + + + /* shift by max to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation: shift by max)"); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 32); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8); + TEST_VQRSHRN_N(uint, 
u, 32, 16, 4, 16); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by max)"); +} diff --git a/ref_vqrshrun_n.c b/ref_vqrshrun_n.c new file mode 100644 index 0000000..48ccbff --- /dev/null +++ b/ref_vqrshrun_n.c @@ -0,0 +1,138 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshrun_n +#define TEST_MSG "VQRSHRUN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqrshrun_n(x,v), then store the result. 
*/ +#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V) \ + TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, -2); + TEST_VDUP(vector, q, int, s, 32, 4, -3); + TEST_VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by 1)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1); + 
TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 1)"); + + /* shift by max */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by max, positive input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by max, positive input)"); + + + /* Fill input vector with min value, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); + + /* shift by max */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by max, negative input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by max, negative input)"); + + /* Fill input vector with positive values, to check normal case */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x87654321); + TEST_VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqshl.c b/ref_vqshl.c new file mode 100644 index 0000000..1b40c92 --- /dev/null +++ b/ref_vqshl.c @@ -0,0 +1,239 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the 
rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshl +#define TEST_MSG "VQSHL/VQSHLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v3=vqshl(v1,v2), then store the result. 
*/ +#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N) \ + TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) + +#define TEST_VQSHL(T3, Q, T1, T2, W, N) \ + TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0); + TEST_VDUP(vector, , int, s, 16, 4, 0); + TEST_VDUP(vector, , int, s, 32, 2, 0); + TEST_VDUP(vector, , int, s, 64, 1, 0); + TEST_VDUP(vector, , uint, u, 8, 8, 0); + TEST_VDUP(vector, , uint, u, 16, 4, 0); + TEST_VDUP(vector, , uint, u, 32, 2, 0); + TEST_VDUP(vector, , uint, u, 64, 1, 0); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift amount */ + /* Use values equal or one-less-than the type width to check + behaviour on limits */ + /* Shift all lanes by 7 ... 
*/ + TEST_VDUP(vector_shift, , int, s, 8, 8, 7); + /* except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9) */ + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6); + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8); + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9); + + TEST_VDUP(vector_shift, , int, s, 16, 4, 15); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17); + + TEST_VDUP(vector_shift, , int, s, 32, 2, 31); + TEST_VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30); + + TEST_VDUP(vector_shift, , int, s, 64, 1, 63); + + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33); + + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + TEST_VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (with input = 0)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (input 0 and negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + TEST_VDUP(vector_shift, , 
int, s, 8, 8, 1); + TEST_VDUP(vector_shift, , int, s, 16, 4, 3); + TEST_VDUP(vector_shift, , int, s, 32, 2, 8); + TEST_VDUP(vector_shift, , int, s, 64, 1, -3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 10); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 12); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex (TEST_MSG); + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + /* Use large shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 8); + TEST_VDUP(vector_shift, , int, s, 16, 4, 16); + TEST_VDUP(vector_shift, , int, s, 32, 2, 32); + TEST_VDUP(vector_shift, , int, s, 64, 1, 64); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (large shift amount, negative input)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (large shift amount, negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , 
int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Shift by -1 */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -1); + TEST_VDUP(vector_shift, , int, s, 32, 2, -1); + TEST_VDUP(vector_shift, , int, s, 64, 1, -1); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -1); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -1); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -1); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -1); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (check saturation/overflow)"); + + /* Use large shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 8); + TEST_VDUP(vector_shift, , int, s, 16, 4, 16); + TEST_VDUP(vector_shift, , int, s, 32, 2, 32); + TEST_VDUP(vector_shift, , int, s, 64, 1, 64); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (large shift amount, positive input)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (large shift amount, positive input)"); + + /* Check 64 bits saturation */ + TEST_VDUP(vector, , int, s, 64, 1, -10); + 
TEST_VDUP(vector_shift, , int, s, 64, 1, 64); + TEST_VDUP(vector, q, int, s, 64, 2, 10); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation on 64 bits)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (check saturation on 64 bits)"); +} diff --git a/ref_vqshl_n.c b/ref_vqshl_n.c new file mode 100644 index 0000000..799a773 --- /dev/null +++ b/ref_vqshl_n.c @@ -0,0 +1,130 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshl +#define TEST_MSG "VQSHL_N/VQSHLQ_N" + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v2=vqshl_n(v1,v), then store the result. 
*/ +#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N) \ + TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N) + +#define TEST_VQSHL_N(T3, Q, T1, T2, W, N) \ + TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQSHL_N(, int, s, 8, 8, 2); + TEST_VQSHL_N(, int, s, 16, 4, 1); + TEST_VQSHL_N(, int, s, 32, 2, 1); + TEST_VQSHL_N(, int, s, 64, 1, 2); + TEST_VQSHL_N(, uint, u, 8, 8, 3); + TEST_VQSHL_N(, uint, u, 16, 4, 2); + TEST_VQSHL_N(, uint, u, 32, 2, 3); + TEST_VQSHL_N(, uint, u, 64, 1, 3); + + TEST_VQSHL_N(q, int, s, 8, 16, 2); + TEST_VQSHL_N(q, int, s, 16, 8, 1); + TEST_VQSHL_N(q, int, s, 32, 4, 1); + TEST_VQSHL_N(q, int, s, 64, 2, 2); + TEST_VQSHL_N(q, uint, u, 8, 16, 3); + TEST_VQSHL_N(q, uint, u, 16, 8, 2); + TEST_VQSHL_N(q, uint, u, 32, 4, 3); + TEST_VQSHL_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + 
TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (check saturation with large positive input)"); + TEST_VQSHL_N(, int, s, 8, 8, 2); + TEST_VQSHL_N(, int, s, 16, 4, 1); + TEST_VQSHL_N(, int, s, 32, 2, 1); + TEST_VQSHL_N(, int, s, 64, 1, 2); + TEST_VQSHL_N(, uint, u, 8, 8, 3); + TEST_VQSHL_N(, uint, u, 16, 4, 2); + TEST_VQSHL_N(, uint, u, 32, 2, 3); + TEST_VQSHL_N(, uint, u, 64, 1, 3); + + TEST_VQSHL_N(q, int, s, 8, 16, 2); + TEST_VQSHL_N(q, int, s, 16, 8, 1); + TEST_VQSHL_N(q, int, s, 32, 4, 1); + TEST_VQSHL_N(q, int, s, 64, 2, 2); + TEST_VQSHL_N(q, uint, u, 8, 16, 3); + TEST_VQSHL_N(q, uint, u, 16, 8, 2); + TEST_VQSHL_N(q, uint, u, 32, 4, 3); + TEST_VQSHL_N(q, uint, u, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (check saturation with large positive input)"); +} diff --git a/ref_vqshlu_n.c b/ref_vqshlu_n.c new file mode 100644 index 0000000..d7a2a4c --- /dev/null +++ b/ref_vqshlu_n.c @@ -0,0 +1,155 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above 
copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshlu +#define TEST_MSG "VQSHLU_N/VQSHLUQ_N" + +#define FNNAME1(NAME) void exec_ ## NAME ## _n(void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v2=vqshlu_n(v1,v), then store the result. */ +#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ + VECT_VAR(vector_res, T3, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) \ + TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) + +#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V) \ + TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, -1); + TEST_VDUP(vector, , int, s, 16, 4, -2); + TEST_VDUP(vector, , int, s, 32, 2, -3); + TEST_VDUP(vector, , int, s, 64, 1, -4); + 
TEST_VDUP(vector, q, int, s, 8, 16, -1); + TEST_VDUP(vector, q, int, s, 16, 8, -2); + TEST_VDUP(vector, q, int, s, 32, 4, -3); + TEST_VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by 1)"); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 1)"); + + /* shift by 2 to force saturation */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by 2)"); + 
TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 2)"); + + /* Fill input vector with positive values, to check normal case */ + TEST_VDUP(vector, , int, s, 8, 8, 1); + TEST_VDUP(vector, , int, s, 16, 4, 2); + TEST_VDUP(vector, , int, s, 32, 2, 3); + TEST_VDUP(vector, , int, s, 64, 1, 4); + TEST_VDUP(vector, q, int, s, 8, 16, 5); + TEST_VDUP(vector, q, int, s, 16, 8, 6); + TEST_VDUP(vector, q, int, s, 32, 4, 7); + TEST_VDUP(vector, q, int, s, 64, 2, 8); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqshrn_n.c b/ref_vqshrn_n.c new file mode 100644 index 0000000..4588bc0 --- /dev/null +++ b/ref_vqshrn_n.c @@ -0,0 +1,134 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the 
following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshrn_n +#define TEST_MSG "VQSHRN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqshrn_n(x,v), then store the result. */ +#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQSHRN_N(T1, T2, W, W2, N, V) \ + TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, 
int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQSHRN_N(int, s, 16, 8, 8, 1); + TEST_VQSHRN_N(int, s, 32, 16, 4, 1); + TEST_VQSHRN_N(int, s, 64, 32, 2, 2); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + /* Another set of tests */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* shift by 3 to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation: shift by 3)"); + TEST_VQSHRN_N(int, s, 16, 8, 8, 3); + TEST_VQSHRN_N(int, s, 32, 16, 4, 3); + TEST_VQSHRN_N(int, s, 64, 32, 2, 3); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 3); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by 3)"); + + + /* shift by max to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation: shift by max)"); + TEST_VQSHRN_N(int, s, 16, 8, 8, 8); + 
TEST_VQSHRN_N(int, s, 32, 16, 4, 16); + TEST_VQSHRN_N(int, s, 64, 32, 2, 32); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 8); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 16); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 32); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by max)"); +} diff --git a/ref_vqshrun_n.c b/ref_vqshrun_n.c new file mode 100644 index 0000000..f4e5a58 --- /dev/null +++ b/ref_vqshrun_n.c @@ -0,0 +1,114 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshrun_n +#define TEST_MSG "VQSHRUN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqshrun_n(x,v), then store the result. 
*/ +#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V) \ + TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, -2); + TEST_VDUP(vector, q, int, s, 32, 4, -3); + TEST_VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 3); + TEST_VQSHRUN_N(int, s, 32, 16, 4, 4); + TEST_VQSHRUN_N(int, s, 64, 32, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow)"); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 1); + TEST_VQSHRUN_N(int, s, 32, 16, 4, 1); + TEST_VQSHRUN_N(int, s, 64, 
32, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow)"); + + /* Fill input vector with positive values, to check normal case */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x87654321); + TEST_VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 6); + TEST_VQSHRUN_N(int, s, 32, 16, 4, 7); + TEST_VQSHRUN_N(int, s, 64, 32, 2, 8); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqsub.c b/ref_vqsub.c new file mode 100644 index 0000000..3cd28ae --- /dev/null +++ b/ref_vqsub.c @@ -0,0 +1,156 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqsub +#define TEST_MSG "VQSUB/VQSUBQ" + +/* Extra tests for functions requiring types larger than 64 bits to + compute saturation */ +void vqsub_64(void); +#define EXTRA_TESTS vqsub_64 + +#include "ref_v_binary_sat_op.c" + +void vqsub_64(void) +{ + int i; + + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + /* Initialize input "vector1" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + + TEST_VDUP(vector2, , int, s, 64, 1, 0x0); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x0); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x0); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x0); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + fprintf(ref_file, "\n64 bits saturation:\n"); + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + TEST_VDUP(vector2, , int, s, 64, 1, 0x7fffffffffffffffLL); + TEST_VDUP(vector2, , uint, u, 64, 1, 0xffffffffffffffffULL); + + 
/* To check positive saturation, we need to write a positive value + in vector1 */ + TEST_VDUP(vector1, q, int, s, 64, 2, 0x3fffffffffffffffLL); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000LL); + + TEST_VDUP(vector2, q, uint, u, 64, 2, 0xffffffffffffffffULL); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* To improve coverage, check saturation with less than 64 bits too */ + fprintf(ref_file, "\nless than 64 bits saturation:\n"); + TEST_VDUP(vector2, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector2, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + DUMP(TEST_MSG, int, 8, 8, PRIx8); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 8, 16, PRIx8); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + TEST_VDUP(vector1, , uint, u, 8, 8, 0x10); + TEST_VDUP(vector1, , uint, u, 16, 4, 0x10); + TEST_VDUP(vector1, , uint, u, 32, 2, 0x10); + TEST_VDUP(vector1, q, uint, u, 8, 16, 0x10); + TEST_VDUP(vector1, q, uint, u, 16, 8, 0x10); + TEST_VDUP(vector1, q, uint, u, 32, 4, 0x10); + + TEST_VDUP(vector2, , uint, u, 8, 8, 0x20); + 
TEST_VDUP(vector2, , uint, u, 16, 4, 0x20); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x20); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x20); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x20); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x20); + + fprintf(ref_file, "\n%s less than 64 bits saturation overflow output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_vraddhn.c b/ref_vraddhn.c new file mode 100644 index 0000000..c6aac33 --- /dev/null +++ b/ref_vraddhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vraddhn +#define TEST_MSG "VRADDHN" + +#include "ref_vaddhn.c" diff --git a/ref_vrecpe.c b/ref_vrecpe.c new file mode 100644 index 0000000..4bb9714 --- /dev/null +++ b/ref_vrecpe.c @@ -0,0 +1,97 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRECPE/VRECPEQ" +void exec_vrecpe(void) +{ + int i; + + /* Basic test: y=vrecpe(x), then store the result. 
*/ +#define TEST_VRECPE(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrecpe##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 2, 0x12345678); + TEST_VDUP(vector, , float, f, 32, 2, 1.9); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); + TEST_VDUP(vector, q, float, f, 32, 4, 125); + + /* Apply the operator */ + TEST_VRECPE(, uint, u, 32, 2); + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, uint, u, 32, 4); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " (positive input)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , float, f, 32, 2, -10.0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x89081234); + TEST_VDUP(vector, q, float, f, 32, 4, -125.0); + + /* Apply the operator */ + TEST_VRECPE(, uint, u, 32, 2); + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, uint, u, 32, 4); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " (negative input)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git 
a/ref_vrecps.c b/ref_vrecps.c new file mode 100644 index 0000000..333fa07 --- /dev/null +++ b/ref_vrecps.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRECPS/VRECPSQ" +void exec_vrecps(void) +{ + int i; + + /* Basic test: y=vrecps(x), then store the result. 
*/ +#define TEST_VRECPS(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrecps##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for integer variants */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , float, f, 32, 2, 12.9); + TEST_VDUP(vector, q, float, f, 32, 4, 9.2); + + TEST_VDUP(vector2, , float, f, 32, 2, 8.9); + TEST_VDUP(vector2, q, float, f, 32, 4, 3.2); + + /* Apply the operator */ + TEST_VRECPS(, float, f, 32, 2); + TEST_VRECPS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vreinterpret.c b/ref_vreinterpret.c new file mode 100644 index 0000000..abd5aa3 --- /dev/null +++ b/ref_vreinterpret.c @@ -0,0 +1,256 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VREINTERPRET/VREINTERPRETQ" + +void exec_vreinterpret (void) +{ + int i; + + /* Basic test: y=vreinterpret(x), then store the result. */ +#define TEST_VREINTERPRET(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ + VECT_VAR(vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ + VECT_VAR(vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* The same result buffers are used multiple times, so output them + before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + + + /* vreinterpret_s8_xx */ + TEST_VREINTERPRET(, int, s, 8, 8, int, s, 16, 4); + 
TEST_VREINTERPRET(, int, s, 8, 8, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 8, 8, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 64, 1); + + /* vreinterpret_s16_xx */ + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 64, 1); + + /* vreinterpret_s32_xx */ + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 64, 1); + + /* vreinterpret_s64_xx */ + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 16, 4); + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 64, 1); + + /* vreinterpret_u8_xx */ + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 64, 1); + + /* vreinterpret_u16_xx */ + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 
8, 8); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 64, 1); + + /* vreinterpret_u32_xx */ + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 64, 1); + + /* vreinterpret_u64_xx */ + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 32, 2); + + /* vreinterpretq_s8_xx */ + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 64, 2); + + /* vreinterpretq_s16_xx */ + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 64, 2); + + /* 
vreinterpretq_s32_xx */ + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 64, 2); + + /* vreinterpretq_s64_xx */ + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 64, 2); + + /* vreinterpretq_u16_xx */ + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 64, 2); + + /* vreinterpretq_u32_xx */ + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 64, 2); + + /* vreinterpretq_u64_xx */ + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 64, 
2, uint, u, 16, 8); + TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 32, 4); + + /* vreinterpretq_u8_xx */ + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 16, 8); + TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 64, 2); + + /* vreinterpret_f32_xx */ + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 32, 2); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 8, 8); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 32, 2); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 64, 1); + + /* vreinterpretq_f32_xx */ + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 32, 4); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 32, 4); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 64, 2); + + /* vreinterpret_xx_f32 */ + TEST_VREINTERPRET(, int, s, 8, 8, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 32, 2, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 32, 2); + + /* vreinterpretq_xx_f32 */ 
+ TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 32, 4, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 32, 4); +} diff --git a/ref_vrev.c b/ref_vrev.c new file mode 100644 index 0000000..2f088c0 --- /dev/null +++ b/ref_vrev.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vrev (void) +{ + /* Basic test: y=vrev(x), then store the result. 
*/ +#define TEST_VREV(Q, T1, T2, W, N, W2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrev##W2##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Check vrev in each of the existing combinations */ +#define TEST_MSG "VREV16" + TEST_VREV(, int, s, 8, 8, 16); + TEST_VREV(, uint, u, 8, 8, 16); + TEST_VREV(q, int, s, 8, 16, 16); + TEST_VREV(q, uint, u, 8, 16, 16); + dump_results_hex (TEST_MSG); + +#undef TEST_MSG +#define TEST_MSG "VREV32" + TEST_VREV(, int, s, 8, 8, 32); + TEST_VREV(, int, s, 16, 4, 32); + TEST_VREV(, uint, u, 8, 8, 32); + TEST_VREV(, uint, u, 16, 4, 32); + TEST_VREV(q, int, s, 8, 16, 32); + TEST_VREV(q, int, s, 16, 8, 32); + TEST_VREV(q, uint, u, 8, 16, 32); + TEST_VREV(q, uint, u, 16, 8, 32); + dump_results_hex (TEST_MSG); + +#undef TEST_MSG +#define TEST_MSG "VREV64" + TEST_VREV(, int, s, 8, 8, 64); + TEST_VREV(, int, s, 16, 4, 64); + TEST_VREV(, int, s, 32, 2, 64); + TEST_VREV(, uint, u, 8, 8, 64); + TEST_VREV(, uint, u, 16, 4, 64); + TEST_VREV(, uint, u, 32, 2, 64); + TEST_VREV(q, int, s, 8, 16, 64); + TEST_VREV(q, int, s, 16, 8, 64); + TEST_VREV(q, int, s, 32, 4, 64); + TEST_VREV(q, uint, u, 8, 16, 64); + TEST_VREV(q, uint, u, 16, 8, 64); + TEST_VREV(q, uint, u, 32, 4, 64); + + TEST_VREV(, float, f, 32, 2, 64); + TEST_VREV(q, float, f, 32, 4, 64); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vrhadd.c b/ref_vrhadd.c new file mode 100644 index 0000000..20872a1 --- /dev/null +++ b/ref_vrhadd.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics 
+Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vrhadd +#define TEST_MSG "VRHADD/VRHADDQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vrshl.c b/ref_vrshl.c new file mode 100644 index 0000000..ec534c0 --- /dev/null +++ b/ref_vrshl.c @@ -0,0 +1,192 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHL/VRSHLQ" +void exec_vrshl (void) +{ + /* Basic test: v3=vrshl(v1,v2), then store the result. */ +#define TEST_VRSHL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check behavior on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0); + TEST_VDUP(vector, , int, s, 16, 4, 0); + TEST_VDUP(vector, , int, s, 32, 2, 0); + TEST_VDUP(vector, , int, s, 64, 1, 0); + TEST_VDUP(vector, , uint, u, 8, 8, 0); + TEST_VDUP(vector, , uint, u, 16, 4, 0); + TEST_VDUP(vector, , uint, u, 32, 2, 0); + TEST_VDUP(vector, , uint, u, 64, 1, 0); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift 
amount */ + /* Use values equal to one-less-than the type width to check + behaviour on limits */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 7); + TEST_VDUP(vector_shift, , int, s, 16, 4, 15); + TEST_VDUP(vector_shift, , int, s, 32, 2, 31); + TEST_VDUP(vector_shift, , int, s, 64, 1, 63); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 15); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 31); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 1); + TEST_VDUP(vector_shift, , int, s, 16, 4, 3); + TEST_VDUP(vector_shift, , int, s, 32, 2, 8); + TEST_VDUP(vector_shift, , int, s, 64, 1, -3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 10); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 12); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex (TEST_MSG); + + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , 
int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + /* Fill input vector with max value, to check behavior on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use -1 shift amount to check overflow with round_const */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -1); + TEST_VDUP(vector_shift, , int, s, 32, 2, -1); + TEST_VDUP(vector_shift, , int, s, 64, 1, -1); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -1); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -1); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -1); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -1); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (checking round_const overflow: shift by -1)"); + + /* Use -3 shift amount to check overflow with round_const */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -3); + TEST_VDUP(vector_shift, , int, s, 16, 4, -3); + TEST_VDUP(vector_shift, , int, s, 
32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -3); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -3); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -3); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -3); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (checking round_const overflow: shift by -3)"); + + /* Test large shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 10); + TEST_VDUP(vector_shift, , int, s, 16, 4, 20); + TEST_VDUP(vector_shift, , int, s, 32, 2, 33); + TEST_VDUP(vector_shift, , int, s, 64, 1, 65); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 9); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (large shift amount)"); +} diff --git a/ref_vrshr_n.c b/ref_vrshr_n.c new file mode 100644 index 0000000..c68b637 --- /dev/null +++ b/ref_vrshr_n.c @@ -0,0 +1,217 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHR_N" +void exec_vrshr_n (void) +{ + /* Basic test: y=vrshr_n(x,v), then store the result. */ +#define TEST_VRSHR_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 12); + TEST_VRSHR_N(, int, s, 32, 2, 2); + TEST_VRSHR_N(, int, s, 64, 1, 32); + TEST_VRSHR_N(, uint, u, 8, 8, 2); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 5); + TEST_VRSHR_N(, uint, u, 64, 1, 33); + + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 12); + TEST_VRSHR_N(q, int, s, 32, 4, 2); + TEST_VRSHR_N(q, int, s, 64, 2, 32); + TEST_VRSHR_N(q, uint, u, 8, 16, 2); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 5); + TEST_VRSHR_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Another set of tests */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 
0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use max shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 8); + TEST_VRSHR_N(, int, s, 16, 4, 16); + TEST_VRSHR_N(, int, s, 32, 2, 32); + TEST_VRSHR_N(, int, s, 64, 1, 64); + TEST_VRSHR_N(, uint, u, 8, 8, 8); + TEST_VRSHR_N(, uint, u, 16, 4, 16); + TEST_VRSHR_N(, uint, u, 32, 2, 32); + TEST_VRSHR_N(, uint, u, 64, 1, 64); + TEST_VRSHR_N(q, int, s, 8, 16, 8); + TEST_VRSHR_N(q, int, s, 16, 8, 16); + TEST_VRSHR_N(q, int, s, 32, 4, 32); + TEST_VRSHR_N(q, int, s, 64, 2, 64); + TEST_VRSHR_N(q, uint, u, 8, 16, 8); + TEST_VRSHR_N(q, uint, u, 16, 8, 16); + TEST_VRSHR_N(q, uint, u, 32, 4, 32); + TEST_VRSHR_N(q, uint, u, 64, 2, 64); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: max shift amount, positive input)"); + + /* Use 1 as shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 1); + TEST_VRSHR_N(, int, s, 32, 2, 1); + TEST_VRSHR_N(, int, s, 64, 1, 1); + TEST_VRSHR_N(, uint, u, 8, 8, 1); + TEST_VRSHR_N(, uint, u, 16, 4, 1); + TEST_VRSHR_N(, uint, u, 32, 2, 1); + TEST_VRSHR_N(, uint, u, 64, 1, 1); + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 1); + TEST_VRSHR_N(q, int, s, 32, 4, 1); + TEST_VRSHR_N(q, int, s, 64, 2, 1); + 
TEST_VRSHR_N(q, uint, u, 8, 16, 1); + TEST_VRSHR_N(q, uint, u, 16, 8, 1); + TEST_VRSHR_N(q, uint, u, 32, 4, 1); + TEST_VRSHR_N(q, uint, u, 64, 2, 1); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 1, with negative input)"); + + /* Use 3 as shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 3); + TEST_VRSHR_N(, int, s, 16, 4, 3); + TEST_VRSHR_N(, int, s, 32, 2, 3); + TEST_VRSHR_N(, int, s, 64, 1, 3); + TEST_VRSHR_N(, uint, u, 8, 8, 3); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 3); + TEST_VRSHR_N(, uint, u, 64, 1, 3); + TEST_VRSHR_N(q, int, s, 8, 16, 3); + TEST_VRSHR_N(q, int, s, 16, 8, 3); + TEST_VRSHR_N(q, int, s, 32, 4, 3); + TEST_VRSHR_N(q, int, s, 64, 2, 3); + TEST_VRSHR_N(q, uint, u, 8, 16, 3); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 3); + TEST_VRSHR_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 3, positive input)"); + + TEST_VDUP(vector, , int, s, 8, 8, 0x80); + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + + /* Use 1 as shift amount, to exercise 
saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 1); + TEST_VRSHR_N(, int, s, 32, 2, 1); + TEST_VRSHR_N(, int, s, 64, 1, 1); + TEST_VRSHR_N(, uint, u, 8, 8, 1); + TEST_VRSHR_N(, uint, u, 16, 4, 1); + TEST_VRSHR_N(, uint, u, 32, 2, 1); + TEST_VRSHR_N(, uint, u, 64, 1, 1); + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 1); + TEST_VRSHR_N(q, int, s, 32, 4, 1); + TEST_VRSHR_N(q, int, s, 64, 2, 1); + TEST_VRSHR_N(q, uint, u, 8, 16, 1); + TEST_VRSHR_N(q, uint, u, 16, 8, 1); + TEST_VRSHR_N(q, uint, u, 32, 4, 1); + TEST_VRSHR_N(q, uint, u, 64, 2, 1); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 1, with negative input)"); + + /* Use 3 as shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 3); + TEST_VRSHR_N(, int, s, 16, 4, 3); + TEST_VRSHR_N(, int, s, 32, 2, 3); + TEST_VRSHR_N(, int, s, 64, 1, 3); + TEST_VRSHR_N(, uint, u, 8, 8, 3); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 3); + TEST_VRSHR_N(, uint, u, 64, 1, 3); + TEST_VRSHR_N(q, int, s, 8, 16, 3); + TEST_VRSHR_N(q, int, s, 16, 8, 3); + TEST_VRSHR_N(q, int, s, 32, 4, 3); + TEST_VRSHR_N(q, int, s, 64, 2, 3); + TEST_VRSHR_N(q, uint, u, 8, 16, 3); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 3); + TEST_VRSHR_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 3, with negative input)"); +} diff --git a/ref_vrshrn_n.c b/ref_vrshrn_n.c new file mode 100644 index 0000000..1cb67a0 --- /dev/null +++ b/ref_vrshrn_n.c @@ -0,0 +1,119 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal 
+in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHRN_N" +void exec_vrshrn_n (void) +{ + /* Basic test: v2=vrshrn_n(v1,v), then store the result. 
*/ +#define TEST_VRSHRN_N(T1, T2, W, N, W2, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vrshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with 0, to check behavior on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose shift amount arbitrarily */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VRSHRN_N(int, s, 32, 4, 16, 1); + TEST_VRSHRN_N(int, s, 64, 2, 32, 2); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Test again, with predefined input values */ + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VRSHRN_N(int, s, 32, 4, 16, 1); + TEST_VRSHRN_N(int, s, 64, 2, 
32, 2); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Fill input arbitrary values */ + TEST_VDUP(vector, q, int, s, 16, 8, 30); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFF0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFF0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose shift amount arbitrarily */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 7); + TEST_VRSHRN_N(int, s, 32, 4, 16, 14); + TEST_VRSHRN_N(int, s, 64, 2, 32, 31); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 7); + TEST_VRSHRN_N(uint, u, 32, 4, 16, 16); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (with large shift amount)"); +} diff --git a/ref_vrsqrte.c b/ref_vrsqrte.c new file mode 100644 index 0000000..f66bfb8 --- /dev/null +++ b/ref_vrsqrte.c @@ -0,0 +1,105 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSQRTE/VRSQRTEQ" +void exec_vrsqrte(void) +{ + int i; + + /* Basic test: y=vrsqrte(x), then store the result. */ +#define TEST_VRSQRTE(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsqrte##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 2, 0x12345678); + TEST_VDUP(vector, , float, f, 32, 2, 12.9); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); + TEST_VDUP(vector, q, float, f, 32, 4, 18.2); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(, float, f, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + TEST_VRSQRTE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Don't test FP variants with negative inputs: the result depends + on the platform */ + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 
2, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x89081234); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " (2)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 2, 0x80000000); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x4ABCDEF0); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " (3)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_vrsqrts.c b/ref_vrsqrts.c new file mode 100644 index 0000000..a3f3d00 --- /dev/null +++ b/ref_vrsqrts.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSQRTS/VRSQRTSQ" +void exec_vrsqrts(void) +{ + int i; + + /* Basic test: y=vrsqrts(x), then store the result. */ +#define TEST_VRSQRTS(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsqrts##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for integer variants */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , float, f, 32, 2, 12.9); + TEST_VDUP(vector, q, float, f, 32, 4, 9.1); + + TEST_VDUP(vector2, , float, f, 32, 2, 9.9); + TEST_VDUP(vector2, q, float, f, 32, 4, 1.9); + + /* Apply the operator */ + TEST_VRSQRTS(, float, f, 32, 2); + TEST_VRSQRTS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vrsra_n.c b/ref_vrsra_n.c new file mode 100644 index 0000000..f9e8df3 --- /dev/null +++ b/ref_vrsra_n.c @@ -0,0 +1,238 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do 
so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSRA_N" +void exec_vrsra_n (void) +{ + /* Basic test: y=vrsra_n(x,v), then store the result. */ +#define TEST_VRSRA_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + 
TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + /* Choose shift amount arbitrarily */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 12); + TEST_VRSRA_N(, int, s, 32, 2, 2); + TEST_VRSRA_N(, int, s, 64, 1, 32); + TEST_VRSRA_N(, uint, u, 8, 8, 2); + TEST_VRSRA_N(, uint, u, 16, 4, 3); + TEST_VRSRA_N(, uint, u, 32, 2, 5); + TEST_VRSRA_N(, uint, u, 64, 1, 33); + + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 12); + TEST_VRSRA_N(q, int, s, 32, 4, 2); + TEST_VRSRA_N(q, int, s, 64, 2, 32); + TEST_VRSRA_N(q, uint, u, 8, 16, 2); + TEST_VRSRA_N(q, uint, u, 16, 8, 3); + TEST_VRSRA_N(q, uint, u, 32, 4, 5); + TEST_VRSRA_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Initialize the accumulator with 0 */ + TEST_VDUP(vector, , int, s, 8, 8, 0); + TEST_VDUP(vector, , int, s, 16, 4, 0); + TEST_VDUP(vector, , int, s, 32, 2, 0); + TEST_VDUP(vector, , int, s, 64, 1, 0); + TEST_VDUP(vector, , uint, u, 8, 8, 0); + TEST_VDUP(vector, , uint, u, 16, 4, 0); + TEST_VDUP(vector, , uint, u, 32, 2, 0); + TEST_VDUP(vector, , uint, u, 64, 1, 0); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Initialize with max values to check overflow */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector2, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector2, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); 
+ TEST_VDUP(vector2, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Shift by 1 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 1); + TEST_VRSRA_N(, int, s, 32, 2, 1); + TEST_VRSRA_N(, int, s, 64, 1, 1); + TEST_VRSRA_N(, uint, u, 8, 8, 1); + TEST_VRSRA_N(, uint, u, 16, 4, 1); + TEST_VRSRA_N(, uint, u, 32, 2, 1); + TEST_VRSRA_N(, uint, u, 64, 1, 1); + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 1); + TEST_VRSRA_N(q, int, s, 32, 4, 1); + TEST_VRSRA_N(q, int, s, 64, 2, 1); + TEST_VRSRA_N(q, uint, u, 8, 16, 1); + TEST_VRSRA_N(q, uint, u, 16, 8, 1); + TEST_VRSRA_N(q, uint, u, 32, 4, 1); + TEST_VRSRA_N(q, uint, u, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 1, positive input)"); + + /* Shift by 3 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 3); + TEST_VRSRA_N(, int, s, 16, 4, 3); + TEST_VRSRA_N(, int, s, 32, 2, 3); + TEST_VRSRA_N(, int, s, 64, 1, 3); + TEST_VRSRA_N(, uint, u, 8, 8, 3); + TEST_VRSRA_N(, uint, u, 16, 4, 3); + TEST_VRSRA_N(, uint, u, 32, 2, 3); + TEST_VRSRA_N(, uint, u, 64, 1, 3); + TEST_VRSRA_N(q, int, s, 8, 16, 3); + TEST_VRSRA_N(q, int, s, 16, 8, 3); + TEST_VRSRA_N(q, int, s, 32, 4, 3); + TEST_VRSRA_N(q, int, s, 64, 2, 3); + TEST_VRSRA_N(q, uint, u, 8, 16, 3); + TEST_VRSRA_N(q, uint, u, 16, 8, 3); + TEST_VRSRA_N(q, uint, u, 32, 4, 3); + TEST_VRSRA_N(q, 
uint, u, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 3, positive input)"); + + /* Shift by max to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 8); + TEST_VRSRA_N(, int, s, 16, 4, 16); + TEST_VRSRA_N(, int, s, 32, 2, 32); + TEST_VRSRA_N(, int, s, 64, 1, 64); + TEST_VRSRA_N(, uint, u, 8, 8, 8); + TEST_VRSRA_N(, uint, u, 16, 4, 16); + TEST_VRSRA_N(, uint, u, 32, 2, 32); + TEST_VRSRA_N(, uint, u, 64, 1, 64); + TEST_VRSRA_N(q, int, s, 8, 16, 8); + TEST_VRSRA_N(q, int, s, 16, 8, 16); + TEST_VRSRA_N(q, int, s, 32, 4, 32); + TEST_VRSRA_N(q, int, s, 64, 2, 64); + TEST_VRSRA_N(q, uint, u, 8, 16, 8); + TEST_VRSRA_N(q, uint, u, 16, 8, 16); + TEST_VRSRA_N(q, uint, u, 32, 4, 32); + TEST_VRSRA_N(q, uint, u, 64, 2, 64); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by max, positive input)"); + /* Initialize with min values to check overflow */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x80); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector2, , int, s, 64, 1, 0x8000000000000000LL); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000ULL); + + /* Shift by 1 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 1); + TEST_VRSRA_N(, int, s, 32, 2, 1); + TEST_VRSRA_N(, int, s, 64, 1, 1); + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 1); + TEST_VRSRA_N(q, int, s, 32, 4, 1); + TEST_VRSRA_N(q, int, s, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 1, negative input)"); + + /* Shift by 3 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 3); + TEST_VRSRA_N(, int, s, 16, 4, 3); + TEST_VRSRA_N(, int, s, 32, 2, 3); + TEST_VRSRA_N(, int, s, 64, 1, 3); + TEST_VRSRA_N(q, int, s, 8, 
16, 3); + TEST_VRSRA_N(q, int, s, 16, 8, 3); + TEST_VRSRA_N(q, int, s, 32, 4, 3); + TEST_VRSRA_N(q, int, s, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by max, negative input)"); + + /* Shift by max to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 8); + TEST_VRSRA_N(, int, s, 16, 4, 16); + TEST_VRSRA_N(, int, s, 32, 2, 32); + TEST_VRSRA_N(, int, s, 64, 1, 64); + TEST_VRSRA_N(q, int, s, 8, 16, 8); + TEST_VRSRA_N(q, int, s, 16, 8, 16); + TEST_VRSRA_N(q, int, s, 32, 4, 32); + TEST_VRSRA_N(q, int, s, 64, 2, 64); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by max, negative input)"); +} diff --git a/ref_vrsubhn.c b/ref_vrsubhn.c new file mode 100644 index 0000000..e972d82 --- /dev/null +++ b/ref_vrsubhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vrsubhn +#define TEST_MSG "VRSUBHN" + +#include "ref_vaddhn.c" diff --git a/ref_vset_lane.c b/ref_vset_lane.c new file mode 100644 index 0000000..a938469 --- /dev/null +++ b/ref_vset_lane.c @@ -0,0 +1,78 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSET_LANE/VSET_LANEQ" +void exec_vset_lane (void) +{ + /* vec=vset_lane(val, vec, lane), then store the result. 
*/ +#define TEST_VSET_LANE_HERE(Q, T1, T2, W, N, V, L) \ + VECT_VAR(vector, T1, W, N) = \ + vset##Q##_lane_##T2##W(V, \ + VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose value and lane arbitrarily */ + TEST_VSET_LANE_HERE(, int, s, 8, 8, 0x11, 7); + TEST_VSET_LANE_HERE(, int, s, 16, 4, 0x22, 3); + TEST_VSET_LANE_HERE(, int, s, 32, 2, 0x33, 1); + TEST_VSET_LANE_HERE(, int, s, 64, 1, 0x44, 0); + TEST_VSET_LANE_HERE(, uint, u, 8, 8, 0x55, 6); + TEST_VSET_LANE_HERE(, uint, u, 16, 4, 0x66, 2); + TEST_VSET_LANE_HERE(, uint, u, 32, 2, 0x77, 1); + TEST_VSET_LANE_HERE(, uint, u, 64, 1, 0x88, 0); + TEST_VSET_LANE_HERE(, float, f, 32, 2, 33.2, 1); + + TEST_VSET_LANE_HERE(q, int, s, 8, 16, 0x99, 15); + TEST_VSET_LANE_HERE(q, int, s, 16, 8, 0xAA, 5); + TEST_VSET_LANE_HERE(q, int, s, 32, 4, 0xBB, 3); + TEST_VSET_LANE_HERE(q, int, s, 64, 2, 0xCC, 1); + TEST_VSET_LANE_HERE(q, uint, u, 8, 16, 0xDD, 14); + TEST_VSET_LANE_HERE(q, uint, u, 16, 8, 0xEE, 6); + TEST_VSET_LANE_HERE(q, uint, u, 32, 4, 0xFF, 2); + TEST_VSET_LANE_HERE(q, uint, u, 64, 2, 0x11, 1); + TEST_VSET_LANE_HERE(q, float, f, 32, 4, 11.2, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshl.c b/ref_vshl.c new file mode 100644 index 0000000..2af3f80 --- /dev/null +++ b/ref_vshl.c @@ -0,0 +1,98 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including 
without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHL/VSHLQ" +void exec_vshl (void) +{ + /* Basic test: v3=vshl(v1,v2), then store the result. 
*/ +#define TEST_VSHL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 1); + TEST_VDUP(vector_shift, , int, s, 16, 4, 3); + TEST_VDUP(vector_shift, , int, s, 32, 2, 8); + TEST_VDUP(vector_shift, , int, s, 64, 1, 3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 5); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 12); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 30); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex (TEST_MSG); + + /* Test large shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 8); + TEST_VDUP(vector_shift, , int, s, 16, 4, 16); + TEST_VDUP(vector_shift, , int, s, 32, 2, 32); + TEST_VDUP(vector_shift, , int, s, 64, 1, 64); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 17); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 33); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 65); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex2 (TEST_MSG, " (large shift amount)"); + + + /* Test negative shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -1); + TEST_VDUP(vector_shift, , int, s, 32, 2, -2); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -2); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -5); + 
TEST_VDUP(vector_shift, q, int, s, 32, 4, -3); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -5); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); +} diff --git a/ref_vshl_n.c b/ref_vshl_n.c new file mode 100644 index 0000000..92f0fa1 --- /dev/null +++ b/ref_vshl_n.c @@ -0,0 +1,75 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHL_N" +void exec_vshl_n (void) +{ + /* Basic test: v2=vshl_n(v1,v), then store the result. 
*/ +#define TEST_VSHL_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshl##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHL_N(, int, s, 8, 8, 1); + TEST_VSHL_N(, int, s, 16, 4, 1); + TEST_VSHL_N(, int, s, 32, 2, 3); + TEST_VSHL_N(, int, s, 64, 1, 2); + TEST_VSHL_N(, uint, u, 8, 8, 2); + TEST_VSHL_N(, uint, u, 16, 4, 4); + TEST_VSHL_N(, uint, u, 32, 2, 3); + TEST_VSHL_N(, uint, u, 64, 1, 1); + + TEST_VSHL_N(q, int, s, 8, 16, 5); + TEST_VSHL_N(q, int, s, 16, 8, 1); + TEST_VSHL_N(q, int, s, 32, 4, 2); + TEST_VSHL_N(q, int, s, 64, 2, 2); + TEST_VSHL_N(q, uint, u, 8, 16, 2); + TEST_VSHL_N(q, uint, u, 16, 8, 3); + TEST_VSHL_N(q, uint, u, 32, 4, 2); + TEST_VSHL_N(q, uint, u, 64, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshll_n.c b/ref_vshll_n.c new file mode 100644 index 0000000..e942adc --- /dev/null +++ b/ref_vshll_n.c @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHLL_N" +void exec_vshll_n (void) +{ + /* Basic test: v2=vshll_n(v1,v), then store the result. */ +#define TEST_VSHLL_N(T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vshll##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1q##_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHLL_N(int, s, 8, 16, 8, 1); + TEST_VSHLL_N(int, s, 16, 32, 4, 1); + TEST_VSHLL_N(int, s, 32, 64, 2, 3); + TEST_VSHLL_N(uint, u, 8, 16, 8, 2); + TEST_VSHLL_N(uint, u, 16, 32, 4, 4); + TEST_VSHLL_N(uint, u, 32, 64, 2, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshr_n.c b/ref_vshr_n.c new file mode 100644 index 0000000..d9e905b --- /dev/null +++ b/ref_vshr_n.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the 
rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHR_N" +void exec_vshr_n (void) +{ + /* Basic test: y=vshr_n(x,v), then store the result. 
*/ +#define TEST_VSHR_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHR_N(, int, s, 8, 8, 1); + TEST_VSHR_N(, int, s, 16, 4, 12); + TEST_VSHR_N(, int, s, 32, 2, 2); + TEST_VSHR_N(, int, s, 64, 1, 32); + TEST_VSHR_N(, uint, u, 8, 8, 2); + TEST_VSHR_N(, uint, u, 16, 4, 3); + TEST_VSHR_N(, uint, u, 32, 2, 5); + TEST_VSHR_N(, uint, u, 64, 1, 33); + + TEST_VSHR_N(q, int, s, 8, 16, 1); + TEST_VSHR_N(q, int, s, 16, 8, 12); + TEST_VSHR_N(q, int, s, 32, 4, 2); + TEST_VSHR_N(q, int, s, 64, 2, 32); + TEST_VSHR_N(q, uint, u, 8, 16, 2); + TEST_VSHR_N(q, uint, u, 16, 8, 3); + TEST_VSHR_N(q, uint, u, 32, 4, 5); + TEST_VSHR_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshrn_n.c b/ref_vshrn_n.c new file mode 100644 index 0000000..c520fbf --- /dev/null +++ b/ref_vshrn_n.c @@ -0,0 +1,81 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included 
in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHRN_N" +void exec_vshrn_n (void) +{ + /* Basic test: y=vshrn_n(x,v), then store the result. */ +#define TEST_VSHRN_N(T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + TEST_VSHRN_N(int, s, 16, 8, 8, 1); + 
TEST_VSHRN_N(int, s, 32, 16, 4, 1); + TEST_VSHRN_N(int, s, 64, 32, 2, 2); + TEST_VSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vsli_n.c b/ref_vsli_n.c new file mode 100644 index 0000000..2666af6 --- /dev/null +++ b/ref_vsli_n.c @@ -0,0 +1,104 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vsli +#define TEST_MSG "VSLI_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vmlx_n(vector, vector2, val), + then store the result. 
*/ +#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + TEST_VDUP(vector2, , int, s, 8, 8, 2); + TEST_VDUP(vector2, , int, s, 16, 4, -4); + TEST_VDUP(vector2, , int, s, 32, 2, 3); + TEST_VDUP(vector2, , int, s, 64, 1, 100); + TEST_VDUP(vector2, , uint, u, 8, 8, 20); + TEST_VDUP(vector2, , uint, u, 16, 4, 30); + TEST_VDUP(vector2, , uint, u, 32, 2, 40); + TEST_VDUP(vector2, , uint, u, 64, 1, 2); + TEST_VDUP(vector2, q, int, s, 8, 16, -10); + TEST_VDUP(vector2, q, int, s, 16, 8, -20); + TEST_VDUP(vector2, q, int, s, 32, 4, -30); + TEST_VDUP(vector2, q, int, s, 64, 2, 24); + TEST_VDUP(vector2, q, uint, u, 8, 16, 12); + TEST_VDUP(vector2, q, uint, u, 16, 8, 3); + TEST_VDUP(vector2, q, uint, u, 32, 4, 55); + TEST_VDUP(vector2, q, uint, u, 64, 2, 3); + + /* Choose shift amount arbitrarily */ + TEST_VMLX_N(INSN_NAME, , int, s, 8, 8, 4); + TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 3); + TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 1); + TEST_VMLX_N(INSN_NAME, , int, s, 64, 1, 32); + TEST_VMLX_N(INSN_NAME, , uint, u, 8, 8, 2); + TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 10); + TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 30); + TEST_VMLX_N(INSN_NAME, , uint, u, 64, 1, 3); + TEST_VMLX_N(INSN_NAME, q, int, s, 8, 16, 5); + TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 3); + TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 
20); + TEST_VMLX_N(INSN_NAME, q, int, s, 64, 2, 16); + TEST_VMLX_N(INSN_NAME, q, uint, u, 8, 16, 3); + TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 12); + TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 23); + TEST_VMLX_N(INSN_NAME, q, uint, u, 64, 2, 53); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vsra_n.c b/ref_vsra_n.c new file mode 100644 index 0000000..aa9bdcc --- /dev/null +++ b/ref_vsra_n.c @@ -0,0 +1,97 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSRA_N" +void exec_vsra_n (void) +{ + /* Basic test: y=vsra_n(x,v), then store the result. 
*/ +#define TEST_VSRA_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + /* Choose shift amount arbitrarily */ + TEST_VSRA_N(, int, s, 8, 8, 1); + TEST_VSRA_N(, int, s, 16, 4, 12); + TEST_VSRA_N(, int, s, 32, 2, 2); + TEST_VSRA_N(, int, s, 64, 1, 32); + TEST_VSRA_N(, uint, u, 8, 8, 2); + TEST_VSRA_N(, uint, u, 16, 4, 3); + TEST_VSRA_N(, uint, u, 32, 2, 5); + TEST_VSRA_N(, uint, u, 64, 1, 33); + + TEST_VSRA_N(q, int, s, 8, 16, 1); + TEST_VSRA_N(q, int, s, 16, 8, 12); + TEST_VSRA_N(q, int, s, 32, 4, 2); + TEST_VSRA_N(q, int, s, 64, 2, 32); + TEST_VSRA_N(q, uint, u, 8, 16, 2); + TEST_VSRA_N(q, uint, u, 16, 8, 3); + TEST_VSRA_N(q, uint, u, 32, 4, 5); + TEST_VSRA_N(q, uint, u, 64, 2, 
33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vsri_n.c b/ref_vsri_n.c new file mode 100644 index 0000000..a4e916a --- /dev/null +++ b/ref_vsri_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsri +#define TEST_MSG "VSRI_N" + +#include "ref_vsli_n.c" diff --git a/ref_vst1_lane.c b/ref_vst1_lane.c new file mode 100644 index 0000000..2c14c1c --- /dev/null +++ b/ref_vst1_lane.c @@ -0,0 +1,71 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VST1_LANE/VST1_LANEQ" +void exec_vst1_lane (void) +{ +#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \ + VECT_VAR(vector, T1, W, N) = \ + vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ + vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector, T1, W, N), L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + clean_results (); + + /* Choose lane arbitrarily */ + TEST_VST1_LANE(, int, s, 8, 8, 7); + TEST_VST1_LANE(, int, s, 16, 4, 3); + TEST_VST1_LANE(, int, s, 32, 2, 1); + TEST_VST1_LANE(, int, s, 64, 1, 0); + TEST_VST1_LANE(, uint, u, 8, 8, 6); + TEST_VST1_LANE(, uint, u, 16, 4, 2); + TEST_VST1_LANE(, uint, u, 32, 2, 0); + TEST_VST1_LANE(, uint, u, 64, 1, 0); + TEST_VST1_LANE(, float, f, 32, 2, 1); + + TEST_VST1_LANE(q, int, s, 8, 16, 15); + TEST_VST1_LANE(q, int, s, 16, 8, 5); + TEST_VST1_LANE(q, int, s, 32, 4, 1); + TEST_VST1_LANE(q, int, s, 64, 2, 1); + TEST_VST1_LANE(q, uint, u, 8, 16, 10); + TEST_VST1_LANE(q, uint, u, 16, 8, 4); + TEST_VST1_LANE(q, uint, u, 32, 4, 3); + TEST_VST1_LANE(q, uint, u, 64, 2, 0); + TEST_VST1_LANE(q, float, f, 32, 4, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vstX_lane.c b/ref_vstX_lane.c new file mode 100644 index 0000000..b12fa80 --- /dev/null +++ b/ref_vstX_lane.c @@ -0,0 +1,176 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following 
conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vstX_lane (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VSTX_LANE(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. 
*/ +#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ + sizeof(VECT_VAR(buffer_src, T1, W, N))); \ + memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \ + sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \ + \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ + vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ + L); \ + vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X), \ + L); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant */ +#define DECL_ALL_VSTX_LANE(X) \ + DECL_VSTX_LANE(int, 8, 8, X); \ + DECL_VSTX_LANE(int, 16, 4, X); \ + DECL_VSTX_LANE(int, 32, 2, X); \ + DECL_VSTX_LANE(uint, 8, 8, X); \ + DECL_VSTX_LANE(uint, 16, 4, X); \ + DECL_VSTX_LANE(uint, 32, 2, X); \ + DECL_VSTX_LANE(float, 32, 2, X); \ + DECL_VSTX_LANE(int, 16, 8, X); \ + DECL_VSTX_LANE(int, 32, 4, X); \ + DECL_VSTX_LANE(uint, 16, 8, X); \ + DECL_VSTX_LANE(uint, 32, 4, X); \ + DECL_VSTX_LANE(float, 32, 4, X) + +#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] + + /* Use the same lanes regardless of the size of the array (X), for + simplicity */ +#define TEST_ALL_VSTX_LANE(X) \ + TEST_VSTX_LANE(, int, s, 8, 8, X, 7); \ + TEST_VSTX_LANE(, int, s, 16, 4, X, 2); \ + TEST_VSTX_LANE(, int, s, 32, 2, X, 0); \ + TEST_VSTX_LANE(, float, f, 32, 2, X, 0); \ + TEST_VSTX_LANE(, uint, u, 8, 8, X, 4); \ + 
TEST_VSTX_LANE(, uint, u, 16, 4, X, 3); \ + TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \ + TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \ + TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \ + TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \ + TEST_VSTX_LANE(q, uint, u, 32, 4, X, 0); \ + TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + + /* Declare the temporary buffers / variables */ + DECL_ALL_VSTX_LANE(2); + DECL_ALL_VSTX_LANE(3); + DECL_ALL_VSTX_LANE(4); + + /* Define dummy input arrays, large enough for x4 vectors */ + DUMMY_ARRAY(buffer_src, int, 8, 8, 4); + DUMMY_ARRAY(buffer_src, int, 16, 4, 4); + DUMMY_ARRAY(buffer_src, int, 32, 2, 4); + DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); + DUMMY_ARRAY(buffer_src, float, 32, 2, 4); + DUMMY_ARRAY(buffer_src, int, 16, 8, 4); + DUMMY_ARRAY(buffer_src, int, 32, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); + DUMMY_ARRAY(buffer_src, float, 32, 4, 4); + + /* Check vst2_lane/vst2q_lane */ + clean_results (); +#define TEST_MSG "VST2_LANE/VST2Q_LANE" + TEST_ALL_VSTX_LANE(2); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vst3_lane/vst3q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VST3_LANE/VST3Q_LANE" + TEST_ALL_VSTX_LANE(3); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); + 
dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vst4_lane/vst4q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VST4_LANE/VST4Q_LANE" + TEST_ALL_VSTX_LANE(4); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vsub.c b/ref_vsub.c new file mode 100644 index 0000000..1f81cd0 --- /dev/null +++ b/ref_vsub.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsub +#define TEST_MSG "VSUB/VSUBQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vsub_f32(void); +#define EXTRA_TESTS exec_vsub_f32 + +#include "ref_v_binary_op.c" + +void exec_vsub_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + TEST_VDUP(vector, , float, f, 32, 2, 2.3); + TEST_VDUP(vector, q, float, f, 32, 4, 3.4); + + TEST_VDUP(vector2, , float, f, 32, 2, 4.5); + TEST_VDUP(vector2, q, float, f, 32, 4, 5.6); + + TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vsubhn.c b/ref_vsubhn.c new file mode 100644 index 0000000..ef68d60 --- /dev/null +++ b/ref_vsubhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vsubhn +#define TEST_MSG "VSUBHN" + +#include "ref_vaddhn.c" diff --git a/ref_vsubl.c b/ref_vsubl.c new file mode 100644 index 0000000..093ab53 --- /dev/null +++ b/ref_vsubl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsubl +#define TEST_MSG "VSUBL" + +#include "ref_vaddl.c" diff --git a/ref_vsubw.c b/ref_vsubw.c new file mode 100644 index 0000000..1df07d1 --- /dev/null +++ b/ref_vsubw.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsubw +#define TEST_MSG "VSUBW" + +#include "ref_vaddw.c" diff --git a/ref_vtbX.c b/ref_vtbX.c new file mode 100644 index 0000000..26f0b3d --- /dev/null +++ b/ref_vtbX.c @@ -0,0 +1,213 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vtbX (void) +{ + int i; + + /* In this case, input variables are arrays of vectors */ +#define DECL_VTBX(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(table_vector, T1, W, N, X) + + /* The vtbl1 variant is different from vtbl{2,3,4} because it takes a + vector as 1st param, instead of an array of vectors */ +#define TEST_VTBL1(T1, T2, W, N) \ + VECT_VAR(table_vector, T1, W, N) = \ + vld1##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbl1_##T2##W(VECT_VAR(table_vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_VTBLX(T1, T2, W, N, X) \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ + vld##X##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbl##X##_##T2##W(VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ + VECT_VAR(vector, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need to define a lookup table */ + uint8_t lookup_table[32]; + + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, uint, 8, 8); + + /* For vtbl1 */ + DECL_VARIABLE(table_vector, int, 8, 8); + DECL_VARIABLE(table_vector, uint, 8, 8); + + /* For vtbx* */ + DECL_VARIABLE(default_vector, int, 8, 8); + DECL_VARIABLE(default_vector, uint, 8, 8); + + /* We need only 8 bits variants */ +#define DECL_ALL_VTBLX(X) \ + DECL_VTBX(int, 8, 8, X); \ + DECL_VTBX(uint, 8, 8, X) + +#define TEST_ALL_VTBL1() \ + TEST_VTBL1(int, s, 8, 8); \ + TEST_VTBL1(uint, u, 8, 8) + +#define TEST_ALL_VTBLX(X) \ + TEST_VTBLX(int, s, 8, 8, X); \ + TEST_VTBLX(uint, u, 8, 8, X) + + /* Declare 
the temporary buffers / variables */ + DECL_ALL_VTBLX(2); + DECL_ALL_VTBLX(3); + DECL_ALL_VTBLX(4); + + /* Fill the lookup table */ + for (i=0; i<32; i++) { + lookup_table[i] = i-15; + } + + /* Choose init value arbitrarily, will be used as table index */ + TEST_VDUP(vector, , int, s, 8, 8, 1); + TEST_VDUP(vector, , uint, u, 8, 8, 2); + + /* To ensure code coverage of lib, add some indexes larger than 8,16 and 32 */ + /* except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9) */ + TEST_VSET_LANE(vector, , int, s, 8, 8, 0, 10); + TEST_VSET_LANE(vector, , int, s, 8, 8, 4, 20); + TEST_VSET_LANE(vector, , int, s, 8, 8, 5, 40); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 0, 10); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 4, 20); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 5, 40); + + + /* Check vtbl1 */ + clean_results (); +#define TEST_MSG "VTBL1" + TEST_ALL_VTBL1(); + dump_results_hex (TEST_MSG); + + /* Check vtbl2 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL2" + TEST_ALL_VTBLX(2); + dump_results_hex (TEST_MSG); + + /* Check vtbl3 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL3" + TEST_ALL_VTBLX(3); + dump_results_hex (TEST_MSG); + + /* Check vtbl4 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL4" + TEST_ALL_VTBLX(4); + dump_results_hex (TEST_MSG); + + + /* Now test VTBX */ + + /* The vtbx1 variant is different from vtbx{2,3,4} because it takes a + vector as 1st param, instead of an array of vectors */ +#define TEST_VTBX1(T1, T2, W, N) \ + VECT_VAR(table_vector, T1, W, N) = \ + vld1##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbx1_##T2##W(VECT_VAR(default_vector, T1, W, N), \ + VECT_VAR(table_vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_VTBXX(T1, T2, W, N, X) \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ + vld##X##_##T2##W((T1##W##_t *)lookup_table); \ + \ + 
VECT_VAR(vector_res, T1, W, N) = \ + vtbx##X##_##T2##W(VECT_VAR(default_vector, T1, W, N), \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ + VECT_VAR(vector, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_ALL_VTBX1() \ + TEST_VTBX1(int, s, 8, 8); \ + TEST_VTBX1(uint, u, 8, 8) + +#define TEST_ALL_VTBXX(X) \ + TEST_VTBXX(int, s, 8, 8, X); \ + TEST_VTBXX(uint, u, 8, 8, X) + + /* Choose init value arbitrarily, will be used as default value */ + TEST_VDUP(default_vector, , int, s, 8, 8, 0x33); + TEST_VDUP(default_vector, , uint, u, 8, 8, 0xCC); + + /* Check vtbx1 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX1" + TEST_ALL_VTBX1(); + dump_results_hex (TEST_MSG); + + /* Check vtbx2 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX2" + TEST_ALL_VTBXX(2); + dump_results_hex (TEST_MSG); + + /* Check vtbx3 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX3" + TEST_ALL_VTBXX(3); + dump_results_hex (TEST_MSG); + + /* Check vtbx4 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX4" + TEST_ALL_VTBXX(4); + dump_results_hex (TEST_MSG); +} diff --git a/ref_vtrn.c b/ref_vtrn.c new file mode 100644 index 0000000..4ac3e84 --- /dev/null +++ b/ref_vtrn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vtrn +#define TEST_MSG "VTRN/VTRNQ" + +#include "ref_vuzp.c" diff --git a/ref_vtst.c b/ref_vtst.c new file mode 100644 index 0000000..2aaeaae --- /dev/null +++ b/ref_vtst.c @@ -0,0 +1,99 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vtst +#define TEST_MSG "VTST/VTSTQ" +#endif + +/* Can't use the standard ref_v_binary_op.c template because vtst has + no 64 bits variant, and outputs are always of uint type */ +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. */ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, uint, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \ + VECT_VAR(vector_res, uint, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_UNSIGNED_VARIANTS(vector_res); + + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as comparison value */ + TEST_VDUP(vector2, , int, s, 8, 8, 15); + TEST_VDUP(vector2, , int, s, 16, 4, 5); + TEST_VDUP(vector2, , int, s, 32, 2, 1); + TEST_VDUP(vector2, , uint, u, 8, 8, 15); + TEST_VDUP(vector2, , uint, u, 16, 4, 5); + TEST_VDUP(vector2, , uint, u, 32, 2, 1); + TEST_VDUP(vector2, q, int, s, 8, 16, 15); + TEST_VDUP(vector2, q, int, s, 16, 8, 5); + TEST_VDUP(vector2, q, int, s, 32, 4, 1); + TEST_VDUP(vector2, q, uint, u, 8, 16, 15); + TEST_VDUP(vector2, q, uint, u, 16, 8, 5); + TEST_VDUP(vector2, q, uint, u, 32, 4, 1); + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR, T1, T2) \ + MACRO(VAR, , T1, T2, 8, 8); \ + MACRO(VAR, , T1, T2, 16, 4); \ + MACRO(VAR, , T1, T2, 32, 2); \ + MACRO(VAR, q, T1, T2, 8, 16); \ + 
MACRO(VAR, q, T1, T2, 16, 8); \ + MACRO(VAR, q, T1, T2, 32, 4) + + /* Split the test, as both signed and unsigned variants output their + result in an unsigned form (thus the same output variable is used + in these tests) */ + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, int, s); + dump_results_hex2 (TEST_MSG, " (signed input)"); + + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, uint, u); + dump_results_hex2 (TEST_MSG, " (unsigned input)"); +} diff --git a/ref_vuzp.c b/ref_vuzp.c new file mode 100644 index 0000000..aa5854d --- /dev/null +++ b/ref_vuzp.c @@ -0,0 +1,155 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vuzp +#define TEST_MSG "VUZP/VUZPQ" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* In this case, output variables are arrays of vectors */ +#define DECL_VUZP(T1, W, N) \ + VECT_ARRAY_TYPE(T1, W, N, 2) VECT_ARRAY_VAR(result_vec, T1, W, N, 2); \ + VECT_VAR_DECL(result_bis, T1, W, N)[2 * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ +#define TEST_VUZP(INSN, Q, T1, T2, W, N) \ + VECT_ARRAY_VAR(result_vec, T1, W, N, 2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst2##Q##_##T2##W(VECT_VAR(result_bis, T1, W, N), \ + VECT_ARRAY_VAR(result_vec, T1, W, N, 2)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[X] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X) \ + memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + + /* We don't need 64 bits variants */ +#define DECL_ALL_VUZP() \ + DECL_VUZP(int, 8, 8); \ + DECL_VUZP(int, 16, 4); \ + DECL_VUZP(int, 32, 2); \ + DECL_VUZP(uint, 8, 8); \ + DECL_VUZP(uint, 16, 4); \ + DECL_VUZP(uint, 32, 2); \ + DECL_VUZP(float, 32, 2); \ + DECL_VUZP(int, 8, 16); \ + DECL_VUZP(int, 16, 8); \ + DECL_VUZP(int, 32, 4); \ + 
DECL_VUZP(uint, 8, 16); \ + DECL_VUZP(uint, 16, 8); \ + DECL_VUZP(uint, 32, 4); \ + DECL_VUZP(float, 32, 4) + + DECL_ALL_VUZP(); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , float, f, 32, 2, 33.6); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, float, f, 32, 4, 33.8); + +#define TEST_ALL_VUZP(INSN) \ + TEST_VUZP(INSN, , int, s, 8, 8); \ + TEST_VUZP(INSN, , int, s, 16, 4); \ + TEST_VUZP(INSN, , int, s, 32, 2); \ + TEST_VUZP(INSN, , uint, u, 8, 8); \ + TEST_VUZP(INSN, , uint, u, 16, 4); \ + TEST_VUZP(INSN, , uint, u, 32, 2); \ + TEST_VUZP(INSN, , float, f, 32, 2); \ + TEST_VUZP(INSN, q, int, s, 8, 16); \ + TEST_VUZP(INSN, q, int, s, 16, 8); \ + TEST_VUZP(INSN, q, int, s, 32, 4); \ + TEST_VUZP(INSN, q, uint, u, 8, 16); \ + TEST_VUZP(INSN, q, uint, u, 16, 8); \ + TEST_VUZP(INSN, q, uint, u, 32, 4); \ + TEST_VUZP(INSN, q, float, f, 32, 4) + +#define TEST_ALL_EXTRA_CHUNKS() \ + TEST_EXTRA_CHUNK(int, 8, 8, 1); \ + TEST_EXTRA_CHUNK(int, 16, 4, 1); \ + TEST_EXTRA_CHUNK(int, 32, 2, 1); \ + TEST_EXTRA_CHUNK(uint, 8, 8, 1); \ + TEST_EXTRA_CHUNK(uint, 16, 4, 1); \ + TEST_EXTRA_CHUNK(uint, 32, 2, 1); \ + TEST_EXTRA_CHUNK(float, 32, 2, 1); \ + TEST_EXTRA_CHUNK(int, 8, 16, 1); \ + TEST_EXTRA_CHUNK(int, 16, 8, 1); \ + 
TEST_EXTRA_CHUNK(int, 32, 4, 1); \ + TEST_EXTRA_CHUNK(uint, 8, 16, 1); \ + TEST_EXTRA_CHUNK(uint, 16, 8, 1); \ + TEST_EXTRA_CHUNK(uint, 32, 4, 1); \ + TEST_EXTRA_CHUNK(float, 32, 4, 1) + + /* Check vuzp/vuzpq */ + clean_results (); + TEST_ALL_VUZP(INSN_NAME); + + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(); + dump_results_hex2 (TEST_MSG, " chunk 1"); +} diff --git a/ref_vzip.c b/ref_vzip.c new file mode 100644 index 0000000..8c0b363 --- /dev/null +++ b/ref_vzip.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vzip +#define TEST_MSG "VZIP/VZIPQ" + +#include "ref_vuzp.c" diff --git a/retarget.c b/retarget.c new file mode 100644 index 0000000..7fb7fdf --- /dev/null +++ b/retarget.c @@ -0,0 +1,42 @@ +/* +** Copyright (C) ARM Limited, 2005. All rights reserved. 
+*/ + +#include <stdio.h> +#include <rt_misc.h> + +// Heap base from scatter file +extern int Image$$HEAP$$ZI$$Base; +//#pragma import(__use_two_region_memory) + +extern void core_init(void); + +/* +The functions below are patched onto main. +*/ + +extern void $Super$$main(void); + +void $Sub$$main(void) +{ + core_init(); // does some extra setup work + + $Super$$main(); // calls the original function +} + + +/* +This function re-implements the C Library semihosted function. The stack pointer +has aready been set and is passed back to the function, The base of the heap is +set from the scatter file +*/ +__value_in_regs struct __initial_stackheap __user_initial_stackheap( + unsigned R0, unsigned SP, unsigned R2, unsigned SL) +{ + struct __initial_stackheap config; + + config.heap_base = (unsigned int)&Image$$HEAP$$ZI$$Base; // placed by scatterfile + config.stack_base = SP; // inherit SP from the execution environment + + return config; +} diff --git a/scatter.scat b/scatter.scat new file mode 100644 index 0000000..1f03141 --- /dev/null +++ b/scatter.scat @@ -0,0 +1,29 @@ +;; Copyright ARM Ltd 2005. All rights reserved. + +ROM_LOAD 0x2000 +{ + + ROM_EXEC 0x2000 + { + init.o (CortexA8, +First) ; Create Translation Table + * (InRoot$$Sections) ; this section must be in a root region + } + + I-TCM 0x30000 FIXED ; 0x1E000 ; built at 0x100 to avoid vector space + { ; assumes 32K I-TCM + + * (+RO) ; any remaining code inc C lib. + } + + D-TCM 0x200000 0x40000 ; 8 Kb of D-TCM used for RW/ZI + { + * (+RW,+ZI) + } + + HEAP 0x4E0000 EMPTY 0x100000 {} ; 8Kb Heap follows direcly after RW/ZI + + STACK 0x300000 EMPTY -0x8000 {} ; 32KB Stack, starts after DTCM block. 
+ + TTB 0x20000 EMPTY 0x4000 {} ; place translation table at 0x28000, 16Kb required + +} diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h new file mode 100644 index 0000000..ae24127 --- /dev/null +++ b/stm-arm-neon-ref.h @@ -0,0 +1,438 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifndef _STM_ARM_NEON_REF_H_ +#define _STM_ARM_NEON_REF_H_ + +#include <stdio.h> +#include <inttypes.h> +#include <string.h> + +#define xSTR(X) #X +#define STR(X) xSTR(X) + +#define xNAME1(V,T) V ## _ ## T +#define xNAME(V,T) xNAME1(V,T) + +#define VAR(V,T,W) xNAME(V,T##W) +#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W) + +#define VECT_NAME(T, W, N) T##W##x##N +#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L +#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t) +#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t) + +#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N)) +#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N) +#define VECT_VAR_DECL_INIT(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N) INIT_TAB(T##W##_t) +#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N] + +#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L)) + +static int result_idx = 0; +#define DUMP(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \ + } \ + fprintf(ref_file, " }\n"); + +#define DUMP_FP(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + union fp_operand { \ + uint##W##_t i; \ + float##W##_t f; \ + } tmp; \ + tmp.f = VECT_VAR(result, T, W, N)[i]; \ + fprintf(ref_file, "%" FMT " %a %g, ", tmp.i, tmp.f, tmp.f); \ + } \ + fprintf(ref_file, " }\n"); + +#define CLEAN_PATTERN_8 0x33 +#define CLEAN_PATTERN_16 0x3333 +#define CLEAN_PATTERN_32 0x33333333 +#define CLEAN_PATTERN_64 0x3333333333333333 + +#define CLEAN(VAR,T,W,N) \ + memset(VECT_VAR(VAR, T, W, N), \ + CLEAN_PATTERN_8, \ + sizeof(VECT_VAR(VAR, T, W, N))); + +#define CHECK_INIT(VAR,Q,T1,T2,W,N) \ + { \ + ARRAY(check_result, T1, W, N); \ + int i; \ + \ + vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N), \ + VECT_VAR(VAR, T1, W, N)); \ + 
for(i=0; i<N ; i++) \ + { \ + /*if (VECT_VAR(check_result, T1, W, N)[i] == CLEAN_PATTERN_##W)*/ { \ + fprintf(stdout, "%s:%d: %s[%d] unintialized! %#x\n", \ + __FUNCTION__, __LINE__, \ + STR(VECT_VAR(VAR, T1, W, N)), i, \ + VECT_VAR(check_result, T1, W, N)[i]); \ + } \ + } \ + } + +/* Generic declarations: */ +extern FILE* log_file; +extern FILE* ref_file; + +/* Sample initialization vectors */ +#define INIT_TAB(T) [] = { (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, \ + (T)-10, (T)-9, (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, \ + (T)-3, (T)-2, (T)-1, (T)0, (T)1, (T)2, (T)3, (T)4, \ + (T)5, (T)6, (T)7, (T)8, (T)9, (T)10, (T)11, (T)12, \ + (T)13, (T)14, (T)15 } + +/* Input buffers, 1 of each size */ +static VECT_VAR_DECL_INIT(buffer, int, 8, 8); +static VECT_VAR_DECL_INIT(buffer, int, 16, 4); +static VECT_VAR_DECL_INIT(buffer, int, 32, 2); +static VECT_VAR_DECL_INIT(buffer, int, 64, 1); +static VECT_VAR_DECL_INIT(buffer, uint, 8, 8); +static VECT_VAR_DECL_INIT(buffer, uint, 16, 4); +static VECT_VAR_DECL_INIT(buffer, uint, 32, 2); +static VECT_VAR_DECL_INIT(buffer, uint, 64, 1); +static VECT_VAR_DECL_INIT(buffer, float, 32, 2); +static VECT_VAR_DECL_INIT(buffer, int, 8, 16); +static VECT_VAR_DECL_INIT(buffer, int, 16, 8); +static VECT_VAR_DECL_INIT(buffer, int, 32, 4); +static VECT_VAR_DECL_INIT(buffer, int, 64, 2); +static VECT_VAR_DECL_INIT(buffer, uint, 8, 16); +static VECT_VAR_DECL_INIT(buffer, uint, 16, 8); +static VECT_VAR_DECL_INIT(buffer, uint, 32, 4); +static VECT_VAR_DECL_INIT(buffer, uint, 64, 2); +static VECT_VAR_DECL_INIT(buffer, float, 32, 4); + +/* Output buffers, 1 of each size */ +static ARRAY(result, int, 8, 8); +static ARRAY(result, int, 16, 4); +static ARRAY(result, int, 32, 2); +static ARRAY(result, int, 64, 1); +static ARRAY(result, uint, 8, 8); +static ARRAY(result, uint, 16, 4); +static ARRAY(result, uint, 32, 2); +static ARRAY(result, uint, 64, 1); +static ARRAY(result, float, 32, 2); +static ARRAY(result, int, 8, 16); +static ARRAY(result, int, 
16, 8); +static ARRAY(result, int, 32, 4); +static ARRAY(result, int, 64, 2); +static ARRAY(result, uint, 8, 16); +static ARRAY(result, uint, 16, 8); +static ARRAY(result, uint, 32, 4); +static ARRAY(result, uint, 64, 2); +static ARRAY(result, float, 32, 4); + +/* Dump results (generic function) */ +static void dump_results (char *test_name) +{ + int i; + + fprintf(ref_file, "\n%s output:\n", test_name); + + DUMP(test_name, int, 8, 8, PRId8); + DUMP(test_name, int, 16, 4, PRId16); + DUMP(test_name, int, 32, 2, PRId32); + DUMP(test_name, int, 64, 1, PRId64); + DUMP(test_name, uint, 8, 8, PRIu8); + DUMP(test_name, uint, 16, 4, PRIu16); + DUMP(test_name, uint, 32, 2, PRIu32); + DUMP(test_name, uint, 64, 1, PRIu64); + DUMP_FP(test_name, float, 32, 2, PRIx32); + + DUMP(test_name, int, 8, 16, PRId8); + DUMP(test_name, int, 16, 8, PRId16); + DUMP(test_name, int, 32, 4, PRId32); + DUMP(test_name, int, 64, 2, PRId64); + DUMP(test_name, uint, 8, 16, PRIu8); + DUMP(test_name, uint, 16, 8, PRIu16); + DUMP(test_name, uint, 32, 4, PRIu32); + DUMP(test_name, uint, 64, 2, PRIu64); + DUMP_FP(test_name, float, 32, 4, PRIx32); +} + +/* Dump results in hex (generic function) */ +static void dump_results_hex2 (char *test_name, char* comment) +{ + int i; + + fprintf(ref_file, "\n%s%s output:\n", test_name, comment); + + DUMP(test_name, int, 8, 8, PRIx8); + DUMP(test_name, int, 16, 4, PRIx16); + DUMP(test_name, int, 32, 2, PRIx32); + DUMP(test_name, int, 64, 1, PRIx64); + DUMP(test_name, uint, 8, 8, PRIx8); + DUMP(test_name, uint, 16, 4, PRIx16); + DUMP(test_name, uint, 32, 2, PRIx32); + DUMP(test_name, uint, 64, 1, PRIx64); + DUMP_FP(test_name, float, 32, 2, PRIx32); + + DUMP(test_name, int, 8, 16, PRIx8); + DUMP(test_name, int, 16, 8, PRIx16); + DUMP(test_name, int, 32, 4, PRIx32); + DUMP(test_name, int, 64, 2, PRIx64); + DUMP(test_name, uint, 8, 16, PRIx8); + DUMP(test_name, uint, 16, 8, PRIx16); + DUMP(test_name, uint, 32, 4, PRIx32); + DUMP(test_name, uint, 64, 2, PRIx64); + 
DUMP_FP(test_name, float, 32, 4, PRIx32); +} + +static void dump_results_hex (char *test_name) +{ + dump_results_hex2(test_name, ""); +} + +#ifndef STM_ARM_NEON_MODELS + +#ifndef __BIG_ENDIAN + +typedef union { + struct { + int _xxx:27; + int QC:1; + int V:1; + int C:1; + int Z:1; + int N:1; + } b; + unsigned int word; +} _ARM_FPSCR; + +#else /* __BIG_ENDIAN */ + +typedef union { + struct { + int N:1; + int Z:1; + int C:1; + int V:1; + int QC:1; + int _dnm:27; + } b; + unsigned int word; +} _ARM_FPSCR; + +#endif /* __BIG_ENDIAN */ + +#ifdef __ARMCC_VERSION +register _ARM_FPSCR _afpscr_for_qc __asm("fpscr"); +#define Neon_Overflow _afpscr_for_qc.b.QC +#else +/* Fake declaration because GCC/ARM does not know this register */ +extern int errno; +#define Neon_Overflow errno +#endif + +#endif /* STM_ARM_NEON_MODELS */ + +static void dump_neon_overflow(char* msg, char *name) +{ + fprintf(ref_file, "%s:%d:%s Neon overflow %d\n", msg, result_idx++, + name, Neon_Overflow); +} + +/* Clean output buffers before execution */ +static void clean_results (void) +{ + result_idx = 0; + CLEAN(result, int, 8, 8); + CLEAN(result, int, 16, 4); + CLEAN(result, int, 32, 2); + CLEAN(result, int, 64, 1); + CLEAN(result, uint, 8, 8); + CLEAN(result, uint, 16, 4); + CLEAN(result, uint, 32, 2); + CLEAN(result, uint, 64, 1); + CLEAN(result, float, 32, 2); + + CLEAN(result, int, 8, 16); + CLEAN(result, int, 16, 8); + CLEAN(result, int, 32, 4); + CLEAN(result, int, 64, 2); + CLEAN(result, uint, 8, 16); + CLEAN(result, uint, 16, 8); + CLEAN(result, uint, 32, 4); + CLEAN(result, uint, 64, 2); + CLEAN(result, float, 32, 4); +} + + +/* Helpers to declare variables of various types */ +#define DECL_VARIABLE(VAR, T1, W, N) \ + VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N) + +#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, int, 64, 1) + +#define 
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, uint, 64, 1) + +#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, int, 8, 16); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, int, 64, 2) + +#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, uint, 8, 16); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, uint, 64, 2) + +#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ + DECL_VARIABLE(VAR, float, 32, 2) + +#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \ + DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \ + DECL_VARIABLE(VAR, float, 32, 4) + +#define DECL_VARIABLE_ALL_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_VARIANTS(VAR) + +#define DECL_VARIABLE_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) + +#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) + +/* Helpers to initialize vectors */ +#define TEST_VDUP(VAR, Q, T1, T2, W, N, V) \ + VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V) + +#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \ + VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \ + VECT_VAR(VAR, T1, W, N), \ + L) + +/* We need to load initial values first, so rely on VLD1 */ +#define TEST_VLOAD(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)) + +/* Helpers for macros with 1 constant and 5 variable arguments */ +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, , int, s, 8, 8); \ + MACRO(VAR, 
, int, s, 16, 4); \ + MACRO(VAR, , int, s, 32, 2); \ + MACRO(VAR, , int, s, 64, 1) + +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, , uint, u, 8, 8); \ + MACRO(VAR, , uint, u, 16, 4); \ + MACRO(VAR, , uint, u, 32, 2); \ + MACRO(VAR, , uint, u, 64, 1) + +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, q, int, s, 8, 16); \ + MACRO(VAR, q, int, s, 16, 8); \ + MACRO(VAR, q, int, s, 32, 4); \ + MACRO(VAR, q, int, s, 64, 2) + +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR) \ + MACRO(VAR, q, uint, u, 8, 16); \ + MACRO(VAR, q, uint, u, 16, 8); \ + MACRO(VAR, q, uint, u, 32, 4); \ + MACRO(VAR, q, uint, u, 64, 2) + +#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) + +/* Helpers for macros with 2 constant and 5 variable arguments */ +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, , int, s, 8, 8); \ + MACRO(VAR1, VAR2, , int, s, 16, 4); \ + MACRO(VAR1, VAR2, , int, s, 32, 2); \ + MACRO(VAR1, VAR2 , , int, s, 64, 1) + +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, , uint, u, 8, 8); \ + MACRO(VAR1, VAR2, , uint, u, 16, 4); \ + MACRO(VAR1, VAR2, , uint, u, 32, 2); \ + MACRO(VAR1, VAR2, , uint, u, 64, 1) + +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, q, int, s, 8, 16); \ + MACRO(VAR1, VAR2, q, int, s, 16, 8); 
\ + MACRO(VAR1, VAR2, q, int, s, 32, 4); \ + MACRO(VAR1, VAR2, q, int, s, 64, 2) + +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, q, uint, u, 8, 16); \ + MACRO(VAR1, VAR2, q, uint, u, 16, 8); \ + MACRO(VAR1, VAR2, q, uint, u, 32, 4); \ + MACRO(VAR1, VAR2, q, uint, u, 64, 2) + +#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#endif /* _STM_ARM_NEON_REF_H_ */ |