diff options
author | Christophe Lyon <christophe.lyon@st.com> | 2011-01-24 17:37:40 +0100 |
---|---|---|
committer | Christophe Lyon <christophe.lyon@st.com> | 2011-01-24 17:37:40 +0100 |
commit | 073831adf9442c019e8d34b18b0c04b1d780a19b (patch) | |
tree | 391f6efd9ceabde2554e9d2c637c9fdafdc9617a | |
download | platform_external_arm-neon-tests-073831adf9442c019e8d34b18b0c04b1d780a19b.tar.gz platform_external_arm-neon-tests-073831adf9442c019e8d34b18b0c04b1d780a19b.tar.bz2 platform_external_arm-neon-tests-073831adf9442c019e8d34b18b0c04b1d780a19b.zip |
First public release of the ARM/Neon tests.
-rw-r--r-- | Init.s | 259 | ||||
-rw-r--r-- | InitCache.s | 52 | ||||
-rw-r--r-- | Makefile | 133 | ||||
-rw-r--r-- | README | 56 | ||||
-rw-r--r-- | armscript.inc | 14 | ||||
-rw-r--r-- | compute_ref.axf | bin | 0 -> 1742664 bytes | |||
-rw-r--r-- | compute_ref.c | 345 | ||||
-rw-r--r-- | ref-rvct.txt | 6849 | ||||
-rw-r--r-- | ref_dsp.c | 411 | ||||
-rw-r--r-- | ref_dspfns.c | 1493 | ||||
-rw-r--r-- | ref_integer.c | 279 | ||||
-rw-r--r-- | ref_v_binary_op.c | 88 | ||||
-rw-r--r-- | ref_v_binary_sat_op.c | 108 | ||||
-rw-r--r-- | ref_v_comp_f_op.c | 87 | ||||
-rw-r--r-- | ref_v_comp_op.c | 178 | ||||
-rw-r--r-- | ref_v_unary_op.c | 91 | ||||
-rw-r--r-- | ref_v_unary_sat_op.c | 95 | ||||
-rw-r--r-- | ref_vaba.c | 125 | ||||
-rw-r--r-- | ref_vabal.c | 103 | ||||
-rw-r--r-- | ref_vabd.c | 116 | ||||
-rw-r--r-- | ref_vabdl.c | 93 | ||||
-rw-r--r-- | ref_vabs.c | 54 | ||||
-rw-r--r-- | ref_vadd.c | 60 | ||||
-rw-r--r-- | ref_vaddhn.c | 86 | ||||
-rw-r--r-- | ref_vaddl.c | 104 | ||||
-rw-r--r-- | ref_vaddw.c | 104 | ||||
-rw-r--r-- | ref_vand.c | 29 | ||||
-rw-r--r-- | ref_vbic.c | 29 | ||||
-rw-r--r-- | ref_vbsl.c | 96 | ||||
-rw-r--r-- | ref_vcage.c | 29 | ||||
-rw-r--r-- | ref_vcagt.c | 29 | ||||
-rw-r--r-- | ref_vcale.c | 29 | ||||
-rw-r--r-- | ref_vcalt.c | 29 | ||||
-rw-r--r-- | ref_vceq.c | 29 | ||||
-rw-r--r-- | ref_vcge.c | 29 | ||||
-rw-r--r-- | ref_vcgt.c | 29 | ||||
-rw-r--r-- | ref_vcle.c | 29 | ||||
-rw-r--r-- | ref_vcls.c | 107 | ||||
-rw-r--r-- | ref_vclt.c | 29 | ||||
-rw-r--r-- | ref_vclz.c | 112 | ||||
-rw-r--r-- | ref_vcnt.c | 80 | ||||
-rw-r--r-- | ref_vcombine.c | 77 | ||||
-rw-r--r-- | ref_vcreate.c | 99 | ||||
-rw-r--r-- | ref_vcvt.c | 160 | ||||
-rw-r--r-- | ref_vdup.c | 109 | ||||
-rw-r--r-- | ref_vdup_lane.c | 77 | ||||
-rw-r--r-- | ref_veor.c | 29 | ||||
-rw-r--r-- | ref_vext.c | 100 | ||||
-rw-r--r-- | ref_vget_high.c | 64 | ||||
-rw-r--r-- | ref_vget_lane.c | 93 | ||||
-rw-r--r-- | ref_vget_low.c | 64 | ||||
-rw-r--r-- | ref_vhadd.c | 31 | ||||
-rw-r--r-- | ref_vhsub.c | 31 | ||||
-rw-r--r-- | ref_vld1.c | 55 | ||||
-rw-r--r-- | ref_vld1_dup.c | 60 | ||||
-rw-r--r-- | ref_vld1_lane.c | 102 | ||||
-rw-r--r-- | ref_vldX.c | 157 | ||||
-rw-r--r-- | ref_vldX_dup.c | 136 | ||||
-rw-r--r-- | ref_vldX_lane.c | 170 | ||||
-rw-r--r-- | ref_vmax.c | 116 | ||||
-rw-r--r-- | ref_vmin.c | 29 | ||||
-rw-r--r-- | ref_vmla.c | 144 | ||||
-rw-r--r-- | ref_vmla_lane.c | 125 | ||||
-rw-r--r-- | ref_vmla_n.c | 112 | ||||
-rw-r--r-- | ref_vmlal.c | 119 | ||||
-rw-r--r-- | ref_vmlal_lane.c | 101 | ||||
-rw-r--r-- | ref_vmlal_n.c | 92 | ||||
-rw-r--r-- | ref_vmls.c | 29 | ||||
-rw-r--r-- | ref_vmls_lane.c | 29 | ||||
-rw-r--r-- | ref_vmls_n.c | 29 | ||||
-rw-r--r-- | ref_vmlsl.c | 29 | ||||
-rw-r--r-- | ref_vmlsl_lane.c | 29 | ||||
-rw-r--r-- | ref_vmlsl_n.c | 29 | ||||
-rw-r--r-- | ref_vmovl.c | 60 | ||||
-rw-r--r-- | ref_vmovn.c | 60 | ||||
-rw-r--r-- | ref_vmul.c | 127 | ||||
-rw-r--r-- | ref_vmul_lane.c | 105 | ||||
-rw-r--r-- | ref_vmul_n.c | 91 | ||||
-rw-r--r-- | ref_vmull.c | 77 | ||||
-rw-r--r-- | ref_vmull_lane.c | 84 | ||||
-rw-r--r-- | ref_vmull_n.c | 81 | ||||
-rw-r--r-- | ref_vmvn.c | 112 | ||||
-rw-r--r-- | ref_vneg.c | 54 | ||||
-rw-r--r-- | ref_vorn.c | 29 | ||||
-rw-r--r-- | ref_vorr.c | 29 | ||||
-rw-r--r-- | ref_vpadal.c | 140 | ||||
-rw-r--r-- | ref_vpadd.c | 96 | ||||
-rw-r--r-- | ref_vpaddl.c | 113 | ||||
-rw-r--r-- | ref_vpmax.c | 29 | ||||
-rw-r--r-- | ref_vpmin.c | 29 | ||||
-rw-r--r-- | ref_vqabs.c | 73 | ||||
-rw-r--r-- | ref_vqadd.c | 153 | ||||
-rw-r--r-- | ref_vqdmlal.c | 97 | ||||
-rw-r--r-- | ref_vqdmlal_lane.c | 103 | ||||
-rw-r--r-- | ref_vqdmlal_n.c | 91 | ||||
-rw-r--r-- | ref_vqdmlsl.c | 29 | ||||
-rw-r--r-- | ref_vqdmlsl_lane.c | 29 | ||||
-rw-r--r-- | ref_vqdmlsl_n.c | 29 | ||||
-rw-r--r-- | ref_vqdmulh.c | 114 | ||||
-rw-r--r-- | ref_vqdmulh_lane.c | 115 | ||||
-rw-r--r-- | ref_vqdmulh_n.c | 108 | ||||
-rw-r--r-- | ref_vqdmull.c | 92 | ||||
-rw-r--r-- | ref_vqdmull_lane.c | 105 | ||||
-rw-r--r-- | ref_vqdmull_n.c | 101 | ||||
-rw-r--r-- | ref_vqmovn.c | 112 | ||||
-rw-r--r-- | ref_vqmovun.c | 93 | ||||
-rw-r--r-- | ref_vqneg.c | 73 | ||||
-rw-r--r-- | ref_vqrdmulh.c | 134 | ||||
-rw-r--r-- | ref_vqrdmulh_lane.c | 133 | ||||
-rw-r--r-- | ref_vqrdmulh_n.c | 122 | ||||
-rw-r--r-- | ref_vqrshl.c | 199 | ||||
-rw-r--r-- | ref_vqrshrn_n.c | 133 | ||||
-rw-r--r-- | ref_vqrshrun_n.c | 138 | ||||
-rw-r--r-- | ref_vqshl.c | 239 | ||||
-rw-r--r-- | ref_vqshl_n.c | 130 | ||||
-rw-r--r-- | ref_vqshlu_n.c | 155 | ||||
-rw-r--r-- | ref_vqshrn_n.c | 134 | ||||
-rw-r--r-- | ref_vqshrun_n.c | 114 | ||||
-rw-r--r-- | ref_vqsub.c | 156 | ||||
-rw-r--r-- | ref_vraddhn.c | 29 | ||||
-rw-r--r-- | ref_vrecpe.c | 97 | ||||
-rw-r--r-- | ref_vrecps.c | 76 | ||||
-rw-r--r-- | ref_vreinterpret.c | 256 | ||||
-rw-r--r-- | ref_vrev.c | 96 | ||||
-rw-r--r-- | ref_vrhadd.c | 31 | ||||
-rw-r--r-- | ref_vrshl.c | 192 | ||||
-rw-r--r-- | ref_vrshr_n.c | 217 | ||||
-rw-r--r-- | ref_vrshrn_n.c | 119 | ||||
-rw-r--r-- | ref_vrsqrte.c | 105 | ||||
-rw-r--r-- | ref_vrsqrts.c | 76 | ||||
-rw-r--r-- | ref_vrsra_n.c | 238 | ||||
-rw-r--r-- | ref_vrsubhn.c | 29 | ||||
-rw-r--r-- | ref_vset_lane.c | 78 | ||||
-rw-r--r-- | ref_vshl.c | 98 | ||||
-rw-r--r-- | ref_vshl_n.c | 75 | ||||
-rw-r--r-- | ref_vshll_n.c | 64 | ||||
-rw-r--r-- | ref_vshr_n.c | 76 | ||||
-rw-r--r-- | ref_vshrn_n.c | 81 | ||||
-rw-r--r-- | ref_vsli_n.c | 104 | ||||
-rw-r--r-- | ref_vsra_n.c | 97 | ||||
-rw-r--r-- | ref_vsri_n.c | 29 | ||||
-rw-r--r-- | ref_vst1_lane.c | 71 | ||||
-rw-r--r-- | ref_vstX_lane.c | 176 | ||||
-rw-r--r-- | ref_vsub.c | 60 | ||||
-rw-r--r-- | ref_vsubhn.c | 29 | ||||
-rw-r--r-- | ref_vsubl.c | 29 | ||||
-rw-r--r-- | ref_vsubw.c | 29 | ||||
-rw-r--r-- | ref_vtbX.c | 213 | ||||
-rw-r--r-- | ref_vtrn.c | 29 | ||||
-rw-r--r-- | ref_vtst.c | 99 | ||||
-rw-r--r-- | ref_vuzp.c | 155 | ||||
-rw-r--r-- | ref_vzip.c | 29 | ||||
-rw-r--r-- | retarget.c | 42 | ||||
-rw-r--r-- | scatter.scat | 29 | ||||
-rw-r--r-- | stm-arm-neon-ref.h | 438 |
155 files changed, 23034 insertions, 0 deletions
@@ -0,0 +1,259 @@ +;================================================================== +; Copyright ARM Ltd 2005. All rights reserved. +; +; Cortex-A8 Dhrystone example - Startup Code +;================================================================== + + PRESERVE8 + AREA CORTEXA8, CODE, READONLY + + ENTRY + +; Standard definitions of mode bits and interrupt (I & F) flags in PSRs + +Mode_USR EQU 0x10 +Mode_FIQ EQU 0x11 +Mode_IRQ EQU 0x12 +Mode_SVC EQU 0x13 +Mode_ABT EQU 0x17 +Mode_UNDEF EQU 0x1B +Mode_SYS EQU 0x1F + +I_Bit EQU 0x80 ; when I bit is set, IRQ is disabled +F_Bit EQU 0x40 ; when F bit is set, FIQ is disabled + +;================================================================== +; Disable Cortex-A8 MMU if enabled +;================================================================== + + EXPORT Start + +Start + + MRC p15, 0, r0, c1, c0, 0 ; Read CP15 Control Register into r0 + TST r0, #0x1 ; Is the MMU enabled? + BICNE r0, r0, #0x1 ; Clear bit 0 + MCRNE p15, 0, r0, c1, c0, 0 ; Write value back + +;================================================================== +; Initialise Supervisor Mode Stack +; Note stack must be 8 byte aligned. +;================================================================== + + IMPORT ||Image$$STACK$$ZI$$Limit|| ; Linker symbol from scatter file + LDR SP, =||Image$$STACK$$ZI$$Limit|| + +;================================================================== +; TLB maintenance, Invalidate Data and Instruction TLB's +;================================================================== + + MOV r0,#0 + MCR p15, 0, r0, c8, c7, 0 ; Cortex-A8 I-TLB and D-TLB invalidation + +;================================================================== +; Cache Invalidation code for Cortex-A8 +;================================================================== + + ; Invalidate L1 Instruction Cache + + MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR + TST r0, #0x3 ; Harvard Cache? 
+ MOV r0, #0 + MCRNE p15, 0, r0, c7, c5, 0 ; Invalidate Instruction Cache + + ; Invalidate Data/Unified Caches + + MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR + ANDS r3, r0, #&7000000 + MOV r3, r3, LSR #23 ; Total cache levels << 1 + BEQ Finished + + MOV r10, #0 ; R10 holds current cache level << 1 +Loop1 ADD r2, r10, r10, LSR #1 ; R2 holds cache "Set" position + MOV r1, r0, LSR r2 ; Bottom 3 bits are the Cache-type for this level + AND r1, R1, #7 ; Get those 3 bits alone + CMP r1, #2 + BLT Skip ; No cache or only instruction cache at this level + + MCR p15, 2, r10, c0, c0, 0 ; Write the Cache Size selection register + MOV r1, #0 + MCR p15, 0, r1, c7, c5, 4 ; PrefetchFlush to sync the change to the CacheSizeID reg + MRC p15, 1, r1, c0, c0, 0 ; Reads current Cache Size ID register + AND r2, r1, #&7 ; Extract the line length field + ADD r2, r2, #4 ; Add 4 for the line length offset (log2 16 bytes) + LDR r4, =0x3FF + ANDS r4, r4, r1, LSR #3 ; R4 is the max number on the way size (right aligned) + CLZ r5, r4 ; R5 is the bit position of the way size increment + LDR r7, =0x00007FFF + ANDS r7, r7, r1, LSR #13 ; R7 is the max number of the index size (right aligned) + +Loop2 MOV r9, r4 ; R9 working copy of the max way size (right aligned) + +Loop3 ORR r11, r10, r9, LSL r5 ; Factor in the Way number and cache number into R11 + ORR r11, r11, r7, LSL r2 ; Factor in the Set number + MCR p15, 0, r11, c7, c14, 2 ; Clean and Invalidate by set/way + SUBS r9, r9, #1 ; Decrement the Way number + BGE Loop3 + SUBS r7, r7, #1 ; Decrement the Set number + BGE Loop2 +Skip ADD r10, r10, #2 ; increment the cache number + CMP r3, r10 + BGT Loop1 + +Finished + + +;=================================================================== +; Cortex-A8 MMU Configuration +; Set translation table base +;=================================================================== + + + IMPORT ||Image$$TTB$$ZI$$Base|| ; from scatter file.; + + ; Cortex-A8 supports two translation tables + ; Configure translation table 
base (TTB) control register cp15,c2 + ; to a value of all zeros, indicates we are using TTB register 0. + + MOV r0,#0x0 + MCR p15, 0, r0, c2, c0, 2 + + ; write the address of our page table base to TTB register 0.; + ; We are setting to outer-noncachable [4:3] is zero + + LDR r0,=||Image$$TTB$$ZI$$Base|| + MCR p15, 0, r0, c2, c0, 0 + + +;=================================================================== +; Cortex-A8 PAGE TABLE generation, using standard Arch v6 tables +; +; AP[11:10] - Access Permissions = b11, Read/Write Access +; Domain[8:5] - Domain = b1111, Domain 15 +; Type[1:0] - Descriptor Type = b10, 1Mb descriptors +; +; TEX C B +; 000 0 0 Strongly Ordered +; 001 1 1 Outer and inner write back, write allocate Normal +;=================================================================== + + LDR r1,=0xfff ; loop counter + LDR r2,=2_00000000000000000000110111100010 + + ; r0 contains the address of the translation table base + ; r1 is loop counter + ; r2 is level1 descriptor (bits 19:0) + + ; use loop counter to create 4096 individual table entries + ; this writes from address 0x7FFC down to 0x4000 in word steps (4bytes). + +init_ttb_1 + + ORR r3, r2, r1, LSL#20 ; r3 now contains full level1 descriptor to write + STR r3, [r0, r1, LSL#2] ; str table entry at TTB base + loopcount*4 + SUBS r1, r1, #1 ; decrement loop counter + BPL init_ttb_1 + + ; In this example we will change the cacheable attribute in the first descriptor. + ; Virtual memory from 0 to 1MB will be cacheable (write back mode). + ; TEX[14:12]=001 and CB[3:2]= 11, Outer and inner write back, write allocate. 
+ + ORR r3,r3,#2_0000000001100 ; Set CB bits + ORR r3,r3,#2_1000000000000 ; Set TEX bits + STR r3,[r0] + + ADD r2, r3, #0x100000 ; alter r3 to have correct base address for second descriptor (flat mapping) + STR r2, [r0, #4] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x200000 ; alter r3 to have correct base address for 3 descriptor (flat mapping) + STR r2, [r0, #8] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x300000 ; alter r3 to have correct base address for 4 descriptor (flat mapping) + STR r2, [r0, #0xc] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x400000 ; alter r3 to have correct base address for 5 descriptor (flat mapping) + STR r2, [r0, #0x10] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x500000 ; alter r3 to have correct base address for 6 descriptor (flat mapping) + STR r2, [r0, #0x14] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x600000 ; alter r3 to have correct base address for 7 descriptor (flat mapping) + STR r2, [r0, #0x18] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x700000 ; alter r3 to have correct base address for 8 descriptor (flat mapping) + STR r2, [r0, #0x1c] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x800000 ; alter r3 to have correct base address for 9 descriptor (flat mapping) + STR r2, [r0, #0x20] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0x900000 ; alter r3 to have correct base address for 10 descriptor (flat mapping) + STR r2, [r0, #0x24] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0xa00000 ; alter r3 to have correct base address for 11 descriptor (flat mapping) + STR r2, [r0, #0x28] ; store the new 
descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0xb00000 ; alter r3 to have correct base address for 12 descriptor (flat mapping) + STR r2, [r0, #0x2c] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + + ADD r2, r3, #0xc00000 ; alter r3 to have correct base address for 13 descriptor (flat mapping) + STR r2, [r0, #0x30] ; store the new descriptor at r0 + 4 (overwrite second section descriptor) + +;=================================================================== +; Setup domain control register - Enable all domains to client mode +;=================================================================== + + MRC p15, 0, r0, c3, c0, 0 ; Read Domain Access Control Register + LDR r0, =0x55555555 ; Initialize every domain entry to b01 (client) + MCR p15, 0, r0, c3, c0, 0 ; Write Domain Access Control Register + +;=================================================================== +; Setup L2 Cache - L2 Cache Auxiliary Control +;=================================================================== + + MOV r0, #0 + ;MCR p15, 1, r0, c9, c0, 2 ; Write L2 Auxilary Control Register + +;================================================================== +; Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11. +; Enables Full Access i.e. in both priv and non priv modes +;================================================================== + + MRC p15, 0, r0, c1, c0, 2 ; read CP access register + ORR r0, r0, #(0x3 <<20) ; enable access CP 10 + ORR r0, r0, #(0x3 <<22) ; enable access CP 11 + MCR p15, 0, r0, c1, c0, 2 ; write CP access register back + +;================================================================== +; Switch on the VFP and Neon Hardware +;================================================================= + + MOV r0, #0 ; Set up a register + ORR r0, r0, #(0x1 << 30) + FMXR FPEXC, r0 ; Write FPEXC register, EN bit set. 
+ +;=================================================================== +; Enable MMU and Branch to __main +;=================================================================== + + IMPORT __main ; before MMU enabled import label to __main + LDR r12,=__main ; save this in register for possible long jump + + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #0x1 ; enable MMU before scatter loading + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + + +; Now the MMU is enabled, virtual to physical address translations will occur. +; This will affect the next instruction fetches. +; +; The two instructions currently in the ARM pipeline will have been fetched +; before the MMU was enabled. This property is useful because the next two +; instructions are safe even if new instruction fetches fail. If this routine +; was mapped out of the new virtual memory map, the branch to __main would +; still succeed. + + BX r12 ; branch to __main C library entry point + + END ; mark the end of this file + diff --git a/InitCache.s b/InitCache.s new file mode 100644 index 0000000..250652a --- /dev/null +++ b/InitCache.s @@ -0,0 +1,52 @@ +; Copyright ARM Ltd 2005. All rights reserved. 
+ +;================================================================== +; This code provides basic global enable for a Cortex-A8 cache +; and program flow prediction +; This code must be run from a privileged mode +;================================================================== + + AREA CORTEXA8CACHE, CODE, READONLY + EXPORT core_init + +core_init + +;================================================================== +; Global Enable for Cortex-A8 Instruction and Data Caching +;================================================================== + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #(0x1 <<12) ; enable I Cache + ;BIC r0, r0, #(0x1 <<12) ; Clear bit 0 + ORR r0, r0, #(0x1 <<2) ; enable D Cache + ;BIC r0, r0, #(0x1 << 2) ; Clear bit 0 + ORR r0, r0, #0x1 ; enable MMU + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + +;================================================================== +; Enable Cortex-A8 Level2 Unified Cache +;================================================================== + + MRC p15, 0, r0, c1, c0, 1 ; Read Auxiliary Control Register + ORR r0, r0, #2 ; L2EN bit, enable L2 cache + ;BIC r0, r0, #(0x1 << 1) ; L2EN bit, disable L2 cache + ;ORR r0, r0, #(0x1 << 4) ;Enables speculative accesses on AXI + ORR r0, r0, #(0x1 << 4) ;Enables speculative accesses on AXI + ORR r0, r0, #(0x1 << 5) ;Enables caching NEON data within the L1 data cache + MCR p15, 0, r0, c1, c0, 1 ; Write Auxiliary Control Register + +;================================================================== +; Cortex-A8 program flow prediction +;================================================================== + + MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 + ORR r0, r0, #(0x1 <<11) ; Enable all forms of branch prediction + ;BIC r0, r0, #(0x1 << 11) ; Disable all forms of branch prediction + MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 + +;================================================================== + + BX lr + + END ; 
mark the end of this file + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e27425a --- /dev/null +++ b/Makefile @@ -0,0 +1,133 @@ +# Copyright (c) 2009, 2010, 2011 STMicroelectronics +# Written by Christophe Lyon + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# ARM RVCT +CC.rvct := armcc +CFLAGS.rvct = -g --cpu=cortex-a9 -Ono_special_regs_postregalloc -I. 
+LD.rvct := armlink +LDFLAGS.rvct := --cpu=cortex-a9 --entry 0x2000 + +# GCC/ARM cross compiler +CC.gccarm := arm-none-eabi-gcc +CFLAGS.gccarm := -g -Wall -mcpu=cortex-a9 -mfloat-abi=softfp -mfpu=neon -fshort-wchar -Wno-unused-variable -Wno-unused-function +LD.gccarm := armlink +LDFLAGS.gccarm := --cpu=cortex-a9 --entry 0x2000 + +# List of validated intrinsics +REFNAMES = vld1 vadd vld1_lane vld1_dup vdup vget_high vget_low \ + vqdmlal_lane vqdmlsl_lane vext vshrn_n vset_lane vget_lane \ + vqsub vqdmulh_lane vqdmull vqdmlal vqdmlsl vceq vcge vcle \ + vcgt vclt vbsl vshl vldX vdup_lane vrshrn_n vqdmull_lane \ + vst1_lane vqshl vqshl_n vqrshrn_n vsub vqadd vabs vqabs \ + vcombine vmax vmin vneg vqneg vmlal vmlal_lane vmlsl \ + vmlsl_lane vmovl vmovn vmull vmull_lane vrev vrshl vshl_n \ + vshr_n vsra_n vtrn vuzp vzip vreinterpret vqdmulh vqrdmulh \ + vqrdmulh_lane vqrshl vaba vabal vabd vabdl vand vorr vorn \ + veor vbic vcreate vldX_lane vldX_dup vmla vmls vmul \ + vmul_lane vmul_n vmull_n vqdmulh_n vqdmull_n vqrdmulh_n \ + vmla_lane vmls_lane vmla_n vmls_n vmlal_n vmlsl_n vqdmlal_n \ + vqdmlsl_n vsri_n vsli_n vtst vaddhn vraddhn vaddl vaddw \ + vhadd vrhadd vhsub vsubl vsubw vsubhn vrsubhn vmvn vqmovn \ + vqmovun vrshr_n vrsra_n vshll_n vpaddl vpadd vpadal \ + vqshlu_n vclz vcls vcnt vqshrn_n vpmax vpmin vqshrun_n \ + vqrshrun_n vstX_lane vtbX vrecpe vrsqrte integer vcage \ + vcagt vcale vcalt vrecps vrsqrts vcvt dsp dspfns +REFLIST = $(addprefix ref_, $(REFNAMES)) + +all: ref-rvct.qemu + +check: + diff stm-arm-neon.refrvct ref-rvct.txt + +# Building reference files with RVCT +REFOBJS.rvct = $(addsuffix .rvct.o, $(REFLIST)) +REFRVCT=stm-arm-neon.refrvct +ref-rvct: $(REFRVCT) +ref-rvct.qemu: $(REFRVCT).qemu + +$(REFRVCT): compute_ref.axf + rvdebug -stdiolog=stdio.log -jou=journal.log -log=log.log -nologo -cmd -init @coretile.core.cpu0@RTSM -inc armscript.inc -exec $^ + +$(REFRVCT).qemu: compute_ref.axf + qemu-system-arm -cpu cortex-a9 -semihosting -nographic -kernel 
$^ + +compute_ref.axf: scatter.scat compute_ref.rvct.o retarget.rvct.o \ + InitCache.o Init.o $(REFOBJS.rvct) + $(LD.rvct) $(LDFLAGS.rvct) --scatter $^ -o $@ + +compute_ref.rvct.o retarget.rvct.o: %.rvct.o: %.c + $(CC.rvct) $(CFLAGS.rvct) -c $^ -o $@ -DREFFILE=\"$(REFRVCT)\" + +ref_%.rvct.o: ref_%.c stm-arm-neon-ref.h $(NEONINCLUDE) + $(CC.rvct) $(CFLAGS.rvct) -c $< -o $@ + +InitCache.o Init.o: %.o: %.s + $(CC.rvct) $(CFLAGS.rvct) -c $^ -o $@ + + +# Building reference files with GCC/ARM +REFOBJS.gccarm = $(addsuffix .gccarm.o, $(REFLIST)) +REFGCCARM=stm-arm-neon.gccarm +ref-gccarm: $(REFGCCARM) + +$(REFGCCARM): compute_ref.gccarm + rvdebug -stdiolog=stdio.log -jou=journal.log -log=log.log -nologo -cmd -init @coretile.core.cpu0@RTSM -inc armscript.inc -exec $^ + +compute_ref.gccarm: scatter.scat compute_ref.gccarm.o retarget.rvct.o \ + InitCache.o Init.o $(REFOBJS.gccarm) + $(LD.rvct) $(LDFLAG.rvct) --scatter $^ -o $@ + +compute_ref.gccarm.o: %.gccarm.o: %.c + $(CC.gccarm) $(CFLAGS.gccarm) -c $^ -o $@ -DREFFILE=\"$(REFGCCARM)\" + +ref_%.gccarm.o: ref_%.c stm-arm-neon-ref.h + $(CC.gccarm) $(CFLAGS.gccarm) -c $< -o $@ + +# Use '*' rather than '%' in these rules: +# - using '%' does not make them add to the implicit rules above (they +# are different rules, only the 1st one matches) +# - they are needed only when the target already exists, so the +# wildcard matches when needed. +# - if the target does not already exist, the implicit rules apply. 
+ref_vadd.*.o ref_vsub.*.o ref_vand.*.o ref_vbic.*.o ref_veor.*.o ref_vorn.*.o ref_vorr.*.o: ref_v_binary_op.c +ref_vqadd.*.o ref_vqsub.*.o: ref_v_binary_sat_op.c +ref_vabs.*.o ref_vneg.*.o ref_vmvn.*.o: ref_v_unary_op.c +ref_vqabs.*.o ref_vqneg.*.o: ref_v_unary_sat_op.c +ref_vceq.*.o ref_vcge.*.o ref_vcle.*.o ref_vcgt.*.o ref_vclt.*.o: ref_v_comp_op.c +ref_vhadd.*.o ref_vrhadd.*.o ref_vhsub.*.o ref_vmin.*.o: ref_vmax.c +ref_vmls.*.o: ref_vmla.c +ref_vmls_lane.*.o: ref_vmla_lane.c +ref_vmls_n.*.o: ref_vmla_n.c +ref_vmlsl.*.o: ref_vmlal.c +ref_vmlsl_lane.*.o: ref_vmlal_lane.c +ref_vmlsl_n.*.o: ref_vmlal_n.c +ref_vqdmlsl.*.o: ref_vqdmlal.c +ref_vqdmlsl_lane.*.o: ref_vqdmlal_lane.c +ref_vqdmlsl_n.*.o: ref_vqdmlal_n.c +ref_vtrn.*.o ref_vzip.*.o: ref_vuzp.c +ref_vsri_n.*.o: ref_vsli_n.c +ref_vraddhn.*.o ref_vsubhn.*.o ref_vrsubhn.*.o: ref_vaddhn.c +ref_vsubl.*.o: ref_vaddl.c +ref_vsubw.*.o: ref_vaddw.c +ref_vcage.*.o ref_vcale.*.o ref_vcagt.*.o ref_vcalt.*.o: ref_v_comp_f_op.c + +clean: + rm -f *.o *.log stm-arm-neon.refrvct @@ -0,0 +1,56 @@ +ARM Neon reference tests +======================== +This package contains extensive tests for the ARM/Neon instructions. + +It works by building a program which uses all of them, and then +executing it on an actual target or a simulator. + +It can be used to validate the simulator against an actual HW target, +or to validate C compilers in presence of Neon intrinsics calls. + +The supplied Makefile enables to build with both ARM RVCT compiler and +GNU GCC (for the ARM target), and supports execution with ARM RVDEBUG +on an ARM simulator and with QEMU. + +For convenience, the ARM ELF binary file (as compiled with RVCT) is +supplied (compute_ref.axf), as well as expected outputs +(ref-rvct.txt). 
+ +Typical usage when used to debug QEmu: +$ make all # to build the test program with ARM rvct and execute with QEmu +$ make check # to compare the results with the expected output + + +Known issues: +------------- +The tests currently fail to build with GCC/ARM: +- no support for Neon_Overflow/fpsrc register +- ICE when compiling ref_vldX.c, ref_vldX_lane.c, ref_vstX_lane.c +- fails to compile vst1_lane.c +- missing include files: dspfns.h, armdsp.h + +VS[LR]I.64 tests are disabled because QEmu aborts. + +Engineering: +------------ +In order to cover all the Neon instructions extensively, these tests +make intensive use of the C-preprocessor, to save maintenance efforts. + +Most tests (the more regular ones) share a common basic structure. In +general, variable names are suffixed by their type name, so as to +differentiate variables with the same purpose but of differente types. +Hence vector1_int8x8, vector1_int16x4 etc... + +For instance in ref_vmul.c the layout of the code is as follows: + +- declare input and output vectors (named 'vector1', 'vector2' and + 'vector_res') of each possible type (s/u, 8/16/32/64 bits). + +- clean the result buffers. + +- initialize input vectors 'vector1' and 'vector2'. + +- call each variant of the intrinsic and store the result in a buffer + named 'buffer', whose contents is printed after execution. + +One can then compare the actual result with the expected one. 
diff --git a/armscript.inc b/armscript.inc new file mode 100644 index 0000000..ad53a5c --- /dev/null +++ b/armscript.inc @@ -0,0 +1,14 @@ +ERROR=ABORT // Abort if error occurs when processing the script +WAIT=ON // Wait for each command to finish + +GO + +STATS + + //STDIOLOG OFF // Close the log file + + //UNLOAD 1 // Unload the image + //DELFILE 1 // Remove the symbol definitions + //DISCONNECT // Disconnect from the target + //WAIT=OFF +QUIT Y diff --git a/compute_ref.axf b/compute_ref.axf Binary files differnew file mode 100644 index 0000000..4db6e19 --- /dev/null +++ b/compute_ref.axf diff --git a/compute_ref.c b/compute_ref.c new file mode 100644 index 0000000..e1109b9 --- /dev/null +++ b/compute_ref.c @@ -0,0 +1,345 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <stdio.h> +#include <stdlib.h> + +FILE* log_file = NULL; +FILE* ref_file = NULL; + +#define LOGFILE "stm-arm-neon-ref.log" + +void cleanup () +{ + if (log_file) fclose (log_file); + if (ref_file) fclose (ref_file); + exit (1); +} + +extern void exec_vld1(void); +extern void exec_vadd(void); +extern void exec_vld1_lane(void); +extern void exec_vld1_dup(void); +extern void exec_vdup(void); +extern void exec_vget_high(void); +extern void exec_vget_low(void); +extern void exec_vqdmlal_lane(void); +extern void exec_vqdmlsl_lane(void); +extern void exec_vqdmlal_n(void); +extern void exec_vqdmlsl_n(void); +extern void exec_vext(void); +extern void exec_vshr_n(void); +extern void exec_vshrn_n(void); +extern void exec_vrshrn_n(void); +extern void exec_vqrshrn_n(void); +extern void exec_vset_lane(void); +extern void exec_vget_lane(void); +extern void exec_vqsub(void); +extern void exec_vqdmulh(void); +extern void exec_vqdmulh_lane(void); +extern void exec_vqdmulh_n(void); +extern void exec_vqdmull(void); +extern void exec_vqdmlal(void); +extern void exec_vqdmlsl(void); +extern void exec_vceq(void); +extern void exec_vcge(void); +extern void exec_vcle(void); +extern void exec_vcgt(void); +extern void exec_vclt(void); +extern void exec_vbsl(void); +extern void exec_vshl(void); +extern void exec_vqshl(void); +extern void exec_vqshl_n(void); +extern void exec_vrshl(void); +extern void exec_vshl_n(void); +extern void exec_vldX(void); +extern void exec_vdup_lane(void); +extern void exec_vqdmull_lane(void); +extern void exec_vqdmull_n(void); +extern void exec_vst1_lane(void); +extern void exec_vsub(void); +extern void exec_vqadd(void); +extern void exec_vabs(void); +extern void exec_vqabs(void); +extern void exec_vcombine(void); +extern void exec_vmax(void); +extern void exec_vmin(void); +extern void exec_vneg(void); +extern void exec_vqneg(void); +extern void exec_vmlal(void); +extern void exec_vmlal_lane(void); +extern void exec_vmlal_n(void); +extern void 
exec_vmlsl(void); +extern void exec_vmlsl_lane(void); +extern void exec_vmlsl_n(void); +extern void exec_vmovl(void); +extern void exec_vmovn(void); +extern void exec_vmull(void); +extern void exec_vmull_lane(void); +extern void exec_vrev(void); +extern void exec_vsra_n(void); +extern void exec_vtrn(void); +extern void exec_vuzp(void); +extern void exec_vzip(void); +extern void exec_vreinterpret(void); +extern void exec_vqrdmulh(void); +extern void exec_vqrdmulh_lane(void); +extern void exec_vqrdmulh_n(void); +extern void exec_vqrshl(void); +extern void exec_vaba(void); +extern void exec_vabal(void); +extern void exec_vabd(void); +extern void exec_vabdl(void); +extern void exec_vand(void); +extern void exec_vorr(void); +extern void exec_vorn(void); +extern void exec_veor(void); +extern void exec_vbic(void); +extern void exec_vcreate(void); +extern void exec_vldX_lane(void); +extern void exec_vldX_dup(void); +extern void exec_vmla(void); +extern void exec_vmls(void); +extern void exec_vmul(void); +extern void exec_vmul_lane(void); +extern void exec_vmul_n(void); +extern void exec_vmull_n(void); +extern void exec_vmla_lane(void); +extern void exec_vmls_lane(void); +extern void exec_vmla_n(void); +extern void exec_vmls_n(void); +extern void exec_vsli_n(void); +extern void exec_vsri_n(void); +extern void exec_vtst(void); +extern void exec_vaddhn(void); +extern void exec_vraddhn(void); +extern void exec_vaddl(void); +extern void exec_vaddw(void); +extern void exec_vhadd(void); +extern void exec_vrhadd(void); +extern void exec_vhsub(void); +extern void exec_vsubl(void); +extern void exec_vsubw(void); +extern void exec_vsubhn(void); +extern void exec_vrsubhn(void); +extern void exec_vmvn(void); +extern void exec_vqmovn(void); +extern void exec_vqmovun(void); +extern void exec_vrshr_n(void); +extern void exec_vrsra_n(void); +extern void exec_vshll_n(void); +extern void exec_vpaddl(void); +extern void exec_vpadd(void); +extern void exec_vpadal(void); +extern void 
exec_vqshlu_n(void); +extern void exec_vclz(void); +extern void exec_vcls(void); +extern void exec_vcnt(void); +extern void exec_vqshrn_n(void); +extern void exec_vpmax(void); +extern void exec_vpmin(void); +extern void exec_vqshrun_n(void); +extern void exec_vqrshrun_n(void); +extern void exec_vstX_lane(void); +extern void exec_vtbX(void); +extern void exec_vrecpe(void); +extern void exec_vrsqrte(void); +extern void exec_integer(void); /* Integer (non-NEON) intrinsics */ + +extern void exec_vcage(void); +extern void exec_vcagt(void); +extern void exec_vcale(void); +extern void exec_vcalt(void); +extern void exec_vcvt(void); +extern void exec_vrecps(void); +extern void exec_vrsqrts(void); + +extern void exec_dsp(void); /* DSP (non-NEON) intrinsics */ +extern void exec_dspfns(void); /* DSP FNS (non-NEON/ITU) intrinsics */ + +int main () +{ + log_file = fopen (LOGFILE, "w"); + if (log_file == NULL) { + fprintf (stderr, "Error opening log file "LOGFILE"\n"); + cleanup (); + } + + ref_file = fopen (REFFILE, "w"); + if (ref_file == NULL) { + fprintf (log_file, "Error opening ref file %s\n", REFFILE); + cleanup (); + } + + fprintf (log_file, "Computing refs....\n"); + + exec_vld1 (); + exec_vadd (); + exec_vld1_lane (); + exec_vld1_dup (); + exec_vdup (); + exec_vget_high (); + exec_vget_low (); + exec_vqdmlal_lane (); + exec_vqdmlsl_lane (); + exec_vqdmlal_n (); + exec_vqdmlsl_n (); + exec_vext (); + exec_vshr_n (); + exec_vshrn_n (); + exec_vrshrn_n (); + exec_vqrshrn_n (); + exec_vset_lane (); + exec_vget_lane (); + exec_vqsub (); + exec_vqdmulh (); + exec_vqdmulh_lane (); + exec_vqdmulh_n (); + exec_vqdmull (); + exec_vqdmlal (); + exec_vqdmlsl (); + exec_vceq (); + exec_vcge (); + exec_vcle (); + exec_vcgt (); + exec_vclt (); + exec_vbsl (); + exec_vshl (); + exec_vshl_n (); + exec_vqshl (); + exec_vqshl_n (); + exec_vrshl (); + exec_vldX (); + exec_vdup_lane (); + exec_vqdmull_lane (); + exec_vqdmull_n (); + exec_vst1_lane (); + exec_vsub (); + exec_vqadd (); + 
exec_vabs (); + exec_vqabs (); + exec_vcombine (); + exec_vmax (); + exec_vmin (); + exec_vneg (); + exec_vqneg (); + exec_vmlal (); + exec_vmlsl (); + exec_vmlal_lane (); + exec_vmlsl_lane (); + exec_vmlal_n (); + exec_vmlsl_n (); + exec_vmovl (); + exec_vmovn (); + exec_vmull (); + exec_vmull_lane (); + exec_vrev (); + exec_vsra_n (); + exec_vtrn (); + exec_vuzp (); + exec_vzip (); + exec_vreinterpret (); + exec_vqrdmulh (); + exec_vqrdmulh_lane (); + exec_vqrdmulh_n (); + exec_vqrshl (); + exec_vaba (); + exec_vabal (); + exec_vabd (); + exec_vabdl (); + exec_vand (); + exec_vorr (); + exec_vorn (); + exec_veor (); + exec_vbic (); + exec_vcreate (); + exec_vldX_lane (); + exec_vldX_dup (); + exec_vmla (); + exec_vmls (); + exec_vmul (); + exec_vmul_lane (); + exec_vmul_n (); + exec_vmull_n (); + exec_vmla_lane (); + exec_vmls_lane (); + exec_vmla_n (); + exec_vmls_n (); +#if 0 + exec_vsli_n (); + exec_vsri_n (); +#endif + exec_vtst (); + exec_vaddhn (); + exec_vraddhn (); + exec_vaddl (); + exec_vaddw (); + exec_vhadd (); + exec_vrhadd (); + exec_vhsub (); + exec_vsubl (); + exec_vsubw (); + exec_vsubhn (); + exec_vrsubhn (); + exec_vmvn (); + exec_vqmovn (); + exec_vqmovun (); + exec_vrshr_n (); + exec_vrsra_n (); + exec_vshll_n (); + exec_vpaddl (); + exec_vpadd (); + exec_vpadal (); + exec_vqshlu_n (); + exec_vclz (); + exec_vcls (); + exec_vcnt (); + exec_vqshrn_n (); + exec_vpmax (); + exec_vpmin (); + exec_vqshrun_n (); + exec_vqrshrun_n (); + exec_vstX_lane (); + exec_vtbX (); + exec_vrecpe (); + exec_vrsqrte (); + + exec_integer (); + + exec_vcage (); + exec_vcale (); + exec_vcagt (); + exec_vcalt (); + exec_vcvt (); + exec_vrecps (); + exec_vrsqrts (); + + exec_dsp (); + exec_dspfns (); + + fprintf (log_file, "Finished\n"); + + return 0; +} diff --git a/ref-rvct.txt b/ref-rvct.txt new file mode 100644 index 0000000..8c95fbd --- /dev/null +++ b/ref-rvct.txt @@ -0,0 +1,6849 @@ + +VLD1/VLD1Q output: +VLD1/VLD1Q:0:result_int8x8 [] = { fffffff0, fffffff1, 
fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD1/VLD1Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD1/VLD1Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD1/VLD1Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1/VLD1Q:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD1/VLD1Q:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD1/VLD1Q:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD1/VLD1Q:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD1/VLD1Q:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD1/VLD1Q:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD1/VLD1Q:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VLD1/VLD1Q:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VADD/VADDQ output: +VADD/VADDQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, } +VADD/VADDQ:1:result_int16x4 [] = { ffffffec, ffffffed, ffffffee, ffffffef, } +VADD/VADDQ:2:result_int32x2 [] = { fffffff3, fffffff4, } +VADD/VADDQ:3:result_int64x1 [] = { 54, } +VADD/VADDQ:4:result_uint8x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:5:result_uint16x4 [] = { e, f, 10, 11, } 
+VADD/VADDQ:6:result_uint32x2 [] = { 18, 19, } +VADD/VADDQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VADD/VADDQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VADD/VADDQ:9:result_int8x16 [] = { ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VADD/VADDQ:10:result_int16x8 [] = { ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VADD/VADDQ:11:result_int32x4 [] = { ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VADD/VADDQ:12:result_int64x2 [] = { 8, 9, } +VADD/VADDQ:13:result_uint8x16 [] = { fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, } +VADD/VADDQ:14:result_uint16x8 [] = { fff3, fff4, fff5, fff6, fff7, fff8, fff9, fffa, } +VADD/VADDQ:15:result_uint32x4 [] = { 27, 28, 29, 2a, } +VADD/VADDQ:16:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff4, } +VADD/VADDQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +float32: +VADD/VADDQ:18:result_float32x2 [] = { 40d9999a 0x1.b33334p+2 6.8, 40d9999a 0x1.b33334p+2 6.8, } +VADD/VADDQ:19:result_float32x4 [] = { 41100000 0x1.2p+3 9, 41100000 0x1.2p+3 9, 41100000 0x1.2p+3 9, 41100000 0x1.2p+3 9, } + +VLD1_LANE/VLD1_LANEQ output: +VLD1_LANE/VLD1_LANEQ:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, ffffffaa, } +VLD1_LANE/VLD1_LANEQ:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:2:result_int32x2 [] = { aaaaaaaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, f0, } +VLD1_LANE/VLD1_LANEQ:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, fff0, } +VLD1_LANE/VLD1_LANEQ:6:result_uint32x2 [] = { aaaaaaaa, fffffff0, } 
+VLD1_LANE/VLD1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:8:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, c1800000 -0x1p+4 -16, } +VLD1_LANE/VLD1_LANEQ:9:result_int8x16 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, } +VLD1_LANE/VLD1_LANEQ:10:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, ffffaaaa, ffffaaaa, } +VLD1_LANE/VLD1_LANEQ:11:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:12:result_int64x2 [] = { aaaaaaaaaaaaaaaa, fffffffffffffff0, } +VLD1_LANE/VLD1_LANEQ:13:result_uint8x16 [] = { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, f0, aa, aa, aa, } +VLD1_LANE/VLD1_LANEQ:14:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, aaaa, } +VLD1_LANE/VLD1_LANEQ:15:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, aaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:16:result_uint64x2 [] = { fffffffffffffff0, aaaaaaaaaaaaaaaa, } +VLD1_LANE/VLD1_LANEQ:17:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, c1800000 -0x1p+4 -16, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1800000 
-0x1p+4 -16, } +VLD1_DUP/VLD1_DUPQ:9:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:10:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:11:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:13:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VLD1_DUP/VLD1_DUPQ:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VLD1_DUP/VLD1_DUPQ:15:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VLD1_DUP/VLD1_DUPQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VLD1_DUP/VLD1_DUPQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VLD1_DUP/VLD1_DUPQ:9:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } 
+VLD1_DUP/VLD1_DUPQ:10:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:12:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:13:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VLD1_DUP/VLD1_DUPQ:14:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VLD1_DUP/VLD1_DUPQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VLD1_DUP/VLD1_DUPQ:16:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VLD1_DUP/VLD1_DUPQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } + +VLD1_DUP/VLD1_DUPQ output: +VLD1_DUP/VLD1_DUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:8:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } +VLD1_DUP/VLD1_DUPQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:10:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:11:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } 
+VLD1_DUP/VLD1_DUPQ:12:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:13:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VLD1_DUP/VLD1_DUPQ:14:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VLD1_DUP/VLD1_DUPQ:15:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VLD1_DUP/VLD1_DUPQ:16:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VLD1_DUP/VLD1_DUPQ:17:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP/VDUPQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } +VDUP/VDUPQ:9:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:10:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:11:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP/VDUPQ:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:13:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VDUP/VDUPQ:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VDUP/VDUPQ:15:result_uint32x4 [] = { fffffff0, fffffff0, 
fffffff0, fffffff0, } +VDUP/VDUPQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP/VDUPQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VDUP/VDUPQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VDUP/VDUPQ:9:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:10:result_int16x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:12:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:13:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VDUP/VDUPQ:14:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VDUP/VDUPQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP/VDUPQ:16:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VDUP/VDUPQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } + +VDUP/VDUPQ output: +VDUP/VDUPQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } 
+VDUP/VDUPQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:3:result_int64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VDUP/VDUPQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VDUP/VDUPQ:8:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } +VDUP/VDUPQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:10:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:11:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:12:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:13:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VDUP/VDUPQ:14:result_uint16x8 [] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VDUP/VDUPQ:15:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP/VDUPQ:16:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VDUP/VDUPQ:17:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff0, 
fffffff0, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VMOV/VMOVQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } +VMOV/VMOVQ:9:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:10:result_int16x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:11:result_int32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:13:result_uint8x16 [] = { f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, f0, } +VMOV/VMOVQ:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VMOV/VMOVQ:15:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VMOV/VMOVQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VMOV/VMOVQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff1, } +VMOV/VMOVQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VMOV/VMOVQ:9:result_int8x16 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:10:result_int16x8 [] 
= { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:12:result_int64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:13:result_uint8x16 [] = { f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, f1, } +VMOV/VMOVQ:14:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } +VMOV/VMOVQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VMOV/VMOVQ:16:result_uint64x2 [] = { fffffffffffffff1, fffffffffffffff1, } +VMOV/VMOVQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } + +VMOV/VMOVQ output: +VMOV/VMOVQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:2:result_int32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:3:result_int64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:4:result_uint8x8 [] = { f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:5:result_uint16x4 [] = { fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:6:result_uint32x2 [] = { fffffff2, fffffff2, } +VMOV/VMOVQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VMOV/VMOVQ:8:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } +VMOV/VMOVQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:10:result_int16x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:11:result_int32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:12:result_int64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:13:result_uint8x16 [] = { f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, f2, } +VMOV/VMOVQ:14:result_uint16x8 
[] = { fff2, fff2, fff2, fff2, fff2, fff2, fff2, fff2, } +VMOV/VMOVQ:15:result_uint32x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VMOV/VMOVQ:16:result_uint64x2 [] = { fffffffffffffff2, fffffffffffffff2, } +VMOV/VMOVQ:17:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, c1600000 -0x1.cp+3 -14, } + +VGET_HIGH output: +VGET_HIGH:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VGET_HIGH:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VGET_HIGH:2:result_int32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:3:result_int64x1 [] = { fffffffffffffff1, } +VGET_HIGH:4:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VGET_HIGH:5:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VGET_HIGH:6:result_uint32x2 [] = { fffffff2, fffffff3, } +VGET_HIGH:7:result_uint64x1 [] = { fffffffffffffff1, } +VGET_HIGH:8:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VGET_HIGH:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_HIGH:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_HIGH:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_HIGH:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_HIGH:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VGET_LOW output: +VGET_LOW:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, 
fffffff6, fffffff7, } +VGET_LOW:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VGET_LOW:2:result_int32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:3:result_int64x1 [] = { fffffffffffffff0, } +VGET_LOW:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VGET_LOW:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VGET_LOW:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VGET_LOW:7:result_uint64x1 [] = { fffffffffffffff0, } +VGET_LOW:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VGET_LOW:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VGET_LOW:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VGET_LOW:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VGET_LOW:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VGET_LOW:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_LANE overflow output: +VQDMLAL_LANE:0:vqdmlal_lane_s16 Neon overflow 0 +VQDMLAL_LANE:1:vqdmlal_lane_s32 Neon overflow 0 + +VQDMLAL_LANE output: +VQDMLAL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, 
} +VQDMLAL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:13:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL_LANE:14:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_LANE (mul with input=0) overflow output: +VQDMLAL_LANE:20:vqdmlal_lane_s16 Neon overflow 0 +VQDMLAL_LANE:21:vqdmlal_lane_s32 Neon overflow 0 + +VQDMLAL_LANE (mul with input=0) output: +VQDMLAL_LANE:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:25:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_LANE:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMLAL_LANE:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:33:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLAL_LANE:34:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLAL_LANE:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_LANE (check mul overflow) overflow output: +VQDMLAL_LANE:40:vqdmlal_lane_s16 Neon overflow 1 +VQDMLAL_LANE:41:vqdmlal_lane_s32 Neon overflow 1 + +VQDMLAL_LANE (check mul overflow) output: +VQDMLAL_LANE:42:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:43:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:44:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:45:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:46:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:47:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:48:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_LANE:49:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_LANE:50:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_LANE:51:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:52:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:53:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_LANE:54:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } 
+VQDMLAL_LANE:55:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_LANE:56:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_LANE:57:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_LANE:58:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_LANE:59:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_LANE overflow output: +VQDMLSL_LANE:0:vqdmlsl_lane_s16 Neon overflow 0 +VQDMLSL_LANE:1:vqdmlsl_lane_s32 Neon overflow 0 + +VQDMLSL_LANE output: +VQDMLSL_LANE:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_LANE:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:13:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL_LANE:14:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL_LANE:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:18:result_uint64x2 [] = { 
3333333333333333, 3333333333333333, } +VQDMLSL_LANE:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_LANE (mul with input=0) overflow output: +VQDMLSL_LANE:20:vqdmlsl_lane_s16 Neon overflow 0 +VQDMLSL_LANE:21:vqdmlsl_lane_s32 Neon overflow 0 + +VQDMLSL_LANE (mul with input=0) output: +VQDMLSL_LANE:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:25:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_LANE:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:33:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VQDMLSL_LANE:34:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQDMLSL_LANE:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_LANE (check mul overflow) 
overflow output: +VQDMLSL_LANE:40:vqdmlsl_lane_s16 Neon overflow 1 +VQDMLSL_LANE:41:vqdmlsl_lane_s32 Neon overflow 1 + +VQDMLSL_LANE (check mul overflow) output: +VQDMLSL_LANE:42:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:43:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:44:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:45:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:46:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:47:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:48:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_LANE:49:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_LANE:50:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_LANE:51:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:52:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:53:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_LANE:54:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_LANE:55:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_LANE:56:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_LANE:57:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_LANE:58:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_LANE:59:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_N overflow output: +VQDMLAL_N:0:vqdmlal_n_s16 Neon overflow 0 +VQDMLAL_N:1:vqdmlal_n_s32 Neon overflow 0 + +VQDMLAL_N output: +VQDMLAL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:4:result_int32x2 [] = { 
33333333, 33333333, } +VQDMLAL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:13:result_int32x4 [] = { 1684, 1685, 1686, 1687, } +VQDMLAL_N:14:result_int64x2 [] = { 21ce, 21cf, } +VQDMLAL_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL_N (check mul overflow) overflow output: +VQDMLAL_N:20:vqdmlal_n_s16 Neon overflow 1 +VQDMLAL_N:21:vqdmlal_n_s32 Neon overflow 1 + +VQDMLAL_N (check mul overflow) output: +VQDMLAL_N:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:25:result_int64x1 [] = { 3333333333333333, } +VQDMLAL_N:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL_N:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL_N:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL_N:30:result_float32x2 [] = { 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL_N:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:33:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL_N:34:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL_N:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL_N:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL_N:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL_N:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL_N:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_N overflow output: +VQDMLSL_N:0:vqdmlsl_n_s16 Neon overflow 0 +VQDMLSL_N:1:vqdmlsl_n_s32 Neon overflow 0 + +VQDMLSL_N output: +VQDMLSL_N:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_N:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:13:result_int32x4 [] = { ffffe95c, ffffe95d, ffffe95e, ffffe95f, } +VQDMLSL_N:14:result_int64x2 [] = { ffffffffffffde12, 
ffffffffffffde13, } +VQDMLSL_N:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL_N:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL_N (check mul overflow) overflow output: +VQDMLSL_N:20:vqdmlsl_n_s16 Neon overflow 1 +VQDMLSL_N:21:vqdmlsl_n_s32 Neon overflow 1 + +VQDMLSL_N (check mul overflow) output: +VQDMLSL_N:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:25:result_int64x1 [] = { 3333333333333333, } +VQDMLSL_N:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL_N:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL_N:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL_N:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL_N:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:33:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL_N:34:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL_N:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL_N:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL_N:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL_N:38:result_uint64x2 [] = { 
3333333333333333, 3333333333333333, } +VQDMLSL_N:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VEXT/VEXTQ output: +VEXT/VEXTQ:0:result_int8x8 [] = { fffffff7, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:1:result_int16x4 [] = { fffffff3, 22, 22, 22, } +VEXT/VEXTQ:2:result_int32x2 [] = { fffffff1, 33, } +VEXT/VEXTQ:3:result_int64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:4:result_uint8x8 [] = { f6, f7, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:5:result_uint16x4 [] = { fff2, fff3, 66, 66, } +VEXT/VEXTQ:6:result_uint32x2 [] = { fffffff1, 77, } +VEXT/VEXTQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VEXT/VEXTQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 42066666 0x1.0cccccp+5 33.6, } +VEXT/VEXTQ:9:result_int8x16 [] = { fffffffe, ffffffff, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, } +VEXT/VEXTQ:10:result_int16x8 [] = { fffffff7, 22, 22, 22, 22, 22, 22, 22, } +VEXT/VEXTQ:11:result_int32x4 [] = { fffffff3, 33, 33, 33, } +VEXT/VEXTQ:12:result_int64x2 [] = { fffffffffffffff1, 44, } +VEXT/VEXTQ:13:result_uint8x16 [] = { fc, fd, fe, ff, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, } +VEXT/VEXTQ:14:result_uint16x8 [] = { fff6, fff7, 66, 66, 66, 66, 66, 66, } +VEXT/VEXTQ:15:result_uint32x4 [] = { fffffff3, 77, 77, 77, } +VEXT/VEXTQ:16:result_uint64x2 [] = { fffffffffffffff1, 88, } +VEXT/VEXTQ:17:result_float32x4 [] = { c1500000 -0x1.ap+3 -13, 4204cccd 0x1.09999ap+5 33.2, 4204cccd 0x1.09999ap+5 33.2, 4204cccd 0x1.09999ap+5 33.2, } + +VSHR_N output: +VSHR_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHR_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHR_N:3:result_int64x1 [] = { ffffffffffffffff, } +VSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, } +VSHR_N:5:result_uint16x4 [] = 
{ 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:6:result_uint32x2 [] = { 7ffffff, 7ffffff, } +VSHR_N:7:result_uint64x1 [] = { 7fffffff, } +VSHR_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHR_N:9:result_int8x16 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, } +VSHR_N:10:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHR_N:11:result_int32x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VSHR_N:12:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHR_N:13:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHR_N:14:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, 1ffe, } +VSHR_N:15:result_uint32x4 [] = { 7ffffff, 7ffffff, 7ffffff, 7ffffff, } +VSHR_N:16:result_uint64x2 [] = { 7fffffff, 7fffffff, } +VSHR_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHRN_N output: +VSHRN_N:0:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHRN_N:1:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHRN_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VSHRN_N:4:result_uint8x8 [] = { fc, fc, fc, fc, fd, fd, fd, fd, } +VSHRN_N:5:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VSHRN_N:6:result_uint32x2 [] = { fffffffe, fffffffe, } +VSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHRN_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHRN_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:10:result_int16x8 [] = { 3333, 3333, 
3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSHRN_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHRN_N:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSHRN_N:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSHRN_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSHRN_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHRN_N (with input = 0) output: +VRSHRN_N:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:2:result_int32x2 [] = { 0, 0, } +VRSHRN_N:3:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:6:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:7:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHRN_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:17:result_float32x4 [] = { 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHRN_N output: +VRSHRN_N:18:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHRN_N:19:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VRSHRN_N:20:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHRN_N:21:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:22:result_uint8x8 [] = { fc, fc, fd, fd, fd, fd, fe, fe, } +VRSHRN_N:23:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VRSHRN_N:24:result_uint32x2 [] = { fffffffe, fffffffe, } +VRSHRN_N:25:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHRN_N:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHRN_N (with large shift amount) output: +VRSHRN_N:36:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHRN_N:37:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:38:result_int32x2 [] = { 0, 0, } +VRSHRN_N:39:result_int64x1 [] = { 3333333333333333, } +VRSHRN_N:40:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 
} +VRSHRN_N:41:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHRN_N:42:result_uint32x2 [] = { 0, 0, } +VRSHRN_N:43:result_uint64x1 [] = { 3333333333333333, } +VRSHRN_N:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHRN_N:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSHRN_N:50:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSHRN_N:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSHRN_N:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSHRN_N:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRN_N overflow output: +VQRSHRN_N:0:vqrshrn_n_s16 Neon overflow 0 +VQRSHRN_N:1:vqrshrn_n_s32 Neon overflow 0 +VQRSHRN_N:2:vqrshrn_n_s64 Neon overflow 0 +VQRSHRN_N:3:vqrshrn_n_u16 Neon overflow 1 +VQRSHRN_N:4:vqrshrn_n_u32 Neon overflow 1 +VQRSHRN_N:5:vqrshrn_n_u64 Neon overflow 1 + +VQRSHRN_N output: +VQRSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VQRSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff9, fffffff9, fffffffa, } +VQRSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQRSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } 
+VQRSHRN_N:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRN_N:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRN_N (check saturation: shift by 3) overflow output: +VQRSHRN_N:24:vqrshrn_n_s16 Neon overflow 1 +VQRSHRN_N:25:vqrshrn_n_s32 Neon overflow 1 +VQRSHRN_N:26:vqrshrn_n_s64 Neon overflow 1 +VQRSHRN_N:27:vqrshrn_n_u16 Neon overflow 1 +VQRSHRN_N:28:vqrshrn_n_u32 Neon overflow 1 +VQRSHRN_N:29:vqrshrn_n_u64 Neon overflow 1 + +VQRSHRN_N (check saturation: shift by 3) output: +VQRSHRN_N:30:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:33:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:37:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:38:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRN_N:39:result_int8x16 
[] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:41:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:44:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:47:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRN_N (check saturation: shift by max) overflow output: +VQRSHRN_N:48:vqrshrn_n_s16 Neon overflow 1 +VQRSHRN_N:49:vqrshrn_n_s32 Neon overflow 1 +VQRSHRN_N:50:vqrshrn_n_s64 Neon overflow 1 +VQRSHRN_N:51:vqrshrn_n_u16 Neon overflow 1 +VQRSHRN_N:52:vqrshrn_n_u32 Neon overflow 1 +VQRSHRN_N:53:vqrshrn_n_u64 Neon overflow 1 + +VQRSHRN_N (check saturation: shift by max) output: +VQRSHRN_N:54:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQRSHRN_N:55:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRSHRN_N:56:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRSHRN_N:57:result_int64x1 [] = { 3333333333333333, } +VQRSHRN_N:58:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRN_N:59:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRN_N:60:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRN_N:61:result_uint64x1 [] = { 3333333333333333, } +VQRSHRN_N:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRN_N:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:64:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 
3333, 3333, 3333, } +VQRSHRN_N:65:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRN_N:68:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRN_N:69:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRN_N:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRN_N:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSET_LANE/VSET_LANEQ output: +VSET_LANE/VSET_LANEQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, 11, } +VSET_LANE/VSET_LANEQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 22, } +VSET_LANE/VSET_LANEQ:2:result_int32x2 [] = { fffffff0, 33, } +VSET_LANE/VSET_LANEQ:3:result_int64x1 [] = { 44, } +VSET_LANE/VSET_LANEQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, 55, f7, } +VSET_LANE/VSET_LANEQ:5:result_uint16x4 [] = { fff0, fff1, 66, fff3, } +VSET_LANE/VSET_LANEQ:6:result_uint32x2 [] = { fffffff0, 77, } +VSET_LANE/VSET_LANEQ:7:result_uint64x1 [] = { 88, } +VSET_LANE/VSET_LANEQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, 4204cccd 0x1.09999ap+5 33.2, } +VSET_LANE/VSET_LANEQ:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffff99, } +VSET_LANE/VSET_LANEQ:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, aa, fffffff6, fffffff7, } +VSET_LANE/VSET_LANEQ:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, bb, } +VSET_LANE/VSET_LANEQ:12:result_int64x2 [] = { fffffffffffffff0, cc, } +VSET_LANE/VSET_LANEQ:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, 
fb, fc, fd, dd, ff, } +VSET_LANE/VSET_LANEQ:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, ee, fff7, } +VSET_LANE/VSET_LANEQ:15:result_uint32x4 [] = { fffffff0, fffffff1, ff, fffffff3, } +VSET_LANE/VSET_LANEQ:16:result_uint64x2 [] = { fffffffffffffff0, 11, } +VSET_LANE/VSET_LANEQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, 41333333 0x1.666666p+3 11.2, } + +VGET_LANE/VGETQ_LANE output: +fffffff7, fffffff3, fffffff1, fffffffffffffff0, f6, fff2, fffffff1, fffffffffffffff0, c1700000, ffffffff, fffffff5, fffffff3, fffffffffffffff1, fe, fff6, fffffff2, fffffffffffffff1, c1500000, + +VQSUB/VQSUBQ overflow output: +VQSUB/VQSUBQ:0:vqsub_s8 Neon overflow 0 +VQSUB/VQSUBQ:1:vqsub_s16 Neon overflow 0 +VQSUB/VQSUBQ:2:vqsub_s32 Neon overflow 0 +VQSUB/VQSUBQ:3:vqsub_s64 Neon overflow 0 +VQSUB/VQSUBQ:4:vqsub_u8 Neon overflow 0 +VQSUB/VQSUBQ:5:vqsub_u16 Neon overflow 0 +VQSUB/VQSUBQ:6:vqsub_u32 Neon overflow 0 +VQSUB/VQSUBQ:7:vqsub_u64 Neon overflow 0 +VQSUB/VQSUBQ:8:vqsubq_s8 Neon overflow 0 +VQSUB/VQSUBQ:9:vqsubq_s16 Neon overflow 0 +VQSUB/VQSUBQ:10:vqsubq_s32 Neon overflow 0 +VQSUB/VQSUBQ:11:vqsubq_s64 Neon overflow 0 +VQSUB/VQSUBQ:12:vqsubq_u8 Neon overflow 0 +VQSUB/VQSUBQ:13:vqsubq_u16 Neon overflow 0 +VQSUB/VQSUBQ:14:vqsubq_u32 Neon overflow 0 +VQSUB/VQSUBQ:15:vqsubq_u64 Neon overflow 0 + +VQSUB/VQSUBQ output: +VQSUB/VQSUBQ:16:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VQSUB/VQSUBQ:17:result_int16x4 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, } +VQSUB/VQSUBQ:18:result_int32x2 [] = { ffffffbd, ffffffbe, } +VQSUB/VQSUBQ:19:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:20:result_uint8x8 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, } +VQSUB/VQSUBQ:21:result_uint16x4 [] = { ff8a, ff8b, ff8c, ff8d, } +VQSUB/VQSUBQ:22:result_uint32x2 [] = { ffffff79, ffffff7a, } +VQSUB/VQSUBQ:23:result_uint64x1 [] = { ffffffffffffff68, } 
+VQSUB/VQSUBQ:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSUB/VQSUBQ:25:result_int8x16 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, ffffffeb, ffffffec, ffffffed, ffffffee, } +VQSUB/VQSUBQ:26:result_int16x8 [] = { ffffffce, ffffffcf, ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, } +VQSUB/VQSUBQ:27:result_int32x4 [] = { ffffffbd, ffffffbe, ffffffbf, ffffffc0, } +VQSUB/VQSUBQ:28:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:29:result_uint8x16 [] = { 9b, 9c, 9d, 9e, 9f, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, } +VQSUB/VQSUBQ:30:result_uint16x8 [] = { ff8a, ff8b, ff8c, ff8d, ff8e, ff8f, ff90, ff91, } +VQSUB/VQSUBQ:31:result_uint32x4 [] = { ffffff79, ffffff7a, ffffff7b, ffffff7c, } +VQSUB/VQSUBQ:32:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } +VQSUB/VQSUBQ:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSUB/VQSUBQ 64 bits saturation overflow output: +VQSUB/VQSUBQ:34:vqsub_s64 Neon overflow 0 +VQSUB/VQSUBQ:35:vqsub_u64 Neon overflow 0 +VQSUB/VQSUBQ:36:vqsubq_s64 Neon overflow 0 +VQSUB/VQSUBQ:37:vqsubq_u64 Neon overflow 0 + +64 bits saturation: +VQSUB/VQSUBQ:38:result_int64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:39:result_uint64x1 [] = { fffffffffffffff0, } +VQSUB/VQSUBQ:40:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQSUB/VQSUBQ:41:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQSUB/VQSUBQ 64 bits saturation overflow output: +VQSUB/VQSUBQ:42:vqsub_s64 Neon overflow 0 +VQSUB/VQSUBQ:43:vqsub_u64 Neon overflow 0 +VQSUB/VQSUBQ:44:vqsubq_s64 Neon overflow 0 +VQSUB/VQSUBQ:45:vqsubq_u64 Neon overflow 0 +VQSUB/VQSUBQ:46:result_int64x1 [] = { ffffffffffffffac, } +VQSUB/VQSUBQ:47:result_uint64x1 [] = { 
ffffffffffffff68, } +VQSUB/VQSUBQ:48:result_int64x2 [] = { ffffffffffffffac, ffffffffffffffad, } +VQSUB/VQSUBQ:49:result_uint64x2 [] = { ffffffffffffff68, ffffffffffffff69, } + +VQSUB/VQSUBQ 64 bits saturation overflow output: +VQSUB/VQSUBQ:50:vqsub_s64 Neon overflow 1 +VQSUB/VQSUBQ:51:vqsub_u64 Neon overflow 1 +VQSUB/VQSUBQ:52:vqsubq_s64 Neon overflow 1 +VQSUB/VQSUBQ:53:vqsubq_u64 Neon overflow 1 +VQSUB/VQSUBQ:54:result_int64x1 [] = { 8000000000000000, } +VQSUB/VQSUBQ:55:result_uint64x1 [] = { 0, } +VQSUB/VQSUBQ:56:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSUB/VQSUBQ:57:result_uint64x2 [] = { 0, 0, } + +less than 64 bits saturation: +VQSUB/VQSUBQ:58:vqsub_s8 Neon overflow 1 +VQSUB/VQSUBQ:59:vqsub_s16 Neon overflow 1 +VQSUB/VQSUBQ:60:vqsub_s32 Neon overflow 1 +VQSUB/VQSUBQ:61:vqsubq_s8 Neon overflow 1 +VQSUB/VQSUBQ:62:vqsubq_s16 Neon overflow 1 +VQSUB/VQSUBQ:63:vqsubq_s32 Neon overflow 1 +VQSUB/VQSUBQ:64:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:65:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:66:result_int32x2 [] = { 80000000, 80000000, } +VQSUB/VQSUBQ:67:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSUB/VQSUBQ:68:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSUB/VQSUBQ:69:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQSUB/VQSUBQ less than 64 bits saturation overflow output: +VQSUB/VQSUBQ:70:vqsub_u8 Neon overflow 1 +VQSUB/VQSUBQ:71:vqsub_u16 Neon overflow 1 +VQSUB/VQSUBQ:72:vqsub_u32 Neon overflow 1 +VQSUB/VQSUBQ:73:vqsubq_u8 Neon overflow 1 +VQSUB/VQSUBQ:74:vqsubq_u16 Neon overflow 1 +VQSUB/VQSUBQ:75:vqsubq_u32 Neon overflow 1 +VQSUB/VQSUBQ:76:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VQSUB/VQSUBQ:77:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSUB/VQSUBQ:78:result_uint32x2 [] = { 0, 0, } +VQSUB/VQSUBQ:79:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:80:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSUB/VQSUBQ:81:result_uint32x4 [] = { 0, 0, 0, 0, } + +VQDMULH overflow output: +VQDMULH:0:vqdmulh_s16 Neon overflow 0 +VQDMULH:1:vqdmulh_s32 Neon overflow 0 +VQDMULH:2:vqdmulhq_s16 Neon overflow 0 +VQDMULH:3:vqdmulhq_s32 Neon overflow 0 + +VQDMULH output: +VQDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:12:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:14:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:15:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH:16:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:21:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 
4.17233e-08, } + +VQDMULH overflow output: +VQDMULH:22:vqdmulh_s16 Neon overflow 1 +VQDMULH:23:vqdmulh_s32 Neon overflow 1 +VQDMULH:24:vqdmulhq_s16 Neon overflow 1 +VQDMULH:25:vqdmulhq_s32 Neon overflow 1 + +VQDMULH output: +VQDMULH:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:27:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH:28:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH:29:result_int64x1 [] = { 3333333333333333, } +VQDMULH:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH:33:result_uint64x1 [] = { 3333333333333333, } +VQDMULH:34:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH:35:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:36:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH:37:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH:38:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:39:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH:40:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH:41:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH:42:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH:43:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULH_LANE overflow output: +VQDMULH_LANE:0:vqdmulh_lane_s16 Neon overflow 0 +VQDMULH_LANE:1:vqdmulh_lane_s32 Neon overflow 0 +VQDMULH_LANE:2:vqdmulhq_lane_s16 Neon overflow 0 +VQDMULH_LANE:3:vqdmulhq_lane_s32 Neon overflow 0 + +VQDMULH_LANE output: +VQDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQDMULH_LANE:5:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:6:result_int32x2 [] = { ffffffff, ffffffff, } +VQDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:12:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH_LANE:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:14:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:15:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQDMULH_LANE:16:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:21:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULH_LANE (check mul overflow) overflow output: +VQDMULH_LANE:22:vqdmulh_lane_s16 Neon overflow 1 +VQDMULH_LANE:23:vqdmulh_lane_s32 Neon overflow 1 +VQDMULH_LANE:24:vqdmulhq_lane_s16 Neon overflow 1 +VQDMULH_LANE:25:vqdmulhq_lane_s32 Neon overflow 1 + +VQDMULH_LANE (check mul overflow) output: +VQDMULH_LANE:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:27:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:28:result_int32x2 [] = { 7fffffff, 
7fffffff, } +VQDMULH_LANE:29:result_int64x1 [] = { 3333333333333333, } +VQDMULH_LANE:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_LANE:32:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_LANE:33:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_LANE:34:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH_LANE:35:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:36:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_LANE:37:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_LANE:38:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:39:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_LANE:40:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_LANE:41:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_LANE:42:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_LANE:43:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULH_N overflow output: +VQDMULH_N:0:vqdmulh_n_s16 Neon overflow 0 +VQDMULH_N:1:vqdmulh_n_s32 Neon overflow 0 +VQDMULH_N:2:vqdmulhq_n_s16 Neon overflow 0 +VQDMULH_N:3:vqdmulhq_n_s32 Neon overflow 0 + +VQDMULH_N output: +VQDMULH_N:4:result_int16x4 [] = { 19, 19, 19, 19, } +VQDMULH_N:5:result_int32x2 [] = { 4, 4, } +VQDMULH_N:6:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQDMULH_N:7:result_int32x4 [] = { a, a, a, a, } + +VQDMULH_N (check mul overflow) overflow output: +VQDMULH_N:8:vqdmulh_n_s16 Neon overflow 1 +VQDMULH_N:9:vqdmulh_n_s32 Neon overflow 1 +VQDMULH_N:10:vqdmulhq_n_s16 Neon overflow 1 +VQDMULH_N:11:vqdmulhq_n_s32 
Neon overflow 1 + +VQDMULH_N (check mul overflow) output: +VQDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQDMULH_N:20:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULH_N:21:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:22:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQDMULH_N:23:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULH_N:24:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:25:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULH_N:26:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULH_N:27:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULH_N:28:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULH_N:29:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULL overflow output: +VQDMULL:0:vqdmull_s16 Neon overflow 0 +VQDMULL:1:vqdmull_s32 Neon overflow 0 + +VQDMULL output: +VQDMULL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:5:result_int64x1 [] = { 3333333333333333, } +VQDMULL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:7:result_uint16x4 [] = { 
3333, 3333, 3333, 3333, } +VQDMULL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:13:result_int32x4 [] = { 200, 1c2, 188, 152, } +VQDMULL:14:result_int64x2 [] = { 200, 1c2, } +VQDMULL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMULL (check mul overflow) overflow output: +VQDMULL:20:vqdmull_s16 Neon overflow 1 +VQDMULL:21:vqdmull_s32 Neon overflow 1 + +VQDMULL (check mul overflow) output: +VQDMULL:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMULL:25:result_int64x1 [] = { 3333333333333333, } +VQDMULL:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMULL:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMULL:29:result_uint64x1 [] = { 3333333333333333, } +VQDMULL:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMULL:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQDMULL:33:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL:34:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQDMULL:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMULL:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMULL:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMULL:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMULL:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL overflow output: +VQDMLAL:0:vqdmlal_s16 Neon overflow 0 +VQDMLAL:1:vqdmlal_s32 Neon overflow 0 + +VQDMLAL output: +VQDMLAL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:13:result_int32x4 [] = { 7c1e, 7c1f, 7c20, 7c21, } +VQDMLAL:14:result_int64x2 [] = { 7c1e, 7c1f, } +VQDMLAL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:18:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VQDMLAL:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLAL (check mul overflow) overflow output: +VQDMLAL:20:vqdmlal_s16 Neon overflow 1 +VQDMLAL:21:vqdmlal_s32 Neon overflow 1 + +VQDMLAL (check mul overflow) output: +VQDMLAL:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLAL:25:result_int64x1 [] = { 3333333333333333, } +VQDMLAL:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLAL:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLAL:29:result_uint64x1 [] = { 3333333333333333, } +VQDMLAL:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLAL:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:33:result_int32x4 [] = { 7fffffef, 7ffffff0, 7ffffff1, 7ffffff2, } +VQDMLAL:34:result_int64x2 [] = { 7fffffffffffffef, 7ffffffffffffff0, } +VQDMLAL:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLAL:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLAL:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLAL:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLAL:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL overflow output: +VQDMLSL:0:vqdmlsl_s16 Neon overflow 0 +VQDMLSL:1:vqdmlsl_s32 Neon overflow 0 + +VQDMLSL output: +VQDMLSL:2:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 
33, 33, } +VQDMLSL:3:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:4:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:5:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:6:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:7:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:8:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:9:result_uint64x1 [] = { 3333333333333333, } +VQDMLSL:10:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL:11:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:12:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:13:result_int32x4 [] = { ffff83c2, ffff83c3, ffff83c4, ffff83c5, } +VQDMLSL:14:result_int64x2 [] = { ffffffffffff83c2, ffffffffffff83c3, } +VQDMLSL:15:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:16:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:17:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:18:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:19:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQDMLSL (check mul overflow) overflow output: +VQDMLSL:20:vqdmlsl_s16 Neon overflow 1 +VQDMLSL:21:vqdmlsl_s32 Neon overflow 1 + +VQDMLSL (check mul overflow) output: +VQDMLSL:22:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:23:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:24:result_int32x2 [] = { 33333333, 33333333, } +VQDMLSL:25:result_int64x1 [] = { 3333333333333333, } +VQDMLSL:26:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:27:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQDMLSL:28:result_uint32x2 [] = { 33333333, 33333333, } +VQDMLSL:29:result_uint64x1 
[] = { 3333333333333333, } +VQDMLSL:30:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQDMLSL:31:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:32:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:33:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQDMLSL:34:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQDMLSL:35:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQDMLSL:36:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQDMLSL:37:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQDMLSL:38:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQDMLSL:39:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCEQ/VCEQQ output: +VCEQ/VCEQQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, 0, } +VCEQ/VCEQQ:1:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:3:result_uint8x8 [] = { 0, 0, 0, ff, 0, 0, 0, 0, } +VCEQ/VCEQQ:4:result_uint16x4 [] = { 0, 0, ffff, 0, } +VCEQ/VCEQQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, 0, 0, 0, } +VCEQ/VCEQQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:8:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCEQ/VCEQQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, 0, } +VCEQ/VCEQQ:11:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCEQ/VCEQQ:13:result_uint32x4 [] = { 0, 0, ffffffff, 0, } +VCEQ/VCEQQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCEQ/VCEQQ:15:result_uint32x2 [] = { 0, ffffffff, } 
+VCEQ/VCEQQ:16:result_uint32x2 [] = { ffffffff, 0, } + +VCGE/VCGEQ output: +VCGE/VCGEQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, ff, ff, } +VCGE/VCGEQ:1:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:3:result_uint8x8 [] = { 0, 0, 0, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:4:result_uint16x4 [] = { 0, 0, ffff, ffff, } +VCGE/VCGEQ:5:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, ff, } +VCGE/VCGEQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:8:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:9:result_uint8x16 [] = { 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGE/VCGEQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, ffff, ffff, } +VCGE/VCGEQ:11:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:12:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:13:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } +VCGE/VCGEQ:14:result_uint32x2 [] = { ffffffff, ffffffff, } +VCGE/VCGEQ:15:result_uint32x2 [] = { 0, ffffffff, } +VCGE/VCGEQ:16:result_uint32x2 [] = { ffffffff, ffffffff, } + +VCLE/VCLEQ output: +VCLE/VCLEQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, 0, } +VCLE/VCLEQ:1:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:2:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:3:result_uint8x8 [] = { ff, ff, ff, ff, 0, 0, 0, 0, } +VCLE/VCLEQ:4:result_uint16x4 [] = { ffff, ffff, ffff, 0, } +VCLE/VCLEQ:5:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, } +VCLE/VCLEQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 0, } +VCLE/VCLEQ:8:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:9:result_uint8x16 [] = { ff, ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLE/VCLEQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, 
0, } +VCLE/VCLEQ:11:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:12:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:13:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } +VCLE/VCLEQ:14:result_uint32x2 [] = { ffffffff, 0, } +VCLE/VCLEQ:15:result_uint32x2 [] = { ffffffff, ffffffff, } +VCLE/VCLEQ:16:result_uint32x2 [] = { ffffffff, 0, } + +VCGT/VCGTQ output: +VCGT/VCGTQ:0:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, ff, } +VCGT/VCGTQ:1:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:2:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:3:result_uint8x8 [] = { 0, 0, 0, 0, ff, ff, ff, ff, } +VCGT/VCGTQ:4:result_uint16x4 [] = { 0, 0, 0, ffff, } +VCGT/VCGTQ:5:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:6:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ff, ff, ff, } +VCGT/VCGTQ:7:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:8:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:9:result_uint8x16 [] = { 0, 0, 0, 0, 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VCGT/VCGTQ:10:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, ffff, } +VCGT/VCGTQ:11:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:12:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:13:result_uint32x4 [] = { 0, 0, 0, ffffffff, } +VCGT/VCGTQ:14:result_uint32x2 [] = { 0, ffffffff, } +VCGT/VCGTQ:15:result_uint32x2 [] = { 0, 0, } +VCGT/VCGTQ:16:result_uint32x2 [] = { 0, ffffffff, } + +VCLT/VCLTQ output: +VCLT/VCLTQ:0:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, 0, 0, } +VCLT/VCLTQ:1:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:2:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:3:result_uint8x8 [] = { ff, ff, ff, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:4:result_uint16x4 [] = { ffff, ffff, 0, 0, } +VCLT/VCLTQ:5:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:6:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, 0, 0, 0, 0, } +VCLT/VCLTQ:7:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } 
+VCLT/VCLTQ:8:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:9:result_uint8x16 [] = { ff, ff, ff, ff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLT/VCLTQ:10:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, 0, 0, } +VCLT/VCLTQ:11:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:12:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:13:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } +VCLT/VCLTQ:14:result_uint32x2 [] = { 0, 0, } +VCLT/VCLTQ:15:result_uint32x2 [] = { ffffffff, 0, } +VCLT/VCLTQ:16:result_uint32x2 [] = { 0, 0, } + +VBSL/VBSLQ output: +VBSL/VBSLQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:3:result_int64x1 [] = { fffffffffffffffd, } +VBSL/VBSLQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:5:result_uint16x4 [] = { fff0, fff0, fff2, fff2, } +VBSL/VBSLQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VBSL/VBSLQ:7:result_uint64x1 [] = { fffffff1, } +VBSL/VBSLQ:8:result_float32x2 [] = { c1800004 -0x1.000008p+4 -16, c1700004 -0x1.e00008p+3 -15, } +VBSL/VBSLQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, fffffff2, fffffff2, fffffff2, fffffff2, fffffff6, fffffff6, fffffff6, fffffff6, } +VBSL/VBSLQ:10:result_int16x8 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VBSL/VBSLQ:11:result_int32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } +VBSL/VBSLQ:12:result_int64x2 [] = { fffffffffffffffd, fffffffffffffffd, } +VBSL/VBSLQ:13:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, f3, f3, f3, f3, f7, f7, f7, f7, } +VBSL/VBSLQ:14:result_uint16x8 [] = { fff0, fff0, fff2, fff2, fff4, fff4, fff6, fff6, } +VBSL/VBSLQ:15:result_uint32x4 [] = { fffffff0, fffffff0, fffffff2, fffffff2, } 
+VBSL/VBSLQ:16:result_uint64x2 [] = { fffffff1, fffffff1, } +VBSL/VBSLQ:17:result_float32x4 [] = { c1800001 -0x1.000002p+4 -16, c1700001 -0x1.e00002p+3 -15, c1600001 -0x1.c00002p+3 -14, c1500001 -0x1.a00002p+3 -13, } + +VSHL/VSHLQ output: +VSHL/VSHLQ:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL/VSHLQ:1:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VSHL/VSHLQ:2:result_int32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:3:result_int64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:4:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VSHL/VSHLQ:5:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VSHL/VSHLQ:6:result_uint32x2 [] = { fffff000, fffff100, } +VSHL/VSHLQ:7:result_uint64x1 [] = { ffffffffffffff80, } +VSHL/VSHLQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHL/VSHLQ:9:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL/VSHLQ:10:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:11:result_int32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:12:result_int64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:13:result_uint8x16 [] = { 0, 20, 40, 60, 80, a0, c0, e0, 0, 20, 40, 60, 80, a0, c0, e0, } +VSHL/VSHLQ:14:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VSHL/VSHLQ:15:result_uint32x4 [] = { 0, 40000000, 80000000, c0000000, } +VSHL/VSHLQ:16:result_uint64x2 [] = { 0, 8000000000000000, } +VSHL/VSHLQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHL/VSHLQ (large shift amount) output: +VSHL/VSHLQ:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:19:result_int16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:20:result_int32x2 [] = { 0, 0, } 
+VSHL/VSHLQ:21:result_int64x1 [] = { 0, } +VSHL/VSHLQ:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:24:result_uint32x2 [] = { 0, 0, } +VSHL/VSHLQ:25:result_uint64x1 [] = { 0, } +VSHL/VSHLQ:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHL/VSHLQ:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:29:result_int32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:30:result_int64x2 [] = { 0, 0, } +VSHL/VSHLQ:31:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VSHL/VSHLQ:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VSHL/VSHLQ:34:result_uint64x2 [] = { 0, 0, } +VSHL/VSHLQ:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHL/VSHLQ (negative shift amount) output: +VSHL/VSHLQ:36:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VSHL/VSHLQ:37:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VSHL/VSHLQ:38:result_int32x2 [] = { fffffffc, fffffffc, } +VSHL/VSHLQ:39:result_int64x1 [] = { ffffffffffffffff, } +VSHL/VSHLQ:40:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VSHL/VSHLQ:41:result_uint16x4 [] = { 7ff8, 7ff8, 7ff9, 7ff9, } +VSHL/VSHLQ:42:result_uint32x2 [] = { 3ffffffc, 3ffffffc, } +VSHL/VSHLQ:43:result_uint64x1 [] = { fffffffffffffff, } +VSHL/VSHLQ:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHL/VSHLQ:45:result_int8x16 [] = { fffffffc, fffffffc, fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, fffffffe, fffffffe, ffffffff, ffffffff, ffffffff, ffffffff, } 
+VSHL/VSHLQ:46:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSHL/VSHLQ:47:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } +VSHL/VSHLQ:48:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VSHL/VSHLQ:49:result_uint8x16 [] = { 3c, 3c, 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, } +VSHL/VSHLQ:50:result_uint16x8 [] = { 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, 7ff, } +VSHL/VSHLQ:51:result_uint32x4 [] = { 1ffffffe, 1ffffffe, 1ffffffe, 1ffffffe, } +VSHL/VSHLQ:52:result_uint64x2 [] = { 7ffffffffffffff, 7ffffffffffffff, } +VSHL/VSHLQ:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHL_N output: +VSHL_N:0:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:1:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHL_N:2:result_int32x2 [] = { ffffff80, ffffff88, } +VSHL_N:3:result_int64x1 [] = { ffffffffffffffc0, } +VSHL_N:4:result_uint8x8 [] = { c0, c4, c8, cc, d0, d4, d8, dc, } +VSHL_N:5:result_uint16x4 [] = { ff00, ff10, ff20, ff30, } +VSHL_N:6:result_uint32x2 [] = { ffffff80, ffffff88, } +VSHL_N:7:result_uint64x1 [] = { ffffffffffffffe0, } +VSHL_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHL_N:9:result_int8x16 [] = { 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, 0, 20, 40, 60, ffffff80, ffffffa0, ffffffc0, ffffffe0, } +VSHL_N:10:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHL_N:11:result_int32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:12:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VSHL_N:13:result_uint8x16 [] = { c0, c4, c8, cc, d0, d4, d8, dc, e0, e4, e8, ec, f0, f4, f8, fc, } +VSHL_N:14:result_uint16x8 [] = { ff80, 
ff88, ff90, ff98, ffa0, ffa8, ffb0, ffb8, } +VSHL_N:15:result_uint32x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VSHL_N:16:result_uint64x2 [] = { ffffffffffffffe0, ffffffffffffffe2, } +VSHL_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (with input = 0) overflow output: +VQSHL/VQSHLQ:0:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:1:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:2:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:3:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:4:vqshl_u8 Neon overflow 0 +VQSHL/VQSHLQ:5:vqshl_u16 Neon overflow 0 +VQSHL/VQSHLQ:6:vqshl_u32 Neon overflow 0 +VQSHL/VQSHLQ:7:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:8:vqshlq_s8 Neon overflow 0 +VQSHL/VQSHLQ:9:vqshlq_s16 Neon overflow 0 +VQSHL/VQSHLQ:10:vqshlq_s32 Neon overflow 0 +VQSHL/VQSHLQ:11:vqshlq_s64 Neon overflow 0 +VQSHL/VQSHLQ:12:vqshlq_u8 Neon overflow 0 +VQSHL/VQSHLQ:13:vqshlq_u16 Neon overflow 0 +VQSHL/VQSHLQ:14:vqshlq_u32 Neon overflow 0 +VQSHL/VQSHLQ:15:vqshlq_u64 Neon overflow 0 + +VQSHL/VQSHLQ (with input = 0) output: +VQSHL/VQSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:18:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:19:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:23:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:25:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:26:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:27:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:28:result_int64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:29:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:30:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:31:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:32:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (input 0 and negative shift amount) overflow output: +VQSHL/VQSHLQ:34:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:35:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:36:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:37:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:38:vqshl_u8 Neon overflow 0 +VQSHL/VQSHLQ:39:vqshl_u16 Neon overflow 0 +VQSHL/VQSHLQ:40:vqshl_u32 Neon overflow 0 +VQSHL/VQSHLQ:41:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:42:vqshlq_s8 Neon overflow 0 +VQSHL/VQSHLQ:43:vqshlq_s16 Neon overflow 0 +VQSHL/VQSHLQ:44:vqshlq_s32 Neon overflow 0 +VQSHL/VQSHLQ:45:vqshlq_s64 Neon overflow 0 +VQSHL/VQSHLQ:46:vqshlq_u8 Neon overflow 0 +VQSHL/VQSHLQ:47:vqshlq_u16 Neon overflow 0 +VQSHL/VQSHLQ:48:vqshlq_u32 Neon overflow 0 +VQSHL/VQSHLQ:49:vqshlq_u64 Neon overflow 0 + +VQSHL/VQSHLQ (input 0 and negative shift amount) output: +VQSHL/VQSHLQ:50:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:51:result_int16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:52:result_int32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:53:result_int64x1 [] = { 0, } +VQSHL/VQSHLQ:54:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:55:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:56:result_uint32x2 [] = { 0, 0, } +VQSHL/VQSHLQ:57:result_uint64x1 [] = { 0, } +VQSHL/VQSHLQ:58:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:59:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:60:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:61:result_int32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:62:result_int64x2 [] = { 0, 
0, } +VQSHL/VQSHLQ:63:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:64:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHL/VQSHLQ:65:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHL/VQSHLQ:66:result_uint64x2 [] = { 0, 0, } +VQSHL/VQSHLQ:67:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ overflow output: +VQSHL/VQSHLQ:68:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:69:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:70:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:71:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:72:vqshl_u8 Neon overflow 1 +VQSHL/VQSHLQ:73:vqshl_u16 Neon overflow 1 +VQSHL/VQSHLQ:74:vqshl_u32 Neon overflow 1 +VQSHL/VQSHLQ:75:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:76:vqshlq_s8 Neon overflow 1 +VQSHL/VQSHLQ:77:vqshlq_s16 Neon overflow 1 +VQSHL/VQSHLQ:78:vqshlq_s32 Neon overflow 1 +VQSHL/VQSHLQ:79:vqshlq_s64 Neon overflow 1 +VQSHL/VQSHLQ:80:vqshlq_u8 Neon overflow 1 +VQSHL/VQSHLQ:81:vqshlq_u16 Neon overflow 1 +VQSHL/VQSHLQ:82:vqshlq_u32 Neon overflow 1 +VQSHL/VQSHLQ:83:vqshlq_u64 Neon overflow 1 + +VQSHL/VQSHLQ output: +VQSHL/VQSHLQ:84:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL/VQSHLQ:85:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQSHL/VQSHLQ:86:result_int32x2 [] = { fffff000, fffff100, } +VQSHL/VQSHLQ:87:result_int64x1 [] = { fffffffffffffffe, } +VQSHL/VQSHLQ:88:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:89:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:90:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:91:result_uint64x1 [] = { 1ffffffffffffffe, } +VQSHL/VQSHLQ:92:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:93:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, 
ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:94:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:95:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:96:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:97:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:98:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:99:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:100:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:101:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (negative shift amount) overflow output: +VQSHL/VQSHLQ:102:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:103:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:104:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:105:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:106:vqshl_u8 Neon overflow 0 +VQSHL/VQSHLQ:107:vqshl_u16 Neon overflow 0 +VQSHL/VQSHLQ:108:vqshl_u32 Neon overflow 0 +VQSHL/VQSHLQ:109:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:110:vqshlq_s8 Neon overflow 0 +VQSHL/VQSHLQ:111:vqshlq_s16 Neon overflow 0 +VQSHL/VQSHLQ:112:vqshlq_s32 Neon overflow 0 +VQSHL/VQSHLQ:113:vqshlq_s64 Neon overflow 0 +VQSHL/VQSHLQ:114:vqshlq_u8 Neon overflow 0 +VQSHL/VQSHLQ:115:vqshlq_u16 Neon overflow 0 +VQSHL/VQSHLQ:116:vqshlq_u32 Neon overflow 0 +VQSHL/VQSHLQ:117:vqshlq_u64 Neon overflow 0 + +VQSHL/VQSHLQ (negative shift amount) output: +VQSHL/VQSHLQ:118:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHL/VQSHLQ:119:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffc, } +VQSHL/VQSHLQ:120:result_int32x2 [] = { 
fffffffe, fffffffe, } +VQSHL/VQSHLQ:121:result_int64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:122:result_uint8x8 [] = { 78, 78, 79, 79, 7a, 7a, 7b, 7b, } +VQSHL/VQSHLQ:123:result_uint16x4 [] = { 3ffc, 3ffc, 3ffc, 3ffc, } +VQSHL/VQSHLQ:124:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQSHL/VQSHLQ:125:result_uint64x1 [] = { fffffffffffffff, } +VQSHL/VQSHLQ:126:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:127:result_int8x16 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:128:result_int16x8 [] = { ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:129:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:130:result_int64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:131:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VQSHL/VQSHLQ:132:result_uint16x8 [] = { 1f, 1f, 1f, 1f, 1f, 1f, 1f, 1f, } +VQSHL/VQSHLQ:133:result_uint32x4 [] = { 7ffff, 7ffff, 7ffff, 7ffff, } +VQSHL/VQSHLQ:134:result_uint64x2 [] = { fffffffffff, fffffffffff, } +VQSHL/VQSHLQ:135:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (large shift amount, negative input) overflow output: +VQSHL/VQSHLQ:136:vqshl_s8 Neon overflow 1 +VQSHL/VQSHLQ:137:vqshl_s16 Neon overflow 1 +VQSHL/VQSHLQ:138:vqshl_s32 Neon overflow 1 +VQSHL/VQSHLQ:139:vqshl_s64 Neon overflow 1 +VQSHL/VQSHLQ:140:vqshl_u8 Neon overflow 1 +VQSHL/VQSHLQ:141:vqshl_u16 Neon overflow 1 +VQSHL/VQSHLQ:142:vqshl_u32 Neon overflow 1 +VQSHL/VQSHLQ:143:vqshl_u64 Neon overflow 1 +VQSHL/VQSHLQ:144:vqshlq_s8 Neon overflow 1 +VQSHL/VQSHLQ:145:vqshlq_s16 Neon overflow 1 +VQSHL/VQSHLQ:146:vqshlq_s32 Neon overflow 1 
+VQSHL/VQSHLQ:147:vqshlq_s64 Neon overflow 1 +VQSHL/VQSHLQ:148:vqshlq_u8 Neon overflow 1 +VQSHL/VQSHLQ:149:vqshlq_u16 Neon overflow 1 +VQSHL/VQSHLQ:150:vqshlq_u32 Neon overflow 1 +VQSHL/VQSHLQ:151:vqshlq_u64 Neon overflow 1 + +VQSHL/VQSHLQ (large shift amount, negative input) output: +VQSHL/VQSHLQ:152:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:153:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:154:result_int32x2 [] = { 80000000, 80000000, } +VQSHL/VQSHLQ:155:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:156:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:157:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:158:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:159:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:160:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:161:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQSHL/VQSHLQ:162:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQSHL/VQSHLQ:163:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQSHL/VQSHLQ:164:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQSHL/VQSHLQ:165:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:166:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:167:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:168:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:169:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 
0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (check saturation/overflow) overflow output: +VQSHL/VQSHLQ:170:vqshl_s8 Neon overflow 0 +VQSHL/VQSHLQ:171:vqshl_s16 Neon overflow 0 +VQSHL/VQSHLQ:172:vqshl_s32 Neon overflow 0 +VQSHL/VQSHLQ:173:vqshl_s64 Neon overflow 0 +VQSHL/VQSHLQ:174:vqshl_u8 Neon overflow 0 +VQSHL/VQSHLQ:175:vqshl_u16 Neon overflow 0 +VQSHL/VQSHLQ:176:vqshl_u32 Neon overflow 0 +VQSHL/VQSHLQ:177:vqshl_u64 Neon overflow 0 +VQSHL/VQSHLQ:178:vqshlq_s8 Neon overflow 0 +VQSHL/VQSHLQ:179:vqshlq_s16 Neon overflow 0 +VQSHL/VQSHLQ:180:vqshlq_s32 Neon overflow 0 +VQSHL/VQSHLQ:181:vqshlq_s64 Neon overflow 0 +VQSHL/VQSHLQ:182:vqshlq_u8 Neon overflow 0 +VQSHL/VQSHLQ:183:vqshlq_u16 Neon overflow 0 +VQSHL/VQSHLQ:184:vqshlq_u32 Neon overflow 0 +VQSHL/VQSHLQ:185:vqshlq_u64 Neon overflow 0 + +VQSHL/VQSHLQ (check saturation/overflow) output: +VQSHL/VQSHLQ:186:result_int8x8 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:187:result_int16x4 [] = { 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:188:result_int32x2 [] = { 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:189:result_int64x1 [] = { 3fffffffffffffff, } +VQSHL/VQSHLQ:190:result_uint8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:191:result_uint16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:192:result_uint32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:193:result_uint64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:194:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:195:result_int8x16 [] = { 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, 3f, } +VQSHL/VQSHLQ:196:result_int16x8 [] = { 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, 3fff, } +VQSHL/VQSHLQ:197:result_int32x4 [] = { 3fffffff, 3fffffff, 3fffffff, 3fffffff, } +VQSHL/VQSHLQ:198:result_int64x2 [] = { 3fffffffffffffff, 3fffffffffffffff, } +VQSHL/VQSHLQ:199:result_uint8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:200:result_uint16x8 [] = { 
7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:201:result_uint32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:202:result_uint64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:203:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (large shift amount, positive input) overflow output: +VQSHL/VQSHLQ:204:vqshl_s8 Neon overflow 1 +VQSHL/VQSHLQ:205:vqshl_s16 Neon overflow 1 +VQSHL/VQSHLQ:206:vqshl_s32 Neon overflow 1 +VQSHL/VQSHLQ:207:vqshl_s64 Neon overflow 1 +VQSHL/VQSHLQ:208:vqshl_u8 Neon overflow 1 +VQSHL/VQSHLQ:209:vqshl_u16 Neon overflow 1 +VQSHL/VQSHLQ:210:vqshl_u32 Neon overflow 1 +VQSHL/VQSHLQ:211:vqshl_u64 Neon overflow 1 +VQSHL/VQSHLQ:212:vqshlq_s8 Neon overflow 1 +VQSHL/VQSHLQ:213:vqshlq_s16 Neon overflow 1 +VQSHL/VQSHLQ:214:vqshlq_s32 Neon overflow 1 +VQSHL/VQSHLQ:215:vqshlq_s64 Neon overflow 1 +VQSHL/VQSHLQ:216:vqshlq_u8 Neon overflow 1 +VQSHL/VQSHLQ:217:vqshlq_u16 Neon overflow 1 +VQSHL/VQSHLQ:218:vqshlq_u32 Neon overflow 1 +VQSHL/VQSHLQ:219:vqshlq_u64 Neon overflow 1 + +VQSHL/VQSHLQ (large shift amount, positive input) output: +VQSHL/VQSHLQ:220:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:221:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:222:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:223:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL/VQSHLQ:224:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:225:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:226:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:227:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:228:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:229:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } 
+VQSHL/VQSHLQ:230:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:231:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:232:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:233:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:234:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:235:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:236:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:237:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL/VQSHLQ (check saturation on 64 bits) overflow output: +VQSHL/VQSHLQ:238:vqshl_s8 Neon overflow 1 +VQSHL/VQSHLQ:239:vqshl_s16 Neon overflow 1 +VQSHL/VQSHLQ:240:vqshl_s32 Neon overflow 1 +VQSHL/VQSHLQ:241:vqshl_s64 Neon overflow 1 +VQSHL/VQSHLQ:242:vqshl_u8 Neon overflow 1 +VQSHL/VQSHLQ:243:vqshl_u16 Neon overflow 1 +VQSHL/VQSHLQ:244:vqshl_u32 Neon overflow 1 +VQSHL/VQSHLQ:245:vqshl_u64 Neon overflow 1 +VQSHL/VQSHLQ:246:vqshlq_s8 Neon overflow 1 +VQSHL/VQSHLQ:247:vqshlq_s16 Neon overflow 1 +VQSHL/VQSHLQ:248:vqshlq_s32 Neon overflow 1 +VQSHL/VQSHLQ:249:vqshlq_s64 Neon overflow 1 +VQSHL/VQSHLQ:250:vqshlq_u8 Neon overflow 1 +VQSHL/VQSHLQ:251:vqshlq_u16 Neon overflow 1 +VQSHL/VQSHLQ:252:vqshlq_u32 Neon overflow 1 +VQSHL/VQSHLQ:253:vqshlq_u64 Neon overflow 1 + +VQSHL/VQSHLQ (check saturation on 64 bits) output: +VQSHL/VQSHLQ:254:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:255:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:256:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:257:result_int64x1 [] = { 8000000000000000, } +VQSHL/VQSHLQ:258:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:259:result_uint16x4 [] 
= { ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:260:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL/VQSHLQ:261:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL/VQSHLQ:262:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL/VQSHLQ:263:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL/VQSHLQ:264:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL/VQSHLQ:265:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL/VQSHLQ:266:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL/VQSHLQ:267:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL/VQSHLQ:268:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL/VQSHLQ:269:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL/VQSHLQ:270:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL/VQSHLQ:271:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL_N/VQSHLQ_N overflow output: +VQSHL_N/VQSHLQ_N:0:vqshl_n_s8 Neon overflow 0 +VQSHL_N/VQSHLQ_N:1:vqshl_n_s16 Neon overflow 0 +VQSHL_N/VQSHLQ_N:2:vqshl_n_s32 Neon overflow 0 +VQSHL_N/VQSHLQ_N:3:vqshl_n_s64 Neon overflow 0 +VQSHL_N/VQSHLQ_N:4:vqshl_n_u8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:5:vqshl_n_u16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:6:vqshl_n_u32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:7:vqshl_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N:8:vqshlq_n_s8 Neon overflow 0 +VQSHL_N/VQSHLQ_N:9:vqshlq_n_s16 Neon overflow 0 +VQSHL_N/VQSHLQ_N:10:vqshlq_n_s32 Neon overflow 0 +VQSHL_N/VQSHLQ_N:11:vqshlq_n_s64 Neon overflow 0 +VQSHL_N/VQSHLQ_N:12:vqshlq_n_u8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:13:vqshlq_n_u16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:14:vqshlq_n_u32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:15:vqshlq_n_u64 Neon overflow 1 + 
+VQSHL_N/VQSHLQ_N output: +VQSHL_N/VQSHLQ_N:16:result_int8x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } +VQSHL_N/VQSHLQ_N:17:result_int16x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:18:result_int32x2 [] = { ffffffe0, ffffffe2, } +VQSHL_N/VQSHLQ_N:19:result_int64x1 [] = { ffffffffffffffc0, } +VQSHL_N/VQSHLQ_N:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:23:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL_N/VQSHLQ_N:25:result_int8x16 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, ffffffe0, ffffffe4, ffffffe8, ffffffec, fffffff0, fffffff4, fffffff8, fffffffc, } +VQSHL_N/VQSHLQ_N:26:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQSHL_N/VQSHLQ_N:27:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VQSHL_N/VQSHLQ_N:28:result_int64x2 [] = { ffffffffffffffc0, ffffffffffffffc4, } +VQSHL_N/VQSHLQ_N:29:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:30:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:31:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:32:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) overflow output: +VQSHL_N/VQSHLQ_N:34:vqshl_n_s8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:35:vqshl_n_s16 Neon 
overflow 1 +VQSHL_N/VQSHLQ_N:36:vqshl_n_s32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:37:vqshl_n_s64 Neon overflow 1 +VQSHL_N/VQSHLQ_N:38:vqshl_n_u8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:39:vqshl_n_u16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:40:vqshl_n_u32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:41:vqshl_n_u64 Neon overflow 1 +VQSHL_N/VQSHLQ_N:42:vqshlq_n_s8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:43:vqshlq_n_s16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:44:vqshlq_n_s32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:45:vqshlq_n_s64 Neon overflow 1 +VQSHL_N/VQSHLQ_N:46:vqshlq_n_u8 Neon overflow 1 +VQSHL_N/VQSHLQ_N:47:vqshlq_n_u16 Neon overflow 1 +VQSHL_N/VQSHLQ_N:48:vqshlq_n_u32 Neon overflow 1 +VQSHL_N/VQSHLQ_N:49:vqshlq_n_u64 Neon overflow 1 + +VQSHL_N/VQSHLQ_N (check saturation with large positive input) output: +VQSHL_N/VQSHLQ_N:50:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:51:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:52:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:53:result_int64x1 [] = { 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:54:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:55:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:56:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:57:result_uint64x1 [] = { ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:58:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHL_N/VQSHLQ_N:59:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHL_N/VQSHLQ_N:60:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQSHL_N/VQSHLQ_N:61:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQSHL_N/VQSHLQ_N:62:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQSHL_N/VQSHLQ_N:63:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHL_N/VQSHLQ_N:64:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, 
ffff, ffff, ffff, } +VQSHL_N/VQSHLQ_N:65:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHL_N/VQSHLQ_N:66:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHL_N/VQSHLQ_N:67:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (with input = 0) output: +VRSHL/VRSHLQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:2:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:3:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:4:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:6:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:7:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:9:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:10:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:11:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:12:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:13:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:14:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:15:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:16:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (input 0 and negative shift amount) output: +VRSHL/VRSHLQ:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:19:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:20:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:21:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VRSHL/VRSHLQ:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:24:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:25:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:29:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:30:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:31:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:34:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ output: +VRSHL/VRSHLQ:36:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VRSHL/VRSHLQ:37:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VRSHL/VRSHLQ:38:result_int32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:39:result_int64x1 [] = { fffffffffffffffe, } +VRSHL/VRSHLQ:40:result_uint8x8 [] = { e0, e2, e4, e6, e8, ea, ec, ee, } +VRSHL/VRSHLQ:41:result_uint16x4 [] = { ff80, ff88, ff90, ff98, } +VRSHL/VRSHLQ:42:result_uint32x2 [] = { fffff000, fffff100, } +VRSHL/VRSHLQ:43:result_uint64x1 [] = { 1ffffffffffffffe, } +VRSHL/VRSHLQ:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:45:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:46:result_int16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:47:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:48:result_int64x2 [] = { 0, 8000000000000000, } 
+VRSHL/VRSHLQ:49:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:50:result_uint16x8 [] = { 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, } +VRSHL/VRSHLQ:51:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:52:result_uint64x2 [] = { 0, 8000000000000000, } +VRSHL/VRSHLQ:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (negative shift amount) output: +VRSHL/VRSHLQ:54:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHL/VRSHLQ:55:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHL/VRSHLQ:56:result_int32x2 [] = { fffffffe, fffffffe, } +VRSHL/VRSHLQ:57:result_int64x1 [] = { ffffffffffffffff, } +VRSHL/VRSHLQ:58:result_uint8x8 [] = { 78, 79, 79, 7a, 7a, 7b, 7b, 7c, } +VRSHL/VRSHLQ:59:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VRSHL/VRSHLQ:60:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VRSHL/VRSHLQ:61:result_uint64x1 [] = { fffffffffffffff, } +VRSHL/VRSHLQ:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:63:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:64:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:65:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:66:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:67:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VRSHL/VRSHLQ:68:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:69:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VRSHL/VRSHLQ:70:result_uint64x2 [] = { 100000000000, 100000000000, } +VRSHL/VRSHLQ:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ 
(checking round_const overflow: shift by -1) output: +VRSHL/VRSHLQ:72:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:73:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:74:result_int32x2 [] = { 40000000, 40000000, } +VRSHL/VRSHLQ:75:result_int64x1 [] = { 4000000000000000, } +VRSHL/VRSHLQ:76:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:77:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:78:result_uint32x2 [] = { 80000000, 80000000, } +VRSHL/VRSHLQ:79:result_uint64x1 [] = { 8000000000000000, } +VRSHL/VRSHLQ:80:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:81:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHL/VRSHLQ:82:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHL/VRSHLQ:83:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHL/VRSHLQ:84:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHL/VRSHLQ:85:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHL/VRSHLQ:86:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHL/VRSHLQ:87:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHL/VRSHLQ:88:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHL/VRSHLQ:89:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (checking round_const overflow: shift by -3) output: +VRSHL/VRSHLQ:90:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:91:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:92:result_int32x2 [] = { 10000000, 10000000, } +VRSHL/VRSHLQ:93:result_int64x1 [] = { 1000000000000000, } +VRSHL/VRSHLQ:94:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } 
+VRSHL/VRSHLQ:95:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:96:result_uint32x2 [] = { 20000000, 20000000, } +VRSHL/VRSHLQ:97:result_uint64x1 [] = { 2000000000000000, } +VRSHL/VRSHLQ:98:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:99:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHL/VRSHLQ:100:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHL/VRSHLQ:101:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHL/VRSHLQ:102:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHL/VRSHLQ:103:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHL/VRSHLQ:104:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHL/VRSHLQ:105:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHL/VRSHLQ:106:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHL/VRSHLQ:107:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHL/VRSHLQ (large shift amount) output: +VRSHL/VRSHLQ:108:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:109:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:110:result_int32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:111:result_int64x1 [] = { 0, } +VRSHL/VRSHLQ:112:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:113:result_uint16x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:114:result_uint32x2 [] = { 0, 0, } +VRSHL/VRSHLQ:115:result_uint64x1 [] = { 0, } +VRSHL/VRSHLQ:116:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHL/VRSHLQ:117:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:118:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:119:result_int32x4 [] = { 
0, 0, 0, 0, } +VRSHL/VRSHLQ:120:result_int64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:121:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:122:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHL/VRSHLQ:123:result_uint32x4 [] = { 0, 0, 0, 0, } +VRSHL/VRSHLQ:124:result_uint64x2 [] = { 0, 0, } +VRSHL/VRSHLQ:125:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD2/VLD2Q chunk 0 output: +VLD2/VLD2Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD2/VLD2Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD2/VLD2Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2/VLD2Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2/VLD2Q:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD2/VLD2Q:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD2/VLD2Q:16:result_uint64x2 [] = { 3333333333333333, 
3333333333333333, } +VLD2/VLD2Q:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VLD2/VLD2Q chunk 1 output: +VLD2/VLD2Q:18:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:19:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:21:result_int64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:22:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD2/VLD2Q:23:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD2/VLD2Q:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD2/VLD2Q:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD2/VLD2Q:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD2/VLD2Q:27:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:28:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD2/VLD2Q:29:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:31:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD2/VLD2Q:32:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD2/VLD2Q:33:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD2/VLD2Q:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2/VLD2Q:35:result_float32x4 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, c1200000 -0x1.4p+3 -10, c1100000 -0x1.2p+3 -9, } + +VLD3/VLD3Q chunk 0 output: +VLD3/VLD3Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:3:result_int64x1 [] = { 
fffffffffffffff0, } +VLD3/VLD3Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD3/VLD3Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD3/VLD3Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3/VLD3Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3/VLD3Q:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD3/VLD3Q:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD3/VLD3Q:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VLD3/VLD3Q chunk 1 output: +VLD3/VLD3Q:18:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:19:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:21:result_int64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:22:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD3/VLD3Q:23:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD3/VLD3Q:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD3/VLD3Q:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD3/VLD3Q:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } 
+VLD3/VLD3Q:27:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:28:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD3/VLD3Q:29:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:31:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD3/VLD3Q:32:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD3/VLD3Q:33:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD3/VLD3Q:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:35:result_float32x4 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, c1200000 -0x1.4p+3 -10, c1100000 -0x1.2p+3 -9, } + +VLD3/VLD3Q chunk 2 output: +VLD3/VLD3Q:36:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:37:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:38:result_int32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:39:result_int64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:40:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:41:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD3/VLD3Q:42:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD3/VLD3Q:43:result_uint64x1 [] = { fffffffffffffff2, } +VLD3/VLD3Q:44:result_float32x2 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, } +VLD3/VLD3Q:45:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VLD3/VLD3Q:46:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:47:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:49:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VLD3/VLD3Q:50:result_uint16x8 
[] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD3/VLD3Q:51:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD3/VLD3Q:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3/VLD3Q:53:result_float32x4 [] = { c1000000 -0x1p+3 -8, c0e00000 -0x1.cp+2 -7, c0c00000 -0x1.8p+2 -6, c0a00000 -0x1.4p+2 -5, } + +VLD4/VLD4Q chunk 0 output: +VLD4/VLD4Q:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VLD4/VLD4Q:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4/VLD4Q:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4/VLD4Q:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4/VLD4Q:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD4/VLD4Q:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4/VLD4Q:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VLD4/VLD4Q chunk 1 output: 
+VLD4/VLD4Q:18:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:19:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:21:result_int64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:22:result_uint8x8 [] = { f8, f9, fa, fb, fc, fd, fe, ff, } +VLD4/VLD4Q:23:result_uint16x4 [] = { fff4, fff5, fff6, fff7, } +VLD4/VLD4Q:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4/VLD4Q:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD4/VLD4Q:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD4/VLD4Q:27:result_int8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:28:result_int16x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:29:result_int32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:31:result_uint8x16 [] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:32:result_uint16x8 [] = { fff8, fff9, fffa, fffb, fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:33:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VLD4/VLD4Q:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:35:result_float32x4 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, c1200000 -0x1.4p+3 -10, c1100000 -0x1.2p+3 -9, } + +VLD4/VLD4Q chunk 2 output: +VLD4/VLD4Q:36:result_int8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:37:result_int16x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:38:result_int32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:39:result_int64x1 [] = { fffffffffffffff2, } +VLD4/VLD4Q:40:result_uint8x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:41:result_uint16x4 [] = { fff8, fff9, fffa, fffb, } +VLD4/VLD4Q:42:result_uint32x2 [] = { fffffff4, fffffff5, } +VLD4/VLD4Q:43:result_uint64x1 [] = { 
fffffffffffffff2, } +VLD4/VLD4Q:44:result_float32x2 [] = { c1400000 -0x1.8p+3 -12, c1300000 -0x1.6p+3 -11, } +VLD4/VLD4Q:45:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VLD4/VLD4Q:46:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:47:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:49:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VLD4/VLD4Q:50:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, } +VLD4/VLD4Q:51:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VLD4/VLD4Q:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:53:result_float32x4 [] = { c1000000 -0x1p+3 -8, c0e00000 -0x1.cp+2 -7, c0c00000 -0x1.8p+2 -6, c0a00000 -0x1.4p+2 -5, } + +VLD4/VLD4Q chunk 3 output: +VLD4/VLD4Q:54:result_int8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:55:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:56:result_int32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:57:result_int64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:58:result_uint8x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:59:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VLD4/VLD4Q:60:result_uint32x2 [] = { fffffff6, fffffff7, } +VLD4/VLD4Q:61:result_uint64x1 [] = { fffffffffffffff3, } +VLD4/VLD4Q:62:result_float32x2 [] = { c1200000 -0x1.4p+3 -10, c1100000 -0x1.2p+3 -9, } +VLD4/VLD4Q:63:result_int8x16 [] = { fffffff8, ffffffff, fffffff9, ffffffff, fffffffa, ffffffff, fffffffb, ffffffff, fffffffc, ffffffff, fffffffd, ffffffff, fffffffe, ffffffff, ffffffff, ffffffff, } +VLD4/VLD4Q:64:result_int16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:65:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:66:result_int64x2 [] = { 3333333333333333, 
3333333333333333, } +VLD4/VLD4Q:67:result_uint8x16 [] = { f8, ff, f9, ff, fa, ff, fb, ff, fc, ff, fd, ff, fe, ff, ff, ff, } +VLD4/VLD4Q:68:result_uint16x8 [] = { 8, 9, a, b, c, d, e, f, } +VLD4/VLD4Q:69:result_uint32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VLD4/VLD4Q:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4/VLD4Q:71:result_float32x4 [] = { c0800000 -0x1p+2 -4, c0400000 -0x1.8p+1 -3, c0000000 -0x1p+1 -2, bf800000 -0x1p+0 -1, } + +VDUP_LANE/VDUP_LANEQ output: +VDUP_LANE/VDUP_LANEQ:0:result_int8x8 [] = { fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:2:result_int32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:4:result_uint8x8 [] = { f7, f7, f7, f7, f7, f7, f7, f7, } +VDUP_LANE/VDUP_LANEQ:5:result_uint16x4 [] = { fff3, fff3, fff3, fff3, } +VDUP_LANE/VDUP_LANEQ:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VDUP_LANE/VDUP_LANEQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, fffffff2, } +VDUP_LANE/VDUP_LANEQ:10:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VDUP_LANE/VDUP_LANEQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff1, fffffff1, } +VDUP_LANE/VDUP_LANEQ:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:13:result_uint8x16 [] = { f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, f5, } +VDUP_LANE/VDUP_LANEQ:14:result_uint16x8 [] = { fff1, fff1, fff1, fff1, fff1, fff1, fff1, fff1, } 
+VDUP_LANE/VDUP_LANEQ:15:result_uint32x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VDUP_LANE/VDUP_LANEQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VDUP_LANE/VDUP_LANEQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } + +VQDMULL_LANE overflow output: +VQDMULL_LANE:0:vqdmull_lane_s16 Neon overflow 0 +VQDMULL_LANE:1:vqdmull_lane_s32 Neon overflow 0 + +VQDMULL_LANE output: +VQDMULL_LANE:2:result_int32x4 [] = { 8000, 8000, 8000, 8000, } +VQDMULL_LANE:3:result_int64x2 [] = { 4000, 4000, } + +VQDMULL_LANE (check mul overflow) overflow output: +VQDMULL_LANE:4:vqdmull_lane_s16 Neon overflow 1 +VQDMULL_LANE:5:vqdmull_lane_s32 Neon overflow 1 + +VQDMULL_LANE (check mul overflow) output: +VQDMULL_LANE:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_LANE:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VQDMULL_N overflow output: +VQDMULL_N:0:vqdmull_n_s16 Neon overflow 0 +VQDMULL_N:1:vqdmull_n_s32 Neon overflow 0 + +VQDMULL_N output: +VQDMULL_N:2:result_int32x4 [] = { 44000, 44000, 44000, 44000, } +VQDMULL_N:3:result_int64x2 [] = { aa000, aa000, } + +VQDMULL_N (check mul overflow) overflow output: +VQDMULL_N:4:vqdmull_n_s16 Neon overflow 1 +VQDMULL_N:5:vqdmull_n_s32 Neon overflow 1 + +VQDMULL_N (check mul overflow) output: +VQDMULL_N:6:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQDMULL_N:7:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } + +VST1_LANE/VST1_LANEQ output: +VST1_LANE/VST1_LANEQ:0:result_int8x8 [] = { fffffff7, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:1:result_int16x4 [] = { fffffff3, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:2:result_int32x2 [] = { fffffff1, 33333333, } +VST1_LANE/VST1_LANEQ:3:result_int64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:4:result_uint8x8 [] = { f6, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:5:result_uint16x4 [] = { fff2, 3333, 
3333, 3333, } +VST1_LANE/VST1_LANEQ:6:result_uint32x2 [] = { fffffff0, 33333333, } +VST1_LANE/VST1_LANEQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VST1_LANE/VST1_LANEQ:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 33333333 0x1.666666p-25 4.17233e-08, } +VST1_LANE/VST1_LANEQ:9:result_int8x16 [] = { ffffffff, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:10:result_int16x8 [] = { fffffff5, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:11:result_int32x4 [] = { fffffff1, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:12:result_int64x2 [] = { fffffffffffffff1, 3333333333333333, } +VST1_LANE/VST1_LANEQ:13:result_uint8x16 [] = { fa, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST1_LANE/VST1_LANEQ:14:result_uint16x8 [] = { fff4, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VST1_LANE/VST1_LANEQ:15:result_uint32x4 [] = { fffffff3, 33333333, 33333333, 33333333, } +VST1_LANE/VST1_LANEQ:16:result_uint64x2 [] = { fffffffffffffff0, 3333333333333333, } +VST1_LANE/VST1_LANEQ:17:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSUB/VSUBQ output: +VSUB/VSUBQ:0:result_int8x8 [] = { ffffffee, ffffffef, fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, } +VSUB/VSUBQ:1:result_int16x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VSUB/VSUBQ:2:result_int32x2 [] = { ffffffed, ffffffee, } +VSUB/VSUBQ:3:result_int64x1 [] = { ffffffffffffff8c, } +VSUB/VSUBQ:4:result_uint8x8 [] = { dc, dd, de, df, e0, e1, e2, e3, } +VSUB/VSUBQ:5:result_uint16x4 [] = { ffd2, ffd3, ffd4, ffd5, } +VSUB/VSUBQ:6:result_uint32x2 [] = { ffffffc8, ffffffc9, } +VSUB/VSUBQ:7:result_uint64x1 [] = { ffffffffffffffee, } +VSUB/VSUBQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSUB/VSUBQ:9:result_int8x16 [] = { fffffffa, fffffffb, fffffffc, fffffffd, 
fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, } +VSUB/VSUBQ:10:result_int16x8 [] = { 4, 5, 6, 7, 8, 9, a, b, } +VSUB/VSUBQ:11:result_int32x4 [] = { e, f, 10, 11, } +VSUB/VSUBQ:12:result_int64x2 [] = { ffffffffffffffd8, ffffffffffffffd9, } +VSUB/VSUBQ:13:result_uint8x16 [] = { e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, } +VSUB/VSUBQ:14:result_uint16x8 [] = { ffed, ffee, ffef, fff0, fff1, fff2, fff3, fff4, } +VSUB/VSUBQ:15:result_uint32x4 [] = { ffffffb9, ffffffba, ffffffbb, ffffffbc, } +VSUB/VSUBQ:16:result_uint64x2 [] = { ffffffffffffffed, ffffffffffffffee, } +VSUB/VSUBQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +float32: +VSUB/VSUBQ:18:result_float32x2 [] = { c00ccccd -0x1.19999ap+1 -2.2, c00ccccd -0x1.19999ap+1 -2.2, } +VSUB/VSUBQ:19:result_float32x4 [] = { c00ccccc -0x1.199998p+1 -2.2, c00ccccc -0x1.199998p+1 -2.2, c00ccccc -0x1.199998p+1 -2.2, c00ccccc -0x1.199998p+1 -2.2, } + +VQADD/VQADDQ overflow output: +VQADD/VQADDQ:0:vqadd_s8 Neon overflow 0 +VQADD/VQADDQ:1:vqadd_s16 Neon overflow 0 +VQADD/VQADDQ:2:vqadd_s32 Neon overflow 0 +VQADD/VQADDQ:3:vqadd_s64 Neon overflow 0 +VQADD/VQADDQ:4:vqadd_u8 Neon overflow 1 +VQADD/VQADDQ:5:vqadd_u16 Neon overflow 1 +VQADD/VQADDQ:6:vqadd_u32 Neon overflow 1 +VQADD/VQADDQ:7:vqadd_u64 Neon overflow 1 +VQADD/VQADDQ:8:vqaddq_s8 Neon overflow 0 +VQADD/VQADDQ:9:vqaddq_s16 Neon overflow 0 +VQADD/VQADDQ:10:vqaddq_s32 Neon overflow 0 +VQADD/VQADDQ:11:vqaddq_s64 Neon overflow 0 +VQADD/VQADDQ:12:vqaddq_u8 Neon overflow 1 +VQADD/VQADDQ:13:vqaddq_u16 Neon overflow 1 +VQADD/VQADDQ:14:vqaddq_u32 Neon overflow 1 +VQADD/VQADDQ:15:vqaddq_u64 Neon overflow 1 + +VQADD/VQADDQ output: +VQADD/VQADDQ:16:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VQADD/VQADDQ:17:result_int16x4 [] = { 12, 13, 14, 15, } +VQADD/VQADDQ:18:result_int32x2 [] = { 23, 24, } +VQADD/VQADDQ:19:result_int64x1 [] = 
{ 34, } +VQADD/VQADDQ:20:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:21:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:22:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:23:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQADD/VQADDQ:25:result_int8x16 [] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, } +VQADD/VQADDQ:26:result_int16x8 [] = { 12, 13, 14, 15, 16, 17, 18, 19, } +VQADD/VQADDQ:27:result_int32x4 [] = { 23, 24, 25, 26, } +VQADD/VQADDQ:28:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:29:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:30:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:31:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQADD/VQADDQ:32:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQADD/VQADDQ:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQADD/VQADDQ 64 bits saturation overflow output: +VQADD/VQADDQ:34:vqadd_s64 Neon overflow 0 +VQADD/VQADDQ:35:vqadd_u64 Neon overflow 0 +VQADD/VQADDQ:36:vqaddq_s64 Neon overflow 0 +VQADD/VQADDQ:37:vqaddq_u64 Neon overflow 0 + +64 bits saturation: +VQADD/VQADDQ:38:result_int64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:39:result_uint64x1 [] = { fffffffffffffff0, } +VQADD/VQADDQ:40:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VQADD/VQADDQ:41:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } + +VQADD/VQADDQ 64 bits saturation overflow output: +VQADD/VQADDQ:42:vqadd_s64 Neon overflow 0 +VQADD/VQADDQ:43:vqadd_u64 Neon overflow 1 +VQADD/VQADDQ:44:vqaddq_s64 Neon overflow 0 +VQADD/VQADDQ:45:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ:46:result_int64x1 [] = { 34, } 
+VQADD/VQADDQ:47:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:48:result_int64x2 [] = { 34, 35, } +VQADD/VQADDQ:49:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +VQADD/VQADDQ 64 bits saturation overflow output: +VQADD/VQADDQ:50:vqadd_s64 Neon overflow 1 +VQADD/VQADDQ:51:vqadd_u64 Neon overflow 1 +VQADD/VQADDQ:52:vqaddq_s64 Neon overflow 1 +VQADD/VQADDQ:53:vqaddq_u64 Neon overflow 1 +VQADD/VQADDQ:54:result_int64x1 [] = { 8000000000000000, } +VQADD/VQADDQ:55:result_uint64x1 [] = { ffffffffffffffff, } +VQADD/VQADDQ:56:result_int64x2 [] = { 7fffffffffffffff, 7fffffffffffffff, } +VQADD/VQADDQ:57:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } + +less than 64 bits saturation: +VQADD/VQADDQ:58:vqadd_s8 Neon overflow 1 +VQADD/VQADDQ:59:vqadd_s16 Neon overflow 1 +VQADD/VQADDQ:60:vqadd_s32 Neon overflow 1 +VQADD/VQADDQ:61:vqaddq_s8 Neon overflow 1 +VQADD/VQADDQ:62:vqaddq_s16 Neon overflow 1 +VQADD/VQADDQ:63:vqaddq_s32 Neon overflow 1 +VQADD/VQADDQ:64:result_int8x8 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:65:result_int16x4 [] = { ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:66:result_int32x2 [] = { 80000000, 80000000, } +VQADD/VQADDQ:67:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQADD/VQADDQ:68:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQADD/VQADDQ:69:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } + +VQADD/VQADDQ less than 64 bits saturation overflow output: +VQADD/VQADDQ:70:vqadd_u8 Neon overflow 1 +VQADD/VQADDQ:71:vqadd_u16 Neon overflow 1 +VQADD/VQADDQ:72:vqadd_u32 Neon overflow 1 +VQADD/VQADDQ:73:vqaddq_u8 Neon overflow 1 +VQADD/VQADDQ:74:vqaddq_u16 Neon overflow 1 +VQADD/VQADDQ:75:vqaddq_u32 Neon overflow 1 +VQADD/VQADDQ:76:result_uint8x8 [] 
= { ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:77:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:78:result_uint32x2 [] = { ffffffff, ffffffff, } +VQADD/VQADDQ:79:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQADD/VQADDQ:80:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQADD/VQADDQ:81:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VABS/VABSQ output: +VABS/VABSQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:1:result_int16x4 [] = { 10, f, e, d, } +VABS/VABSQ:2:result_int32x2 [] = { 10, f, } +VABS/VABSQ:3:result_int64x1 [] = { 3333333333333333, } +VABS/VABSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABS/VABSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VABS/VABSQ:7:result_uint64x1 [] = { 3333333333333333, } +VABS/VABSQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VABS/VABSQ:9:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VABS/VABSQ:10:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VABS/VABSQ:11:result_int32x4 [] = { 10, f, e, d, } +VABS/VABSQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABS/VABSQ:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VABS/VABSQ:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VABS/VABSQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABS/VABSQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +float32: +VABS/VABSQ:18:result_float32x2 [] = { 40133333 0x1.266666p+1 2.3, 40133333 0x1.266666p+1 2.3, } +VABS/VABSQ:19:result_float32x4 
[] = { 4059999a 0x1.b33334p+1 3.4, 4059999a 0x1.b33334p+1 3.4, 4059999a 0x1.b33334p+1 3.4, 4059999a 0x1.b33334p+1 3.4, } + +VQABS/VQABSQ overflow output: +VQABS/VQABSQ:0:vqabs_s8 Neon overflow 0 +VQABS/VQABSQ:1:vqabs_s16 Neon overflow 0 +VQABS/VQABSQ:2:vqabs_s32 Neon overflow 0 +VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 0 +VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 0 +VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 0 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:7:result_int16x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:8:result_int32x2 [] = { 10, f, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQABS/VQABSQ:15:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQABS/VQABSQ:16:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQABS/VQABSQ:17:result_int32x4 [] = { 10, f, e, d, } +VQABS/VQABSQ:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQABS/VQABSQ overflow output: +VQABS/VQABSQ:0:vqabs_s8 Neon overflow 1 +VQABS/VQABSQ:1:vqabs_s16 Neon overflow 1 
+VQABS/VQABSQ:2:vqabs_s32 Neon overflow 1 +VQABS/VQABSQ:3:vqabsq_s8 Neon overflow 1 +VQABS/VQABSQ:4:vqabsq_s16 Neon overflow 1 +VQABS/VQABSQ:5:vqabsq_s32 Neon overflow 1 + +VQABS/VQABSQ output: +VQABS/VQABSQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQABS/VQABSQ:9:result_int64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQABS/VQABSQ:13:result_uint64x1 [] = { 3333333333333333, } +VQABS/VQABSQ:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQABS/VQABSQ:15:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQABS/VQABSQ:16:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQABS/VQABSQ:17:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQABS/VQABSQ:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQABS/VQABSQ:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQABS/VQABSQ:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQABS/VQABSQ:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQABS/VQABSQ:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCOMBINE output: +VCOMBINE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:2:result_int32x2 [] = { 33333333, 33333333, } +VCOMBINE:3:result_int64x1 [] = { 3333333333333333, } 
+VCOMBINE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCOMBINE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCOMBINE:6:result_uint32x2 [] = { 33333333, 33333333, } +VCOMBINE:7:result_uint64x1 [] = { 3333333333333333, } +VCOMBINE:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCOMBINE:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, 11, 11, 11, 11, 11, 11, 11, } +VCOMBINE:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 22, 22, 22, 22, } +VCOMBINE:11:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VCOMBINE:12:result_int64x2 [] = { fffffffffffffff0, 44, } +VCOMBINE:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, 55, 55, 55, 55, 55, 55, 55, 55, } +VCOMBINE:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 66, 66, 66, 66, } +VCOMBINE:15:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VCOMBINE:16:result_uint64x2 [] = { fffffffffffffff0, 88, } +VCOMBINE:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, 40533333 0x1.a66666p+1 3.3, 40533333 0x1.a66666p+1 3.3, } + +VMAX/VMAXQ output: +VMAX/VMAXQ:0:result_int8x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:1:result_int16x4 [] = { fffffff2, fffffff2, fffffff2, fffffff3, } +VMAX/VMAXQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:3:result_int64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:4:result_uint8x8 [] = { f3, f3, f3, f3, f4, f5, f6, f7, } +VMAX/VMAXQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff3, } +VMAX/VMAXQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMAX/VMAXQ:7:result_uint64x1 [] = { 3333333333333333, } +VMAX/VMAXQ:8:result_float32x2 [] = { c1780000 -0x1.fp+3 -15.5, c1700000 -0x1.ep+3 -15, } +VMAX/VMAXQ:9:result_int8x16 [] = { fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, 
fffffffc, fffffffd, fffffffe, ffffffff, } +VMAX/VMAXQ:10:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMAX/VMAXQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:13:result_uint8x16 [] = { f9, f9, f9, f9, f9, f9, f9, f9, f9, f9, fa, fb, fc, fd, fe, ff, } +VMAX/VMAXQ:14:result_uint16x8 [] = { fff2, fff2, fff2, fff3, fff4, fff5, fff6, fff7, } +VMAX/VMAXQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff3, } +VMAX/VMAXQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMAX/VMAXQ:17:result_float32x4 [] = { c1680000 -0x1.dp+3 -14.5, c1680000 -0x1.dp+3 -14.5, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VMIN/VMINQ output: +VMIN/VMINQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff2, } +VMIN/VMINQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:3:result_int64x1 [] = { 3333333333333333, } +VMIN/VMINQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f3, f3, f3, f3, } +VMIN/VMINQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff1, } +VMIN/VMINQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VMIN/VMINQ:7:result_uint64x1 [] = { 3333333333333333, } +VMIN/VMINQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1780000 -0x1.fp+3 -15.5, } +VMIN/VMINQ:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, fffffff4, } +VMIN/VMINQ:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff3, fffffff3, fffffff3, fffffff3, } +VMIN/VMINQ:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:13:result_uint8x16 [] = { f0, f1, f2, f3, 
f4, f5, f6, f7, f8, f9, f9, f9, f9, f9, f9, f9, } +VMIN/VMINQ:14:result_uint16x8 [] = { fff0, fff1, fff2, fff2, fff2, fff2, fff2, fff2, } +VMIN/VMINQ:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff1, } +VMIN/VMINQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMIN/VMINQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1680000 -0x1.dp+3 -14.5, c1680000 -0x1.dp+3 -14.5, } + +VNEG/VNEGQ output: +VNEG/VNEGQ:0:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:1:result_int16x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:2:result_int32x2 [] = { 10, f, } +VNEG/VNEGQ:3:result_int64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VNEG/VNEGQ:7:result_uint64x1 [] = { 3333333333333333, } +VNEG/VNEGQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VNEG/VNEGQ:9:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VNEG/VNEGQ:10:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VNEG/VNEGQ:11:result_int32x4 [] = { 10, f, e, d, } +VNEG/VNEGQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VNEG/VNEGQ:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VNEG/VNEGQ:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VNEG/VNEGQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VNEG/VNEGQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +float32: +VNEG/VNEGQ:18:result_float32x2 [] = { c0133333 -0x1.266666p+1 -2.3, c0133333 -0x1.266666p+1 -2.3, } +VNEG/VNEGQ:19:result_float32x4 [] = { 
c059999a -0x1.b33334p+1 -3.4, c059999a -0x1.b33334p+1 -3.4, c059999a -0x1.b33334p+1 -3.4, c059999a -0x1.b33334p+1 -3.4, } + +VQNEG/VQNEGQ overflow output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 0 +VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 0 +VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 0 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 0 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 0 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 0 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 10, f, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQNEG/VQNEGQ:15:result_int8x16 [] = { 10, f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, } +VQNEG/VQNEGQ:16:result_int16x8 [] = { 10, f, e, d, c, b, a, 9, } +VQNEG/VQNEGQ:17:result_int32x4 [] = { 10, f, e, d, } +VQNEG/VQNEGQ:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQNEG/VQNEGQ overflow output: +VQNEG/VQNEGQ:0:vqneg_s8 Neon overflow 1 +VQNEG/VQNEGQ:1:vqneg_s16 Neon overflow 1 
+VQNEG/VQNEGQ:2:vqneg_s32 Neon overflow 1 +VQNEG/VQNEGQ:3:vqnegq_s8 Neon overflow 1 +VQNEG/VQNEGQ:4:vqnegq_s16 Neon overflow 1 +VQNEG/VQNEGQ:5:vqnegq_s32 Neon overflow 1 + +VQNEG/VQNEGQ output: +VQNEG/VQNEGQ:6:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:7:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:8:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:9:result_int64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:10:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:11:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:12:result_uint32x2 [] = { 33333333, 33333333, } +VQNEG/VQNEGQ:13:result_uint64x1 [] = { 3333333333333333, } +VQNEG/VQNEGQ:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQNEG/VQNEGQ:15:result_int8x16 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQNEG/VQNEGQ:16:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQNEG/VQNEGQ:17:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQNEG/VQNEGQ:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQNEG/VQNEGQ:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQNEG/VQNEGQ:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQNEG/VQNEGQ:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQNEG/VQNEGQ:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLAL output: +VMLAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL:3:result_int64x1 [] = { 3333333333333333, } +VMLAL:4:result_uint8x8 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLAL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:10:result_int16x8 [] = { ffffe907, ffffe908, ffffe909, ffffe90a, ffffe90b, ffffe90c, ffffe90d, ffffe90e, } +VMLAL:11:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:12:result_int64x2 [] = { 3e07, 3e08, } +VMLAL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL:14:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLAL:15:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL:16:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLSL output: +VMLSL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL:3:result_int64x1 [] = { 3333333333333333, } +VMLSL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLSL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:10:result_int16x8 [] = { 16d9, 16da, 16db, 16dc, 16dd, 16de, 16df, 16e0, } +VMLSL:11:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:12:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } 
+VMLSL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL:14:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLSL:15:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL:16:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLAL_LANE output: +VMLAL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_LANE:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLAL_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:11:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:12:result_int64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_LANE:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_LANE:15:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLAL_LANE:16:result_uint64x2 [] = { 3e07, 3e08, } +VMLAL_LANE:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLSL_LANE output: +VMLSL_LANE:0:result_int8x8 [] = { 
33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_LANE:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLSL_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:11:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:12:result_int64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_LANE:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_LANE:15:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLSL_LANE:16:result_uint64x2 [] = { ffffffffffffc1d9, ffffffffffffc1da, } +VMLSL_LANE:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLAL_N output: +VMLAL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLAL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLAL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLAL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLAL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLAL_N:8:result_float32x2 [] = { 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLAL_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:11:result_int32x4 [] = { 595, 596, 597, 598, } +VMLAL_N:12:result_int64x2 [] = { b3a, b3b, } +VMLAL_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLAL_N:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLAL_N:15:result_uint32x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLAL_N:16:result_uint64x2 [] = { 10df, 10e0, } +VMLAL_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLSL_N output: +VMLSL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VMLSL_N:3:result_int64x1 [] = { 3333333333333333, } +VMLSL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMLSL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VMLSL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLSL_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMLSL_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:11:result_int32x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLSL_N:12:result_int64x2 [] = { fffffffffffff4a6, fffffffffffff4a7, } +VMLSL_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLSL_N:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMLSL_N:15:result_uint32x4 [] = { ffffef01, 
ffffef02, ffffef03, ffffef04, } +VMLSL_N:16:result_uint64x2 [] = { ffffffffffffef01, ffffffffffffef02, } +VMLSL_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMOVL output: +VMOVL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:2:result_int32x2 [] = { 33333333, 33333333, } +VMOVL:3:result_int64x1 [] = { 3333333333333333, } +VMOVL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMOVL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMOVL:7:result_uint64x1 [] = { 3333333333333333, } +VMOVL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMOVL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVL:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVL:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VMOVL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVL:14:result_uint16x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVL:15:result_uint32x4 [] = { fff0, fff1, fff2, fff3, } +VMOVL:16:result_uint64x2 [] = { fffffff0, fffffff1, } +VMOVL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMOVN output: +VMOVN:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VMOVN:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VMOVN:2:result_int32x2 [] = { fffffff0, fffffff1, } +VMOVN:3:result_int64x1 [] = { 3333333333333333, } 
+VMOVN:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VMOVN:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VMOVN:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VMOVN:7:result_uint64x1 [] = { 3333333333333333, } +VMOVN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMOVN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMOVN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMOVN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VMOVN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMOVN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMULL output: +VMULL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL:3:result_int64x1 [] = { 3333333333333333, } +VMULL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL:7:result_uint64x1 [] = { 3333333333333333, } +VMULL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMULL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:10:result_int16x8 [] = { 100, e1, c4, a9, 90, 79, 64, 51, } +VMULL:11:result_int32x4 [] = { 100, e1, c4, a9, } +VMULL:12:result_int64x2 [] = { 
100, e1, } +VMULL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL:14:result_uint16x8 [] = { e100, e2e1, e4c4, e6a9, e890, ea79, ec64, ee51, } +VMULL:15:result_uint32x4 [] = { ffe00100, ffe200e1, ffe400c4, ffe600a9, } +VMULL:16:result_uint64x2 [] = { ffffffe000000100, ffffffe2000000e1, } +VMULL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMULL_LANE output: +VMULL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:2:result_int32x2 [] = { 33333333, 33333333, } +VMULL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMULL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VMULL_LANE:6:result_uint32x2 [] = { 33333333, 33333333, } +VMULL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMULL_LANE:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMULL_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:11:result_int32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:12:result_int64x2 [] = { 2000, 2000, } +VMULL_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMULL_LANE:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VMULL_LANE:15:result_uint32x4 [] = { 4000, 4000, 4000, 4000, } +VMULL_LANE:16:result_uint64x2 [] = { 2000, 2000, } +VMULL_LANE:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VREV16 output: +VREV16:0:result_int8x8 [] = 
{ fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV16:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:2:result_int32x2 [] = { 33333333, 33333333, } +VREV16:3:result_int64x1 [] = { 3333333333333333, } +VREV16:4:result_uint8x8 [] = { f1, f0, f3, f2, f5, f4, f7, f6, } +VREV16:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VREV16:6:result_uint32x2 [] = { 33333333, 33333333, } +VREV16:7:result_uint64x1 [] = { 3333333333333333, } +VREV16:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VREV16:9:result_int8x16 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, fffffff9, fffffff8, fffffffb, fffffffa, fffffffd, fffffffc, ffffffff, fffffffe, } +VREV16:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:13:result_uint8x16 [] = { f1, f0, f3, f2, f5, f4, f7, f6, f9, f8, fb, fa, fd, fc, ff, fe, } +VREV16:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VREV16:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV16:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV16:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VREV32 output: +VREV32:18:result_int8x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV32:19:result_int16x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV32:20:result_int32x2 [] = { 33333333, 33333333, } +VREV32:21:result_int64x1 [] = { 3333333333333333, } +VREV32:22:result_uint8x8 [] = { f3, f2, f1, f0, f7, f6, f5, f4, } +VREV32:23:result_uint16x4 [] = { fff1, fff0, fff3, fff2, } +VREV32:24:result_uint32x2 
[] = { 33333333, 33333333, } +VREV32:25:result_uint64x1 [] = { 3333333333333333, } +VREV32:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VREV32:27:result_int8x16 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, fffffffb, fffffffa, fffffff9, fffffff8, ffffffff, fffffffe, fffffffd, fffffffc, } +VREV32:28:result_int16x8 [] = { fffffff1, fffffff0, fffffff3, fffffff2, fffffff5, fffffff4, fffffff7, fffffff6, } +VREV32:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:31:result_uint8x16 [] = { f3, f2, f1, f0, f7, f6, f5, f4, fb, fa, f9, f8, ff, fe, fd, fc, } +VREV32:32:result_uint16x8 [] = { fff1, fff0, fff3, fff2, fff5, fff4, fff7, fff6, } +VREV32:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VREV32:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV32:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VREV64 output: +VREV64:36:result_int8x8 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:37:result_int16x4 [] = { fffffff3, fffffff2, fffffff1, fffffff0, } +VREV64:38:result_int32x2 [] = { fffffff1, fffffff0, } +VREV64:39:result_int64x1 [] = { 3333333333333333, } +VREV64:40:result_uint8x8 [] = { f7, f6, f5, f4, f3, f2, f1, f0, } +VREV64:41:result_uint16x4 [] = { fff3, fff2, fff1, fff0, } +VREV64:42:result_uint32x2 [] = { fffffff1, fffffff0, } +VREV64:43:result_uint64x1 [] = { 3333333333333333, } +VREV64:44:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1800000 -0x1p+4 -16, } +VREV64:45:result_int8x16 [] = { fffffff7, fffffff6, fffffff5, fffffff4, fffffff3, fffffff2, fffffff1, fffffff0, ffffffff, fffffffe, fffffffd, fffffffc, fffffffb, fffffffa, fffffff9, fffffff8, } 
+VREV64:46:result_int16x8 [] = { fffffff3, fffffff2, fffffff1, fffffff0, fffffff7, fffffff6, fffffff5, fffffff4, } +VREV64:47:result_int32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:49:result_uint8x16 [] = { f7, f6, f5, f4, f3, f2, f1, f0, ff, fe, fd, fc, fb, fa, f9, f8, } +VREV64:50:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VREV64:51:result_uint32x4 [] = { fffffff1, fffffff0, fffffff3, fffffff2, } +VREV64:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VREV64:53:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1800000 -0x1p+4 -16, c1500000 -0x1.ap+3 -13, c1600000 -0x1.cp+3 -14, } + +VSRA_N output: +VSRA_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRA_N:2:result_int32x2 [] = { fffffffc, fffffffd, } +VSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VSRA_N:5:result_uint16x4 [] = { fffc, fffd, fffe, ffff, } +VSRA_N:6:result_uint32x2 [] = { fffffff3, fffffff4, } +VSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRA_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSRA_N:9:result_int8x16 [] = { fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, } +VSRA_N:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRA_N:11:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VSRA_N:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:13:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VSRA_N:14:result_uint16x8 [] = { fffc, fffd, fffe, ffff, 0, 1, 2, 3, } +VSRA_N:15:result_uint32x4 [] = { fffffff3, fffffff4, fffffff5, 
fffffff6, } +VSRA_N:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRA_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTRN/VTRNQ chunk 0 output: +VTRN/VTRNQ:0:result_int8x8 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, } +VTRN/VTRNQ:1:result_int16x4 [] = { fffffff0, fffffff1, 22, 22, } +VTRN/VTRNQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:3:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:4:result_uint8x8 [] = { f0, f1, 55, 55, f2, f3, 55, 55, } +VTRN/VTRNQ:5:result_uint16x4 [] = { fff0, fff1, 66, 66, } +VTRN/VTRNQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VTRN/VTRNQ:7:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VTRN/VTRNQ:9:result_int8x16 [] = { fffffff0, fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, } +VTRN/VTRNQ:10:result_int16x8 [] = { fffffff0, fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, } +VTRN/VTRNQ:11:result_int32x4 [] = { fffffff0, fffffff1, 33, 33, } +VTRN/VTRNQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:13:result_uint8x16 [] = { f0, f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, } +VTRN/VTRNQ:14:result_uint16x8 [] = { fff0, fff1, 66, 66, fff2, fff3, 66, 66, } +VTRN/VTRNQ:15:result_uint32x4 [] = { fffffff0, fffffff1, 77, 77, } +VTRN/VTRNQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, 42073333 0x1.0e6666p+5 33.8, 42073333 0x1.0e6666p+5 33.8, } + +VTRN/VTRNQ chunk 1 output: +VTRN/VTRNQ:18:result_int8x8 [] = { fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, } +VTRN/VTRNQ:19:result_int16x4 [] = { fffffff1, 22, 22, fffffff2, } +VTRN/VTRNQ:20:result_int32x2 [] = { 
fffffff1, 33, } +VTRN/VTRNQ:21:result_int64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:22:result_uint8x8 [] = { f1, 55, 55, f2, f3, 55, 55, f4, } +VTRN/VTRNQ:23:result_uint16x4 [] = { fff1, 66, 66, fff2, } +VTRN/VTRNQ:24:result_uint32x2 [] = { fffffff1, 77, } +VTRN/VTRNQ:25:result_uint64x1 [] = { 3333333333333333, } +VTRN/VTRNQ:26:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 42066666 0x1.0cccccp+5 33.6, } +VTRN/VTRNQ:27:result_int8x16 [] = { fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, fffffff8, } +VTRN/VTRNQ:28:result_int16x8 [] = { fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, fffffff4, } +VTRN/VTRNQ:29:result_int32x4 [] = { fffffff1, 33, 33, fffffff2, } +VTRN/VTRNQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:31:result_uint8x16 [] = { f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, f8, } +VTRN/VTRNQ:32:result_uint16x8 [] = { fff1, 66, 66, fff2, fff3, 66, 66, fff4, } +VTRN/VTRNQ:33:result_uint32x4 [] = { fffffff1, 77, 77, fffffff2, } +VTRN/VTRNQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTRN/VTRNQ:35:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, 42073333 0x1.0e6666p+5 33.8, 42073333 0x1.0e6666p+5 33.8, c1600000 -0x1.cp+3 -14, } + +VUZP/VUZPQ chunk 0 output: +VUZP/VUZPQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:3:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VUZP/VUZPQ:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VUZP/VUZPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VUZP/VUZPQ:7:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VUZP/VUZPQ:9:result_int8x16 [] = { fffffff0, 
fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VUZP/VUZPQ:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VUZP/VUZPQ:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VUZP/VUZPQ:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VUZP/VUZPQ:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VUZP/VUZPQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VUZP/VUZPQ chunk 1 output: +VUZP/VUZPQ:18:result_int8x8 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, } +VUZP/VUZPQ:19:result_int16x4 [] = { fffffff1, fffffff2, fffffff3, 22, } +VUZP/VUZPQ:20:result_int32x2 [] = { fffffff1, 33, } +VUZP/VUZPQ:21:result_int64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:22:result_uint8x8 [] = { f1, f2, f3, f4, f5, f6, f7, 55, } +VUZP/VUZPQ:23:result_uint16x4 [] = { fff1, fff2, fff3, 66, } +VUZP/VUZPQ:24:result_uint32x2 [] = { fffffff1, 77, } +VUZP/VUZPQ:25:result_uint64x1 [] = { 3333333333333333, } +VUZP/VUZPQ:26:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 42066666 0x1.0cccccp+5 33.6, } +VUZP/VUZPQ:27:result_int8x16 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 11, } +VUZP/VUZPQ:28:result_int16x8 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 22, } +VUZP/VUZPQ:29:result_int32x4 [] = { fffffff1, fffffff2, fffffff3, 33, } +VUZP/VUZPQ:30:result_int64x2 [] = { 
3333333333333333, 3333333333333333, } +VUZP/VUZPQ:31:result_uint8x16 [] = { f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, 55, } +VUZP/VUZPQ:32:result_uint16x8 [] = { fff1, fff2, fff3, fff4, fff5, fff6, fff7, 66, } +VUZP/VUZPQ:33:result_uint32x4 [] = { fffffff1, fffffff2, fffffff3, 77, } +VUZP/VUZPQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VUZP/VUZPQ:35:result_float32x4 [] = { c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, 42073333 0x1.0e6666p+5 33.8, } + +VZIP/VZIPQ chunk 0 output: +VZIP/VZIPQ:0:result_int8x8 [] = { fffffff0, fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, } +VZIP/VZIPQ:1:result_int16x4 [] = { fffffff0, fffffff2, 22, 22, } +VZIP/VZIPQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:3:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:4:result_uint8x8 [] = { f0, f4, 55, 55, f1, f5, 55, 55, } +VZIP/VZIPQ:5:result_uint16x4 [] = { fff0, fff2, 66, 66, } +VZIP/VZIPQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VZIP/VZIPQ:7:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VZIP/VZIPQ:9:result_int8x16 [] = { fffffff0, fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, } +VZIP/VZIPQ:10:result_int16x8 [] = { fffffff0, fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, } +VZIP/VZIPQ:11:result_int32x4 [] = { fffffff0, fffffff2, 33, 33, } +VZIP/VZIPQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:13:result_uint8x16 [] = { f0, f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, } +VZIP/VZIPQ:14:result_uint16x8 [] = { fff0, fff4, 66, 66, fff1, fff5, 66, 66, } +VZIP/VZIPQ:15:result_uint32x4 [] = { fffffff0, fffffff2, 77, 77, } +VZIP/VZIPQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1600000 -0x1.cp+3 -14, 42073333 0x1.0e6666p+5 33.8, 42073333 
0x1.0e6666p+5 33.8, } + +VZIP/VZIPQ chunk 1 output: +VZIP/VZIPQ:18:result_int8x8 [] = { fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, fffffff2, } +VZIP/VZIPQ:19:result_int16x4 [] = { fffffff2, 22, 22, fffffff1, } +VZIP/VZIPQ:20:result_int32x2 [] = { fffffff1, 33, } +VZIP/VZIPQ:21:result_int64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:22:result_uint8x8 [] = { f4, 55, 55, f1, f5, 55, 55, f2, } +VZIP/VZIPQ:23:result_uint16x4 [] = { fff2, 66, 66, fff1, } +VZIP/VZIPQ:24:result_uint32x2 [] = { fffffff1, 77, } +VZIP/VZIPQ:25:result_uint64x1 [] = { 3333333333333333, } +VZIP/VZIPQ:26:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, 42066666 0x1.0cccccp+5 33.6, } +VZIP/VZIPQ:27:result_int8x16 [] = { fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, fffffff4, } +VZIP/VZIPQ:28:result_int16x8 [] = { fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, fffffff2, } +VZIP/VZIPQ:29:result_int32x4 [] = { fffffff2, 33, 33, fffffff1, } +VZIP/VZIPQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:31:result_uint8x16 [] = { f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, f4, } +VZIP/VZIPQ:32:result_uint16x8 [] = { fff4, 66, 66, fff1, fff5, 66, 66, fff2, } +VZIP/VZIPQ:33:result_uint32x4 [] = { fffffff2, 77, 77, fffffff1, } +VZIP/VZIPQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VZIP/VZIPQ:35:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, 42073333 0x1.0e6666p+5 33.8, 42073333 0x1.0e6666p+5 33.8, c1700000 -0x1.ep+3 -15, } + +VREINTERPRET/VREINTERPRETQ output: +VREINTERPRET/VREINTERPRETQ:0:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:1:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:2:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } 
+VREINTERPRET/VREINTERPRETQ:3:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:4:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:5:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:6:result_int8x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:7:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:8:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:9:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:10:result_int16x4 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, } +VREINTERPRET/VREINTERPRETQ:11:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:12:result_int16x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:13:result_int16x4 [] = { fffffff0, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:14:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:15:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:16:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:17:result_int32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:18:result_int32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:19:result_int32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:20:result_int32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:21:result_int64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:22:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:23:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:24:result_int64x1 [] = { 
f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:25:result_int64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:26:result_int64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:27:result_int64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:28:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VREINTERPRET/VREINTERPRETQ:29:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:30:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:31:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:32:result_uint8x8 [] = { f0, ff, f1, ff, f2, ff, f3, ff, } +VREINTERPRET/VREINTERPRETQ:33:result_uint8x8 [] = { f0, ff, ff, ff, f1, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:34:result_uint8x8 [] = { f0, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:35:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:36:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VREINTERPRET/VREINTERPRETQ:37:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:38:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:39:result_uint16x4 [] = { f1f0, f3f2, f5f4, f7f6, } +VREINTERPRET/VREINTERPRETQ:40:result_uint16x4 [] = { fff0, ffff, fff1, ffff, } +VREINTERPRET/VREINTERPRETQ:41:result_uint16x4 [] = { fff0, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:42:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:43:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:44:result_uint32x2 [] = { fffffff0, fffffff1, } +VREINTERPRET/VREINTERPRETQ:45:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:46:result_uint32x2 [] = { f3f2f1f0, f7f6f5f4, } +VREINTERPRET/VREINTERPRETQ:47:result_uint32x2 [] = { fff1fff0, fff3fff2, } +VREINTERPRET/VREINTERPRETQ:48:result_uint32x2 [] = { fffffff0, ffffffff, } +VREINTERPRET/VREINTERPRETQ:49:result_uint64x1 [] = { 
f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:50:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:51:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:52:result_uint64x1 [] = { fffffffffffffff0, } +VREINTERPRET/VREINTERPRETQ:53:result_uint64x1 [] = { f7f6f5f4f3f2f1f0, } +VREINTERPRET/VREINTERPRETQ:54:result_uint64x1 [] = { fff3fff2fff1fff0, } +VREINTERPRET/VREINTERPRETQ:55:result_uint64x1 [] = { fffffff1fffffff0, } +VREINTERPRET/VREINTERPRETQ:56:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:57:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:58:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:59:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VREINTERPRET/VREINTERPRETQ:60:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, } +VREINTERPRET/VREINTERPRETQ:61:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, fffffff2, ffffffff, ffffffff, ffffffff, fffffff3, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:62:result_int8x16 [] = { fffffff0, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } 
+VREINTERPRET/VREINTERPRETQ:63:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:64:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:65:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:66:result_int16x8 [] = { fffff1f0, fffff3f2, fffff5f4, fffff7f6, fffff9f8, fffffbfa, fffffdfc, fffffffe, } +VREINTERPRET/VREINTERPRETQ:67:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VREINTERPRET/VREINTERPRETQ:68:result_int16x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, } +VREINTERPRET/VREINTERPRETQ:69:result_int16x8 [] = { fffffff0, ffffffff, ffffffff, ffffffff, fffffff1, ffffffff, ffffffff, ffffffff, } +VREINTERPRET/VREINTERPRETQ:70:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:71:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:72:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:73:result_int32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:74:result_int32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:75:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:76:result_int32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:77:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:78:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:79:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:80:result_int64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } 
+VREINTERPRET/VREINTERPRETQ:81:result_int64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:82:result_int64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:83:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:84:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:85:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VREINTERPRET/VREINTERPRETQ:86:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:87:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:88:result_uint16x8 [] = { f1f0, f3f2, f5f4, f7f6, f9f8, fbfa, fdfc, fffe, } +VREINTERPRET/VREINTERPRETQ:89:result_uint16x8 [] = { fff0, ffff, fff1, ffff, fff2, ffff, fff3, ffff, } +VREINTERPRET/VREINTERPRETQ:90:result_uint16x8 [] = { fff0, ffff, ffff, ffff, fff1, ffff, ffff, ffff, } +VREINTERPRET/VREINTERPRETQ:91:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:92:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:93:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VREINTERPRET/VREINTERPRETQ:94:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:95:result_uint32x4 [] = { f3f2f1f0, f7f6f5f4, fbfaf9f8, fffefdfc, } +VREINTERPRET/VREINTERPRETQ:96:result_uint32x4 [] = { fff1fff0, fff3fff2, fff5fff4, fff7fff6, } +VREINTERPRET/VREINTERPRETQ:97:result_uint32x4 [] = { fffffff0, ffffffff, fffffff1, ffffffff, } +VREINTERPRET/VREINTERPRETQ:98:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:99:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:100:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:101:result_uint64x2 
[] = { fffffffffffffff0, fffffffffffffff1, } +VREINTERPRET/VREINTERPRETQ:102:result_uint64x2 [] = { f7f6f5f4f3f2f1f0, fffefdfcfbfaf9f8, } +VREINTERPRET/VREINTERPRETQ:103:result_uint64x2 [] = { fff3fff2fff1fff0, fff7fff6fff5fff4, } +VREINTERPRET/VREINTERPRETQ:104:result_uint64x2 [] = { fffffff1fffffff0, fffffff3fffffff2, } +VREINTERPRET/VREINTERPRETQ:105:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VREINTERPRET/VREINTERPRETQ:106:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:107:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:108:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:109:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, } +VREINTERPRET/VREINTERPRETQ:110:result_uint8x16 [] = { f0, ff, ff, ff, f1, ff, ff, ff, f2, ff, ff, ff, f3, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:111:result_uint8x16 [] = { f0, ff, ff, ff, ff, ff, ff, ff, f1, ff, ff, ff, ff, ff, ff, ff, } +VREINTERPRET/VREINTERPRETQ:112:result_float32x2 [] = { f3f2f1f0 -0x1.e5e3ep+104 -3.84962e+31, f7f6f5f4 -0x1.edebe8p+112 -1.00179e+34, } +VREINTERPRET/VREINTERPRETQ:113:result_float32x2 [] = { fff1fff0 nan nan, fff3fff2 nan nan, } +VREINTERPRET/VREINTERPRETQ:114:result_float32x2 [] = { fffffff0 nan nan, fffffff1 nan nan, } +VREINTERPRET/VREINTERPRETQ:115:result_float32x2 [] = { fffffff0 nan nan, ffffffff nan nan, } +VREINTERPRET/VREINTERPRETQ:116:result_float32x2 [] = { f3f2f1f0 -0x1.e5e3ep+104 -3.84962e+31, f7f6f5f4 -0x1.edebe8p+112 -1.00179e+34, } +VREINTERPRET/VREINTERPRETQ:117:result_float32x2 [] = { fff1fff0 nan nan, fff3fff2 nan nan, } +VREINTERPRET/VREINTERPRETQ:118:result_float32x2 [] = { fffffff0 nan nan, fffffff1 nan nan, } +VREINTERPRET/VREINTERPRETQ:119:result_float32x2 [] = { fffffff0 nan nan, ffffffff 
nan nan, } +VREINTERPRET/VREINTERPRETQ:120:result_float32x4 [] = { f3f2f1f0 -0x1.e5e3ep+104 -3.84962e+31, f7f6f5f4 -0x1.edebe8p+112 -1.00179e+34, fbfaf9f8 -0x1.f5f3fp+120 -2.60629e+36, fffefdfc nan nan, } +VREINTERPRET/VREINTERPRETQ:121:result_float32x4 [] = { fff1fff0 nan nan, fff3fff2 nan nan, fff5fff4 nan nan, fff7fff6 nan nan, } +VREINTERPRET/VREINTERPRETQ:122:result_float32x4 [] = { fffffff0 nan nan, fffffff1 nan nan, fffffff2 nan nan, fffffff3 nan nan, } +VREINTERPRET/VREINTERPRETQ:123:result_float32x4 [] = { fffffff0 nan nan, ffffffff nan nan, fffffff1 nan nan, ffffffff nan nan, } +VREINTERPRET/VREINTERPRETQ:124:result_float32x4 [] = { f3f2f1f0 -0x1.e5e3ep+104 -3.84962e+31, f7f6f5f4 -0x1.edebe8p+112 -1.00179e+34, fbfaf9f8 -0x1.f5f3fp+120 -2.60629e+36, fffefdfc nan nan, } +VREINTERPRET/VREINTERPRETQ:125:result_float32x4 [] = { fff1fff0 nan nan, fff3fff2 nan nan, fff5fff4 nan nan, fff7fff6 nan nan, } +VREINTERPRET/VREINTERPRETQ:126:result_float32x4 [] = { fffffff0 nan nan, fffffff1 nan nan, fffffff2 nan nan, fffffff3 nan nan, } +VREINTERPRET/VREINTERPRETQ:127:result_float32x4 [] = { fffffff0 nan nan, ffffffff nan nan, fffffff1 nan nan, ffffffff nan nan, } +VREINTERPRET/VREINTERPRETQ:128:result_int8x8 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, } +VREINTERPRET/VREINTERPRETQ:129:result_int16x4 [] = { 0, ffffc180, 0, ffffc170, } +VREINTERPRET/VREINTERPRETQ:130:result_int32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:131:result_int64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:132:result_uint8x8 [] = { 0, 0, 80, c1, 0, 0, 70, c1, } +VREINTERPRET/VREINTERPRETQ:133:result_uint16x4 [] = { 0, c180, 0, c170, } +VREINTERPRET/VREINTERPRETQ:134:result_uint32x2 [] = { c1800000, c1700000, } +VREINTERPRET/VREINTERPRETQ:135:result_uint64x1 [] = { c1700000c1800000, } +VREINTERPRET/VREINTERPRETQ:136:result_int8x16 [] = { 0, 0, ffffff80, ffffffc1, 0, 0, 70, ffffffc1, 0, 0, 60, ffffffc1, 0, 0, 50, ffffffc1, } 
+VREINTERPRET/VREINTERPRETQ:137:result_int16x8 [] = { 0, ffffc180, 0, ffffc170, 0, ffffc160, 0, ffffc150, } +VREINTERPRET/VREINTERPRETQ:138:result_int32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:139:result_int64x2 [] = { c1700000c1800000, c1500000c1600000, } +VREINTERPRET/VREINTERPRETQ:140:result_uint8x16 [] = { 0, 0, 80, c1, 0, 0, 70, c1, 0, 0, 60, c1, 0, 0, 50, c1, } +VREINTERPRET/VREINTERPRETQ:141:result_uint16x8 [] = { 0, c180, 0, c170, 0, c160, 0, c150, } +VREINTERPRET/VREINTERPRETQ:142:result_uint32x4 [] = { c1800000, c1700000, c1600000, c1500000, } +VREINTERPRET/VREINTERPRETQ:143:result_uint64x2 [] = { c1700000c1800000, c1500000c1600000, } + +VQRDMULH overflow output: +VQRDMULH:0:vqrdmulh_s16 Neon overflow 0 +VQRDMULH:1:vqrdmulh_s32 Neon overflow 0 +VQRDMULH:2:vqrdmulhq_s16 Neon overflow 0 +VQRDMULH:3:vqrdmulhq_s32 Neon overflow 0 + +VQRDMULH output: +VQRDMULH:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:5:result_int16x4 [] = { fffffff5, fffffff6, fffffff7, fffffff7, } +VQRDMULH:6:result_int32x2 [] = { 0, 0, } +VQRDMULH:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:12:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:14:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRDMULH:15:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH:16:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQRDMULH:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:21:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH (check mul overflow) overflow output: +VQRDMULH:22:vqrdmulh_s16 Neon overflow 1 +VQRDMULH:23:vqrdmulh_s32 Neon overflow 1 +VQRDMULH:24:vqrdmulhq_s16 Neon overflow 1 +VQRDMULH:25:vqrdmulhq_s32 Neon overflow 1 + +VQRDMULH (check mul overflow) output: +VQRDMULH:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:27:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:28:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:29:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:32:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:33:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:34:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH:35:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:36:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:37:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:38:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:39:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:40:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:41:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:42:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:43:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH (check rounding overflow) overflow output: +VQRDMULH:44:vqrdmulh_s16 Neon overflow 0 +VQRDMULH:45:vqrdmulh_s32 Neon overflow 0 +VQRDMULH:46:vqrdmulhq_s16 Neon overflow 0 +VQRDMULH:47:vqrdmulhq_s32 Neon overflow 0 + +VQRDMULH (check rounding overflow) output: +VQRDMULH:48:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:49:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:50:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH:51:result_int64x1 [] = { 3333333333333333, } +VQRDMULH:52:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:53:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH:54:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH:55:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH:56:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH:57:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:58:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH:59:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH:60:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:61:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH:62:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH:63:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH:64:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH:65:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_LANE overflow output: +VQRDMULH_LANE:0:vqrdmulh_lane_s16 Neon overflow 0 +VQRDMULH_LANE:1:vqrdmulh_lane_s32 Neon overflow 0 +VQRDMULH_LANE:2:vqrdmulhq_lane_s16 Neon overflow 0 
+VQRDMULH_LANE:3:vqrdmulhq_lane_s32 Neon overflow 0 + +VQRDMULH_LANE output: +VQRDMULH_LANE:4:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:5:result_int16x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:6:result_int32x2 [] = { 0, 0, } +VQRDMULH_LANE:7:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:8:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:9:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:10:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:11:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:12:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_LANE:13:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:14:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRDMULH_LANE:15:result_int32x4 [] = { 0, 0, 0, 0, } +VQRDMULH_LANE:16:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:17:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:18:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:19:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:20:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:21:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_LANE (check mul overflow) overflow output: +VQRDMULH_LANE:22:vqrdmulh_lane_s16 Neon overflow 1 +VQRDMULH_LANE:23:vqrdmulh_lane_s32 Neon overflow 1 +VQRDMULH_LANE:24:vqrdmulhq_lane_s16 Neon overflow 1 +VQRDMULH_LANE:25:vqrdmulhq_lane_s32 Neon overflow 1 + +VQRDMULH_LANE (check mul overflow) output: +VQRDMULH_LANE:26:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:27:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } 
+VQRDMULH_LANE:28:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:29:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:30:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:31:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:32:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:33:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:34:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_LANE:35:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:36:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:37:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:38:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:39:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:40:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:41:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:42:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:43:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_LANE (check rounding overflow) overflow output: +VQRDMULH_LANE:44:vqrdmulh_lane_s16 Neon overflow 0 +VQRDMULH_LANE:45:vqrdmulh_lane_s32 Neon overflow 0 +VQRDMULH_LANE:46:vqrdmulhq_lane_s16 Neon overflow 0 +VQRDMULH_LANE:47:vqrdmulhq_lane_s32 Neon overflow 0 + +VQRDMULH_LANE (check rounding overflow) output: +VQRDMULH_LANE:48:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:49:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:50:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_LANE:51:result_int64x1 [] = { 3333333333333333, } 
+VQRDMULH_LANE:52:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:53:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:54:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_LANE:55:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_LANE:56:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_LANE:57:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:58:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_LANE:59:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_LANE:60:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:61:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_LANE:62:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_LANE:63:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_LANE:64:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_LANE:65:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_N overflow output: +VQRDMULH_N:0:vqrdmulh_n_s16 Neon overflow 0 +VQRDMULH_N:1:vqrdmulh_n_s32 Neon overflow 0 +VQRDMULH_N:2:vqrdmulhq_n_s16 Neon overflow 0 +VQRDMULH_N:3:vqrdmulhq_n_s32 Neon overflow 0 + +VQRDMULH_N output: +VQRDMULH_N:4:result_int16x4 [] = { fffffffc, fffffffc, fffffffc, fffffffd, } +VQRDMULH_N:5:result_int32x2 [] = { fffffffe, fffffffe, } +VQRDMULH_N:6:result_int16x8 [] = { 6, 6, 6, 5, 5, 4, 4, 4, } +VQRDMULH_N:7:result_int32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } + +VQRDMULH_N (check mul overflow) overflow output: +VQRDMULH_N:8:vqrdmulh_n_s16 Neon overflow 1 +VQRDMULH_N:9:vqrdmulh_n_s32 Neon overflow 1 +VQRDMULH_N:10:vqrdmulhq_n_s16 Neon overflow 1 
+VQRDMULH_N:11:vqrdmulhq_n_s32 Neon overflow 1 + +VQRDMULH_N (check mul overflow) output: +VQRDMULH_N:12:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:13:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:14:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:15:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:16:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:17:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:18:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:19:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:20:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_N:21:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:22:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:23:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:24:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:25:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:26:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:27:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:28:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:29:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRDMULH_N (check rounding overflow) overflow output: +VQRDMULH_N:30:vqrdmulh_n_s16 Neon overflow 0 +VQRDMULH_N:31:vqrdmulh_n_s32 Neon overflow 0 +VQRDMULH_N:32:vqrdmulhq_n_s16 Neon overflow 0 +VQRDMULH_N:33:vqrdmulhq_n_s32 Neon overflow 0 + +VQRDMULH_N (check rounding overflow) output: +VQRDMULH_N:34:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:35:result_int16x4 [] = { 7fff, 7fff, 
7fff, 7fff, } +VQRDMULH_N:36:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQRDMULH_N:37:result_int64x1 [] = { 3333333333333333, } +VQRDMULH_N:38:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:39:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VQRDMULH_N:40:result_uint32x2 [] = { 33333333, 33333333, } +VQRDMULH_N:41:result_uint64x1 [] = { 3333333333333333, } +VQRDMULH_N:42:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRDMULH_N:43:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:44:result_int16x8 [] = { 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, 7fff, } +VQRDMULH_N:45:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } +VQRDMULH_N:46:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:47:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRDMULH_N:48:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRDMULH_N:49:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRDMULH_N:50:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRDMULH_N:51:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (with input = 0) overflow output: +VQRSHL/VQRSHLQ:0:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:1:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:2:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:3:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:4:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:5:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:6:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:7:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:8:vqrshlq_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:9:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:10:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:11:vqrshlq_s64 Neon overflow 0 
+VQRSHL/VQRSHLQ:12:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:13:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:14:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:15:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (with input = 0) output: +VQRSHL/VQRSHLQ:16:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:17:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:18:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:19:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:20:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:21:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:22:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:23:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:24:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:25:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:26:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:27:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:28:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:29:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:30:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:31:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:32:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:33:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) overflow output: +VQRSHL/VQRSHLQ:34:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:35:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:36:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:37:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:38:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:39:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:40:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:41:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:42:vqrshlq_s8 Neon overflow 0 
+VQRSHL/VQRSHLQ:43:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:44:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:45:vqrshlq_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:46:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:47:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:48:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:49:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (input 0 and negative shift amount) output: +VQRSHL/VQRSHLQ:50:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:51:result_int16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:52:result_int32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:53:result_int64x1 [] = { 0, } +VQRSHL/VQRSHLQ:54:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:55:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:56:result_uint32x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:57:result_uint64x1 [] = { 0, } +VQRSHL/VQRSHLQ:58:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:59:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:60:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:61:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:62:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:63:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:64:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:65:result_uint32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:66:result_uint64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:67:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ overflow output: +VQRSHL/VQRSHLQ:68:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:69:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:70:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:71:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:72:vqrshl_u8 Neon overflow 1 +VQRSHL/VQRSHLQ:73:vqrshl_u16 Neon overflow 1 
+VQRSHL/VQRSHLQ:74:vqrshl_u32 Neon overflow 1 +VQRSHL/VQRSHLQ:75:vqrshl_u64 Neon overflow 1 +VQRSHL/VQRSHLQ:76:vqrshlq_s8 Neon overflow 1 +VQRSHL/VQRSHLQ:77:vqrshlq_s16 Neon overflow 1 +VQRSHL/VQRSHLQ:78:vqrshlq_s32 Neon overflow 1 +VQRSHL/VQRSHLQ:79:vqrshlq_s64 Neon overflow 1 +VQRSHL/VQRSHLQ:80:vqrshlq_u8 Neon overflow 1 +VQRSHL/VQRSHLQ:81:vqrshlq_u16 Neon overflow 1 +VQRSHL/VQRSHLQ:82:vqrshlq_u32 Neon overflow 1 +VQRSHL/VQRSHLQ:83:vqrshlq_u64 Neon overflow 1 + +VQRSHL/VQRSHLQ output: +VQRSHL/VQRSHLQ:84:result_int8x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VQRSHL/VQRSHLQ:85:result_int16x4 [] = { ffffff80, ffffff88, ffffff90, ffffff98, } +VQRSHL/VQRSHLQ:86:result_int32x2 [] = { fffff000, fffff100, } +VQRSHL/VQRSHLQ:87:result_int64x1 [] = { ffffffffffffff80, } +VQRSHL/VQRSHLQ:88:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:89:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:90:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHL/VQRSHLQ:91:result_uint64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:92:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:93:result_int8x16 [] = { ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, ffffff80, } +VQRSHL/VQRSHLQ:94:result_int16x8 [] = { ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, ffff8000, } +VQRSHL/VQRSHLQ:95:result_int32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:96:result_int64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:97:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHL/VQRSHLQ:98:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQRSHL/VQRSHLQ:99:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } 
+VQRSHL/VQRSHLQ:100:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQRSHL/VQRSHLQ:101:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (negative shift amount) overflow output: +VQRSHL/VQRSHLQ:102:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:103:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:104:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:105:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:106:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:107:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:108:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:109:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:110:vqrshlq_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:111:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:112:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:113:vqrshlq_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:114:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:115:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:116:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:117:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (negative shift amount) output: +VQRSHL/VQRSHLQ:118:result_int8x8 [] = { fffffffc, fffffffc, fffffffd, fffffffd, fffffffd, fffffffd, fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:119:result_int16x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VQRSHL/VQRSHLQ:120:result_int32x2 [] = { fffffffe, fffffffe, } +VQRSHL/VQRSHLQ:121:result_int64x1 [] = { ffffffffffffffff, } +VQRSHL/VQRSHLQ:122:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, } +VQRSHL/VQRSHLQ:123:result_uint16x4 [] = { 3ffc, 3ffc, 3ffd, 3ffd, } +VQRSHL/VQRSHLQ:124:result_uint32x2 [] = { 1ffffffe, 1ffffffe, } +VQRSHL/VQRSHLQ:125:result_uint64x1 [] = { fffffffffffffff, } +VQRSHL/VQRSHLQ:126:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:127:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:128:result_int16x8 [] = { 
0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:129:result_int32x4 [] = { 0, 0, 0, 0, } +VQRSHL/VQRSHLQ:130:result_int64x2 [] = { 0, 0, } +VQRSHL/VQRSHLQ:131:result_uint8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VQRSHL/VQRSHLQ:132:result_uint16x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:133:result_uint32x4 [] = { 80000, 80000, 80000, 80000, } +VQRSHL/VQRSHLQ:134:result_uint64x2 [] = { 100000000000, 100000000000, } +VQRSHL/VQRSHLQ:135:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (checking overflow: shift by -1) overflow output: +VQRSHL/VQRSHLQ:136:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:137:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:138:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:139:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:140:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:141:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:142:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:143:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:144:vqrshlq_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:145:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:146:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:147:vqrshlq_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:148:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:149:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:150:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:151:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (checking overflow: shift by -1) output: +VQRSHL/VQRSHLQ:152:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:153:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:154:result_int32x2 [] = { 40000000, 40000000, } +VQRSHL/VQRSHLQ:155:result_int64x1 [] = { 4000000000000000, } +VQRSHL/VQRSHLQ:156:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:157:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:158:result_uint32x2 [] = { 80000000, 
80000000, } +VQRSHL/VQRSHLQ:159:result_uint64x1 [] = { 8000000000000000, } +VQRSHL/VQRSHLQ:160:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:161:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VQRSHL/VQRSHLQ:162:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VQRSHL/VQRSHLQ:163:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VQRSHL/VQRSHLQ:164:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VQRSHL/VQRSHLQ:165:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHL/VQRSHLQ:166:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VQRSHL/VQRSHLQ:167:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VQRSHL/VQRSHLQ:168:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VQRSHL/VQRSHLQ:169:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHL/VQRSHLQ (checking overflow: shift by -3) overflow output: +VQRSHL/VQRSHLQ:170:vqrshl_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:171:vqrshl_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:172:vqrshl_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:173:vqrshl_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:174:vqrshl_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:175:vqrshl_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:176:vqrshl_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:177:vqrshl_u64 Neon overflow 0 +VQRSHL/VQRSHLQ:178:vqrshlq_s8 Neon overflow 0 +VQRSHL/VQRSHLQ:179:vqrshlq_s16 Neon overflow 0 +VQRSHL/VQRSHLQ:180:vqrshlq_s32 Neon overflow 0 +VQRSHL/VQRSHLQ:181:vqrshlq_s64 Neon overflow 0 +VQRSHL/VQRSHLQ:182:vqrshlq_u8 Neon overflow 0 +VQRSHL/VQRSHLQ:183:vqrshlq_u16 Neon overflow 0 +VQRSHL/VQRSHLQ:184:vqrshlq_u32 Neon overflow 0 +VQRSHL/VQRSHLQ:185:vqrshlq_u64 Neon overflow 0 + +VQRSHL/VQRSHLQ (checking overflow: shift by -3) 
output: +VQRSHL/VQRSHLQ:186:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:187:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:188:result_int32x2 [] = { 10000000, 10000000, } +VQRSHL/VQRSHLQ:189:result_int64x1 [] = { 1000000000000000, } +VQRSHL/VQRSHLQ:190:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:191:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:192:result_uint32x2 [] = { 20000000, 20000000, } +VQRSHL/VQRSHLQ:193:result_uint64x1 [] = { 2000000000000000, } +VQRSHL/VQRSHLQ:194:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHL/VQRSHLQ:195:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VQRSHL/VQRSHLQ:196:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VQRSHL/VQRSHLQ:197:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VQRSHL/VQRSHLQ:198:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VQRSHL/VQRSHLQ:199:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VQRSHL/VQRSHLQ:200:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VQRSHL/VQRSHLQ:201:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VQRSHL/VQRSHLQ:202:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VQRSHL/VQRSHLQ:203:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VABA/VABAQ output: +VABA/VABAQ:0:result_int8x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABA/VABAQ:1:result_int16x4 [] = { 16, 17, 18, 19, } +VABA/VABAQ:2:result_int32x2 [] = { 20, 21, } +VABA/VABAQ:3:result_int64x1 [] = { 3333333333333333, } +VABA/VABAQ:4:result_uint8x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } +VABA/VABAQ:5:result_uint16x4 [] = { 
907, 908, 909, 90a, } +VABA/VABAQ:6:result_uint32x2 [] = { ffffffe7, ffffffe8, } +VABA/VABAQ:7:result_uint64x1 [] = { 3333333333333333, } +VABA/VABAQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VABA/VABAQ:9:result_int8x16 [] = { 5e, 5f, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 6a, 6b, 6c, 6d, } +VABA/VABAQ:10:result_int16x8 [] = { b9c, b9d, b9e, b9f, ba0, ba1, ba2, ba3, } +VABA/VABAQ:11:result_int32x4 [] = { 26e0, 26e1, 26e2, 26e3, } +VABA/VABAQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:13:result_uint8x16 [] = { f8, f9, fa, fb, fc, fd, fe, ff, 0, 1, 2, 3, 4, 5, 6, 7, } +VABA/VABAQ:14:result_uint16x8 [] = { fff9, fffa, fffb, fffc, fffd, fffe, ffff, 0, } +VABA/VABAQ:15:result_uint32x4 [] = { c, d, e, f, } +VABA/VABAQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABA/VABAQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VABAL output: +VABAL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:2:result_int32x2 [] = { 33333333, 33333333, } +VABAL:3:result_int64x1 [] = { 3333333333333333, } +VABAL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABAL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABAL:7:result_uint64x1 [] = { 3333333333333333, } +VABAL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VABAL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:10:result_int16x8 [] = { fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, } +VABAL:11:result_int32x4 [] = { 16, 17, 18, 19, } +VABAL:12:result_int64x2 [] = { 20, 21, } +VABAL:13:result_uint8x16 [] = { 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABAL:14:result_uint16x8 [] = { 53, 54, 55, 56, 57, 58, 59, 5a, } +VABAL:15:result_uint32x4 [] = { 907, 908, 909, 90a, } +VABAL:16:result_uint64x2 [] = { ffffffe7, ffffffe8, } +VABAL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VABD/VABDQ output: +VABD/VABDQ:0:result_int8x8 [] = { 11, 10, f, e, d, c, b, a, } +VABD/VABDQ:1:result_int16x4 [] = { 3, 2, 1, 0, } +VABD/VABDQ:2:result_int32x2 [] = { 18, 17, } +VABD/VABDQ:3:result_int64x1 [] = { 3333333333333333, } +VABD/VABDQ:4:result_uint8x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABD/VABDQ:5:result_uint16x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABD/VABDQ:6:result_uint32x2 [] = { ffffffe8, ffffffe9, } +VABD/VABDQ:7:result_uint64x1 [] = { 3333333333333333, } +VABD/VABDQ:8:result_float32x2 [] = { 41c26666 0x1.84ccccp+4 24.3, 41ba6666 0x1.74ccccp+4 23.3, } +VABD/VABDQ:9:result_int8x16 [] = { 1a, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, f, e, d, c, b, } +VABD/VABDQ:10:result_int16x8 [] = { 4, 3, 2, 1, 0, 1, 2, 3, } +VABD/VABDQ:11:result_int32x4 [] = { 30, 2f, 2e, 2d, } +VABD/VABDQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VABD/VABDQ:13:result_uint8x16 [] = { e6, e7, e8, e9, ea, eb, ec, ed, ee, ef, f0, f1, f2, f3, f4, f5, } +VABD/VABDQ:14:result_uint16x8 [] = { ffe4, ffe5, ffe6, ffe7, ffe8, ffe9, ffea, ffeb, } +VABD/VABDQ:15:result_uint32x4 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, } +VABD/VABDQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VABD/VABDQ:17:result_float32x4 [] = { 42407ae1 0x1.80f5c2p+5 48.12, 423c7ae1 0x1.78f5c2p+5 47.12, 42387ae1 0x1.70f5c2p+5 46.12, 42347ae1 0x1.68f5c2p+5 45.12, } + +VABDL output: +VABDL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:2:result_int32x2 [] = { 33333333, 33333333, } 
+VABDL:3:result_int64x1 [] = { 3333333333333333, } +VABDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VABDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VABDL:7:result_uint64x1 [] = { 3333333333333333, } +VABDL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VABDL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:10:result_int16x8 [] = { 11, 10, f, e, d, c, b, a, } +VABDL:11:result_int32x4 [] = { 3, 2, 1, 0, } +VABDL:12:result_int64x2 [] = { 18, 17, } +VABDL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VABDL:14:result_uint16x8 [] = { ef, f0, f1, f2, f3, f4, f5, f6, } +VABDL:15:result_uint32x4 [] = { ffe3, ffe4, ffe5, ffe6, } +VABDL:16:result_uint64x2 [] = { ffffffe8, ffffffe9, } +VABDL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VAND/VANDQ output: +VAND/VANDQ:0:result_int8x8 [] = { 0, 0, 2, 2, 0, 0, 2, 2, } +VAND/VANDQ:1:result_int16x4 [] = { fffffff0, fffffff0, fffffff0, fffffff0, } +VAND/VANDQ:2:result_int32x2 [] = { 0, 1, } +VAND/VANDQ:3:result_int64x1 [] = { 60, } +VAND/VANDQ:4:result_uint8x8 [] = { 10, 10, 10, 10, 14, 14, 14, 14, } +VAND/VANDQ:5:result_uint16x4 [] = { 10, 10, 12, 12, } +VAND/VANDQ:6:result_uint32x2 [] = { 20, 20, } +VAND/VANDQ:7:result_uint64x1 [] = { 0, } +VAND/VANDQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VAND/VANDQ:9:result_int8x16 [] = { fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, fffffff0, fffffff0, fffffff2, fffffff2, fffffff4, fffffff4, fffffff6, fffffff6, } +VAND/VANDQ:10:result_int16x8 [] = { ffffffe0, ffffffe0, ffffffe0, ffffffe0, ffffffe4, ffffffe4, ffffffe4, ffffffe4, } 
+VAND/VANDQ:11:result_int32x4 [] = { ffffffe0, ffffffe0, ffffffe2, ffffffe2, } +VAND/VANDQ:12:result_int64x2 [] = { 10, 10, } +VAND/VANDQ:13:result_uint8x16 [] = { 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, c, c, c, c, } +VAND/VANDQ:14:result_uint16x8 [] = { 0, 1, 2, 3, 0, 1, 2, 3, } +VAND/VANDQ:15:result_uint32x4 [] = { 30, 31, 32, 33, } +VAND/VANDQ:16:result_uint64x2 [] = { 0, 1, } +VAND/VANDQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VORR/VORRQ output: +VORR/VORRQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff2, fffffff3, fffffff6, fffffff7, fffffff6, fffffff7, } +VORR/VORRQ:1:result_int16x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, } +VORR/VORRQ:2:result_int32x2 [] = { fffffff3, fffffff3, } +VORR/VORRQ:3:result_int64x1 [] = { fffffffffffffff4, } +VORR/VORRQ:4:result_uint8x8 [] = { f4, f5, f6, f7, f4, f5, f6, f7, } +VORR/VORRQ:5:result_uint16x4 [] = { fffe, ffff, fffe, ffff, } +VORR/VORRQ:6:result_uint32x2 [] = { fffffff8, fffffff9, } +VORR/VORRQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VORR/VORRQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VORR/VORRQ:9:result_int8x16 [] = { fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffff6, fffffff7, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, fffffffe, ffffffff, } +VORR/VORRQ:10:result_int16x8 [] = { fffffffc, fffffffd, fffffffe, ffffffff, fffffffc, fffffffd, fffffffe, ffffffff, } +VORR/VORRQ:11:result_int32x4 [] = { fffffff2, fffffff3, fffffff2, fffffff3, } +VORR/VORRQ:12:result_int64x2 [] = { fffffffffffffff8, fffffffffffffff9, } +VORR/VORRQ:13:result_uint8x16 [] = { fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, fc, fd, fe, ff, } +VORR/VORRQ:14:result_uint16x8 [] = { fff3, fff3, fff3, fff3, fff7, fff7, fff7, fff7, } +VORR/VORRQ:15:result_uint32x4 [] = { fffffff7, fffffff7, fffffff7, 
fffffff7, } +VORR/VORRQ:16:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff3, } +VORR/VORRQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VORN/VORNQ output: +VORN/VORNQ:0:result_int8x8 [] = { fffffffd, fffffffd, ffffffff, ffffffff, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:1:result_int16x4 [] = { fffffff3, fffffff3, fffffff3, fffffff3, } +VORN/VORNQ:2:result_int32x2 [] = { fffffffc, fffffffd, } +VORN/VORNQ:3:result_int64x1 [] = { fffffffffffffffb, } +VORN/VORNQ:4:result_uint8x8 [] = { fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:5:result_uint16x4 [] = { fff1, fff1, fff3, fff3, } +VORN/VORNQ:6:result_uint32x2 [] = { fffffff7, fffffff7, } +VORN/VORNQ:7:result_uint64x1 [] = { fffffffffffffffd, } +VORN/VORNQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VORN/VORNQ:9:result_int8x16 [] = { fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, fffffff9, fffffff9, fffffffb, fffffffb, fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:10:result_int16x8 [] = { fffffff3, fffffff3, fffffff3, fffffff3, fffffff7, fffffff7, fffffff7, fffffff7, } +VORN/VORNQ:11:result_int32x4 [] = { fffffffd, fffffffd, ffffffff, ffffffff, } +VORN/VORNQ:12:result_int64x2 [] = { fffffffffffffff7, fffffffffffffff7, } +VORN/VORNQ:13:result_uint8x16 [] = { f3, f3, f3, f3, f7, f7, f7, f7, fb, fb, fb, fb, ff, ff, ff, ff, } +VORN/VORNQ:14:result_uint16x8 [] = { fffc, fffd, fffe, ffff, fffc, fffd, fffe, ffff, } +VORN/VORNQ:15:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, } +VORN/VORNQ:16:result_uint64x2 [] = { fffffffffffffffc, fffffffffffffffd, } +VORN/VORNQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VEOR/VEORQ 
output: +VEOR/VEORQ:0:result_int8x8 [] = { fffffff2, fffffff3, fffffff0, fffffff1, fffffff6, fffffff7, fffffff4, fffffff5, } +VEOR/VEORQ:1:result_int16x4 [] = { c, d, e, f, } +VEOR/VEORQ:2:result_int32x2 [] = { fffffff3, fffffff2, } +VEOR/VEORQ:3:result_int64x1 [] = { ffffffffffffff94, } +VEOR/VEORQ:4:result_uint8x8 [] = { e4, e5, e6, e7, e0, e1, e2, e3, } +VEOR/VEORQ:5:result_uint16x4 [] = { ffee, ffef, ffec, ffed, } +VEOR/VEORQ:6:result_uint32x2 [] = { ffffffd8, ffffffd9, } +VEOR/VEORQ:7:result_uint64x1 [] = { fffffffffffffff2, } +VEOR/VEORQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VEOR/VEORQ:9:result_int8x16 [] = { 6, 7, 4, 5, 2, 3, 0, 1, e, f, c, d, a, b, 8, 9, } +VEOR/VEORQ:10:result_int16x8 [] = { 1c, 1d, 1e, 1f, 18, 19, 1a, 1b, } +VEOR/VEORQ:11:result_int32x4 [] = { 12, 13, 10, 11, } +VEOR/VEORQ:12:result_int64x2 [] = { ffffffffffffffe8, ffffffffffffffe9, } +VEOR/VEORQ:13:result_uint8x16 [] = { fc, fd, fe, ff, f8, f9, fa, fb, f4, f5, f6, f7, f0, f1, f2, f3, } +VEOR/VEORQ:14:result_uint16x8 [] = { fff3, fff2, fff1, fff0, fff7, fff6, fff5, fff4, } +VEOR/VEORQ:15:result_uint32x4 [] = { ffffffc7, ffffffc6, ffffffc5, ffffffc4, } +VEOR/VEORQ:16:result_uint64x2 [] = { fffffffffffffff3, fffffffffffffff2, } +VEOR/VEORQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VBIC/VBICQ output: +VBIC/VBICQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff4, fffffff5, fffffff4, fffffff5, } +VBIC/VBICQ:1:result_int16x4 [] = { 0, 1, 2, 3, } +VBIC/VBICQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VBIC/VBICQ:3:result_int64x1 [] = { ffffffffffffff90, } +VBIC/VBICQ:4:result_uint8x8 [] = { e0, e1, e2, e3, e0, e1, e2, e3, } +VBIC/VBICQ:5:result_uint16x4 [] = { ffe0, ffe1, ffe0, ffe1, } +VBIC/VBICQ:6:result_uint32x2 [] = { ffffffd0, ffffffd1, } 
+VBIC/VBICQ:7:result_uint64x1 [] = { fffffffffffffff0, } +VBIC/VBICQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VBIC/VBICQ:9:result_int8x16 [] = { 0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9, } +VBIC/VBICQ:10:result_int16x8 [] = { 10, 11, 12, 13, 10, 11, 12, 13, } +VBIC/VBICQ:11:result_int32x4 [] = { 10, 11, 10, 11, } +VBIC/VBICQ:12:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VBIC/VBICQ:13:result_uint8x16 [] = { f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, f0, f1, f2, f3, } +VBIC/VBICQ:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff4, fff4, fff4, fff4, } +VBIC/VBICQ:15:result_uint32x4 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VBIC/VBICQ:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff0, } +VBIC/VBICQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCREATE output: +VCREATE:0:result_int8x8 [] = { fffffff0, ffffffde, ffffffbc, ffffff9a, 78, 56, 34, 12, } +VCREATE:1:result_int16x4 [] = { ffffdef0, ffff9abc, 5678, 1234, } +VCREATE:2:result_int32x2 [] = { 9abcdef0, 12345678, } +VCREATE:3:result_int64x1 [] = { 123456789abcdef0, } +VCREATE:4:result_uint8x8 [] = { f0, de, bc, 9a, 78, 56, 34, 12, } +VCREATE:5:result_uint16x4 [] = { def0, 9abc, 5678, 1234, } +VCREATE:6:result_uint32x2 [] = { 9abcdef0, 12345678, } +VCREATE:7:result_uint64x1 [] = { 123456789abcdef0, } +VCREATE:8:result_float32x2 [] = { 9abcdef0 -0x1.79bdep-74 -7.81152e-23, 12345678 0x1.68acfp-91 5.69046e-28, } +VCREATE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:13:result_uint8x16 [] = { 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCREATE:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCREATE:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCREATE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCREATE:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD2_LANE/VLD2Q_LANE chunk 0 output: +VLD2_LANE/VLD2Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD2_LANE/VLD2Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD2_LANE/VLD2Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:10:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:11:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:14:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } 
+VLD2_LANE/VLD2Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:17:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD2_LANE/VLD2Q_LANE chunk 1 output: +VLD2_LANE/VLD2Q_LANE:18:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:19:result_int16x4 [] = { fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:20:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:22:result_uint8x8 [] = { f0, f1, aa, aa, aa, aa, aa, aa, } +VLD2_LANE/VLD2Q_LANE:23:result_uint16x4 [] = { aaaa, aaaa, fff0, fff1, } +VLD2_LANE/VLD2Q_LANE:24:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_LANE/VLD2Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:26:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD2_LANE/VLD2Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:28:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, fffffff0, fffffff1, ffffaaaa, ffffaaaa, } +VLD2_LANE/VLD2Q_LANE:29:result_int32x4 [] = { fffffff0, fffffff1, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_LANE/VLD2Q_LANE:32:result_uint16x8 [] = { aaaa, aaaa, fff0, fff1, aaaa, aaaa, aaaa, aaaa, } +VLD2_LANE/VLD2Q_LANE:33:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD2_LANE/VLD2Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_LANE/VLD2Q_LANE:35:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 
-0x1.ep+3 -15, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD3_LANE/VLD3Q_LANE chunk 0 output: +VLD3_LANE/VLD3Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:5:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD3_LANE/VLD3Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:10:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:11:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:14:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:17:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD3_LANE/VLD3Q_LANE chunk 1 output: +VLD3_LANE/VLD3Q_LANE:18:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, 
ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD3_LANE/VLD3Q_LANE:19:result_int16x4 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:20:result_int32x2 [] = { fffffff2, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:22:result_uint8x8 [] = { aa, aa, aa, aa, f0, f1, f2, aa, } +VLD3_LANE/VLD3Q_LANE:23:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:24:result_uint32x2 [] = { aaaaaaaa, fffffff0, } +VLD3_LANE/VLD3Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD3_LANE/VLD3Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:28:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:29:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, fffffff0, fffffff1, } +VLD3_LANE/VLD3Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:32:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, fff0, } +VLD3_LANE/VLD3Q_LANE:33:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:35:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } + +VLD3_LANE/VLD3Q_LANE chunk 2 output: +VLD3_LANE/VLD3Q_LANE:36:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:37:result_int16x4 [] = { fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:38:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } 
+VLD3_LANE/VLD3Q_LANE:39:result_int64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:40:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD3_LANE/VLD3Q_LANE:41:result_uint16x4 [] = { aaaa, fff0, fff1, fff2, } +VLD3_LANE/VLD3Q_LANE:42:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_LANE/VLD3Q_LANE:43:result_uint64x1 [] = { 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:44:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD3_LANE/VLD3Q_LANE:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:46:result_int16x8 [] = { ffffaaaa, ffffaaaa, fffffff0, fffffff1, fffffff2, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD3_LANE/VLD3Q_LANE:47:result_int32x4 [] = { fffffff2, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_LANE/VLD3Q_LANE:50:result_uint16x8 [] = { fff1, fff2, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD3_LANE/VLD3Q_LANE:51:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD3_LANE/VLD3Q_LANE:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_LANE/VLD3Q_LANE:53:result_float32x4 [] = { c1600000 -0x1.cp+3 -14, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD4_LANE/VLD4Q_LANE chunk 0 output: +VLD4_LANE/VLD4Q_LANE:0:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:1:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:4:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:5:result_uint16x4 [] = 
{ aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:6:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD4_LANE/VLD4Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:10:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:11:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:14:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:17:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD4_LANE/VLD4Q_LANE chunk 1 output: +VLD4_LANE/VLD4Q_LANE:18:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:19:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:22:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:23:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:24:result_uint32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:26:result_float32x2 
[] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD4_LANE/VLD4Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:28:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:29:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:32:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:33:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:35:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD4_LANE/VLD4Q_LANE chunk 2 output: +VLD4_LANE/VLD4Q_LANE:36:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, ffffffaa, } +VLD4_LANE/VLD4Q_LANE:37:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:38:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:39:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:40:result_uint8x8 [] = { f0, f1, f2, f3, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:41:result_uint16x4 [] = { aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:42:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_LANE/VLD4Q_LANE:43:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:44:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD4_LANE/VLD4Q_LANE:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VLD4_LANE/VLD4Q_LANE:46:result_int16x8 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:47:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:50:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:51:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:53:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VLD4_LANE/VLD4Q_LANE chunk 3 output: +VLD4_LANE/VLD4Q_LANE:54:result_int8x8 [] = { ffffffaa, ffffffaa, ffffffaa, ffffffaa, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:55:result_int16x4 [] = { ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:56:result_int32x2 [] = { aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:57:result_int64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:58:result_uint8x8 [] = { aa, aa, aa, aa, aa, aa, aa, aa, } +VLD4_LANE/VLD4Q_LANE:59:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_LANE/VLD4Q_LANE:60:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_LANE/VLD4Q_LANE:61:result_uint64x1 [] = { 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:62:result_float32x2 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } +VLD4_LANE/VLD4Q_LANE:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:64:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, ffffaaaa, ffffaaaa, ffffaaaa, ffffaaaa, } +VLD4_LANE/VLD4Q_LANE:65:result_int32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } 
+VLD4_LANE/VLD4Q_LANE:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_LANE/VLD4Q_LANE:68:result_uint16x8 [] = { aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, aaaa, } +VLD4_LANE/VLD4Q_LANE:69:result_uint32x4 [] = { aaaaaaaa, aaaaaaaa, aaaaaaaa, aaaaaaaa, } +VLD4_LANE/VLD4Q_LANE:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_LANE/VLD4Q_LANE:71:result_float32x4 [] = { aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, aaaaaaaa -0x1.555554p-42 -3.03165e-13, } + +VLD2_DUP/VLD2Q_DUP chunk 0 output: +VLD2_DUP/VLD2Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:4:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD2_DUP/VLD2Q_DUP:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD2_DUP/VLD2Q_DUP:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 
3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD2_DUP/VLD2Q_DUP chunk 1 output: +VLD2_DUP/VLD2Q_DUP:18:result_int8x8 [] = { fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:19:result_int16x4 [] = { fffffff0, fffffff1, fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:20:result_int32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:21:result_int64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:22:result_uint8x8 [] = { f0, f1, f0, f1, f0, f1, f0, f1, } +VLD2_DUP/VLD2Q_DUP:23:result_uint16x4 [] = { fff0, fff1, fff0, fff1, } +VLD2_DUP/VLD2Q_DUP:24:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD2_DUP/VLD2Q_DUP:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD2_DUP/VLD2Q_DUP:26:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD2_DUP/VLD2Q_DUP:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD2_DUP/VLD2Q_DUP:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD2_DUP/VLD2Q_DUP:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD2_DUP/VLD2Q_DUP:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD2_DUP/VLD2Q_DUP:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 
33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD3_DUP/VLD3Q_DUP chunk 0 output: +VLD3_DUP/VLD3Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f0, f1, f2, f0, f1, } +VLD3_DUP/VLD3Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff0, } +VLD3_DUP/VLD3Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD3_DUP/VLD3Q_DUP:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD3_DUP/VLD3Q_DUP:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD3_DUP/VLD3Q_DUP chunk 1 output: +VLD3_DUP/VLD3Q_DUP:18:result_int8x8 [] = { fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, } 
+VLD3_DUP/VLD3Q_DUP:19:result_int16x4 [] = { fffffff1, fffffff2, fffffff0, fffffff1, } +VLD3_DUP/VLD3Q_DUP:20:result_int32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:21:result_int64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:22:result_uint8x8 [] = { f2, f0, f1, f2, f0, f1, f2, f0, } +VLD3_DUP/VLD3Q_DUP:23:result_uint16x4 [] = { fff1, fff2, fff0, fff1, } +VLD3_DUP/VLD3Q_DUP:24:result_uint32x2 [] = { fffffff2, fffffff0, } +VLD3_DUP/VLD3Q_DUP:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD3_DUP/VLD3Q_DUP:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1800000 -0x1p+4 -16, } +VLD3_DUP/VLD3Q_DUP:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD3_DUP/VLD3Q_DUP chunk 2 output: +VLD3_DUP/VLD3Q_DUP:36:result_int8x8 [] = { fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:37:result_int16x4 [] = { fffffff2, fffffff0, fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:38:result_int32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:39:result_int64x1 [] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:40:result_uint8x8 [] = { f1, f2, f0, f1, f2, 
f0, f1, f2, } +VLD3_DUP/VLD3Q_DUP:41:result_uint16x4 [] = { fff2, fff0, fff1, fff2, } +VLD3_DUP/VLD3Q_DUP:42:result_uint32x2 [] = { fffffff1, fffffff2, } +VLD3_DUP/VLD3Q_DUP:43:result_uint64x1 [] = { fffffffffffffff2, } +VLD3_DUP/VLD3Q_DUP:44:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, } +VLD3_DUP/VLD3Q_DUP:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD3_DUP/VLD3Q_DUP:50:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD3_DUP/VLD3Q_DUP:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD3_DUP/VLD3Q_DUP:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD3_DUP/VLD3Q_DUP:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD4_DUP/VLD4Q_DUP chunk 0 output: +VLD4_DUP/VLD4Q_DUP:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:2:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:3:result_int64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:4:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:7:result_uint64x1 [] = { fffffffffffffff0, } +VLD4_DUP/VLD4Q_DUP:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, 
c1700000 -0x1.ep+3 -15, } +VLD4_DUP/VLD4Q_DUP:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD4_DUP/VLD4Q_DUP chunk 1 output: +VLD4_DUP/VLD4Q_DUP:18:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:19:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:20:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:21:result_int64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:22:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:23:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:25:result_uint64x1 [] = { fffffffffffffff1, } +VLD4_DUP/VLD4Q_DUP:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD4_DUP/VLD4Q_DUP:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:29:result_int32x4 [] = { 
33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD4_DUP/VLD4Q_DUP chunk 2 output: +VLD4_DUP/VLD4Q_DUP:36:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:37:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:38:result_int32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:39:result_int64x1 [] = { fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:40:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:41:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:42:result_uint32x2 [] = { fffffff0, fffffff1, } +VLD4_DUP/VLD4Q_DUP:43:result_uint64x1 [] = { fffffffffffffff2, } +VLD4_DUP/VLD4Q_DUP:44:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VLD4_DUP/VLD4Q_DUP:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:46:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:47:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:50:result_uint16x8 [] = { 
3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:51:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VLD4_DUP/VLD4Q_DUP chunk 3 output: +VLD4_DUP/VLD4Q_DUP:54:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:55:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:56:result_int32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:57:result_int64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:58:result_uint8x8 [] = { f0, f1, f2, f3, f0, f1, f2, f3, } +VLD4_DUP/VLD4Q_DUP:59:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VLD4_DUP/VLD4Q_DUP:60:result_uint32x2 [] = { fffffff2, fffffff3, } +VLD4_DUP/VLD4Q_DUP:61:result_uint64x1 [] = { fffffffffffffff3, } +VLD4_DUP/VLD4Q_DUP:62:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VLD4_DUP/VLD4Q_DUP:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:64:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:65:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VLD4_DUP/VLD4Q_DUP:68:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VLD4_DUP/VLD4Q_DUP:69:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VLD4_DUP/VLD4Q_DUP:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VLD4_DUP/VLD4Q_DUP:71:result_float32x4 [] = { 33333333 
0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMLA output: +VMLA:0:result_int8x8 [] = { ffffffdf, ffffffe0, ffffffe1, ffffffe2, ffffffe3, ffffffe4, ffffffe5, ffffffe6, } +VMLA:1:result_int16x4 [] = { 1f8c, 1f8d, 1f8e, 1f8f, } +VMLA:2:result_int32x2 [] = { 2bf7, 2bf8, } +VMLA:3:result_int64x1 [] = { 3333333333333333, } +VMLA:4:result_uint8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VMLA:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA:6:result_uint32x2 [] = { 43ac, 43ad, } +VMLA:7:result_uint64x1 [] = { 3333333333333333, } +VMLA:8:result_float32x2 [] = { 43a14e76 0x1.429cecp+8 322.613, 43a1ce76 0x1.439cecp+8 323.613, } +VMLA:9:result_int8x16 [] = { f, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, } +VMLA:10:result_int16x8 [] = { 4830, 4831, 4832, 4833, 4834, 4835, 4836, 4837, } +VMLA:11:result_int32x4 [] = { 470f, 4710, 4711, 4712, } +VMLA:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:13:result_uint8x16 [] = { ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, } +VMLA:14:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA:15:result_uint32x4 [] = { 3620, 3621, 3622, 3623, } +VMLA:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA:17:result_float32x4 [] = { 45f0ae15 0x1.e15c2ap+12 7701.76, 45f0b615 0x1.e16c2ap+12 7702.76, 45f0be15 0x1.e17c2ap+12 7703.76, 45f0c615 0x1.e18c2ap+12 7704.76, } + +VMLS output: +VMLS:0:result_int8x8 [] = { 1, 2, 3, 4, 5, 6, 7, 8, } +VMLS:1:result_int16x4 [] = { ffffe054, ffffe055, ffffe056, ffffe057, } +VMLS:2:result_int32x2 [] = { ffffd3e9, ffffd3ea, } +VMLS:3:result_int64x1 [] = { 3333333333333333, } +VMLS:4:result_uint8x8 [] = { c0, c1, c2, c3, c4, c5, c6, c7, } +VMLS:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS:6:result_uint32x2 [] = { ffffbc34, ffffbc35, } +VMLS:7:result_uint64x1 [] = { 3333333333333333, } 
+VMLS:8:result_float32x2 [] = { c3b14e76 -0x1.629cecp+8 -354.613, c3b0ce76 -0x1.619cecp+8 -353.613, } +VMLS:9:result_int8x16 [] = { ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, ffffffe0, } +VMLS:10:result_int16x8 [] = { ffffb7b0, ffffb7b1, ffffb7b2, ffffb7b3, ffffb7b4, ffffb7b5, ffffb7b6, ffffb7b7, } +VMLS:11:result_int32x4 [] = { ffffb8d1, ffffb8d2, ffffb8d3, ffffb8d4, } +VMLS:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:13:result_uint8x16 [] = { 34, 35, 36, 37, 38, 39, 3a, 3b, 3c, 3d, 3e, 3f, 40, 41, 42, 43, } +VMLS:14:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS:15:result_uint32x4 [] = { ffffc9c0, ffffc9c1, ffffc9c2, ffffc9c3, } +VMLS:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS:17:result_float32x4 [] = { c5f1ae15 -0x1.e35c2ap+12 -7733.76, c5f1a615 -0x1.e34c2ap+12 -7732.76, c5f19e15 -0x1.e33c2ap+12 -7731.76, c5f19615 -0x1.e32c2ap+12 -7730.76, } + +VMUL output: +VMUL:0:result_int8x8 [] = { fffffff0, 1, 12, 23, 34, 45, 56, 67, } +VMUL:1:result_int16x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL:2:result_int32x2 [] = { fffffcd0, fffffd03, } +VMUL:3:result_int64x1 [] = { 3333333333333333, } +VMUL:4:result_uint8x8 [] = { c0, 4, 48, 8c, d0, 14, 58, 9c, } +VMUL:5:result_uint16x4 [] = { fab0, fb05, fb5a, fbaf, } +VMUL:6:result_uint32x2 [] = { fffff9a0, fffffa06, } +VMUL:7:result_uint64x1 [] = { 3333333333333333, } +VMUL:8:result_float32x2 [] = { c4053333 -0x1.0a6666p+9 -532.8, c3f9c000 -0x1.f38p+8 -499.5, } +VMUL:9:result_int8x16 [] = { ffffff90, 7, 7e, fffffff5, 6c, ffffffe3, 5a, ffffffd1, 48, ffffffbf, 36, ffffffad, 24, ffffff9b, 12, ffffff89, } +VMUL:10:result_int16x8 [] = { fffff780, fffff808, fffff890, fffff918, fffff9a0, fffffa28, fffffab0, fffffb38, } +VMUL:11:result_int32x4 [] = { fffff670, fffff709, fffff7a2, fffff83b, } +VMUL:12:result_int64x2 [] = { 
3333333333333333, 3333333333333333, } +VMUL:13:result_uint8x16 [] = { 60, a, b4, 5e, 8, b2, 5c, 6, b0, 5a, 4, ae, 58, 2, ac, 56, } +VMUL:14:result_uint16x8 [] = { f450, f50b, f5c6, f681, f73c, f7f7, f8b2, f96d, } +VMUL:15:result_uint32x4 [] = { fffff340, fffff40c, fffff4d8, fffff5a4, } +VMUL:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL:17:result_float32x4 [] = { c4c73333 -0x1.8e6666p+10 -1593.6, c4bac000 -0x1.758p+10 -1494, c4ae4ccd -0x1.5c999ap+10 -1394.4, c4a1d999 -0x1.43b332p+10 -1294.8, } + +VMUL_LANE output: +VMUL_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:1:result_int16x4 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, } +VMUL_LANE:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMUL_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:5:result_uint16x4 [] = { bbc0, c004, c448, c88c, } +VMUL_LANE:6:result_uint32x2 [] = { fffface0, ffffb212, } +VMUL_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_LANE:8:result_float32x2 [] = { c3b66666 -0x1.6cccccp+8 -364.8, c3ab0000 -0x1.56p+8 -342, } +VMUL_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:10:result_int16x8 [] = { ffffffc0, ffffffc4, ffffffc8, ffffffcc, ffffffd0, ffffffd4, ffffffd8, ffffffdc, } +VMUL_LANE:11:result_int32x4 [] = { fffffde0, fffffe02, fffffe24, fffffe46, } +VMUL_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_LANE:14:result_uint16x8 [] = { bbc0, c004, c448, c88c, ccd0, d114, d558, d99c, } +VMUL_LANE:15:result_uint32x4 [] = { fffface0, ffffb212, ffffb744, ffffbc76, } +VMUL_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_LANE:17:result_float32x4 [] = { c3b66666 -0x1.6cccccp+8 -364.8, c3ab0000 -0x1.56p+8 -342, c39f9999 -0x1.3f3332p+8 -319.2, c3943333 
-0x1.286666p+8 -296.4, } + +VMUL_N output: +VMUL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:1:result_int16x4 [] = { fffffef0, ffffff01, ffffff12, ffffff23, } +VMUL_N:2:result_int32x2 [] = { fffffde0, fffffe02, } +VMUL_N:3:result_int64x1 [] = { 3333333333333333, } +VMUL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:5:result_uint16x4 [] = { fcd0, fd03, fd36, fd69, } +VMUL_N:6:result_uint32x2 [] = { fffffbc0, fffffc04, } +VMUL_N:7:result_uint64x1 [] = { 3333333333333333, } +VMUL_N:8:result_float32x2 [] = { c3b26666 -0x1.64ccccp+8 -356.8, c3a74000 -0x1.4e8p+8 -334.5, } +VMUL_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:10:result_int16x8 [] = { fffffab0, fffffb05, fffffb5a, fffffbaf, fffffc04, fffffc59, fffffcae, fffffd03, } +VMUL_N:11:result_int32x4 [] = { fffff9a0, fffffa06, fffffa6c, fffffad2, } +VMUL_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMUL_N:14:result_uint16x8 [] = { f890, f907, f97e, f9f5, fa6c, fae3, fb5a, fbd1, } +VMUL_N:15:result_uint32x4 [] = { fffff780, fffff808, fffff890, fffff918, } +VMUL_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMUL_N:17:result_float32x4 [] = { c4b1cccd -0x1.63999ap+10 -1422.4, c4a6b000 -0x1.4d6p+10 -1333.5, c49b9333 -0x1.372666p+10 -1244.6, c4907667 -0x1.20eccep+10 -1155.7, } + +VMULL_N output: +VMULL_N:0:result_int32x4 [] = { 11000, 11000, 11000, 11000, } +VMULL_N:1:result_int64x2 [] = { 22000, 22000, } +VMULL_N:2:result_uint32x4 [] = { 33000, 33000, 33000, 33000, } +VMULL_N:3:result_uint64x2 [] = { 44000, 44000, } + +VMLA_LANE output: +VMLA_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:1:result_int16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:2:result_int32x2 [] = { 3e07, 3e08, } +VMLA_LANE:3:result_int64x1 [] = { 3333333333333333, } 
+VMLA_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:5:result_uint16x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:6:result_uint32x2 [] = { 3e07, 3e08, } +VMLA_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_LANE:8:result_float32x2 [] = { 4418c687 0x1.318d0ep+9 611.102, 44190687 0x1.320d0ep+9 612.102, } +VMLA_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:10:result_int16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:11:result_int32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_LANE:14:result_uint16x8 [] = { 3e07, 3e08, 3e09, 3e0a, 3e0b, 3e0c, 3e0d, 3e0e, } +VMLA_LANE:15:result_uint32x4 [] = { 3e07, 3e08, 3e09, 3e0a, } +VMLA_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_LANE:17:result_float32x4 [] = { 441a3168 0x1.3462dp+9 616.772, 441a7168 0x1.34e2dp+9 617.772, 441ab168 0x1.3562dp+9 618.772, 441af168 0x1.35e2dp+9 619.772, } + +VMLS_LANE output: +VMLS_LANE:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:1:result_int16x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:2:result_int32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:3:result_int64x1 [] = { 3333333333333333, } +VMLS_LANE:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:5:result_uint16x4 [] = { c1d9, c1da, c1db, c1dc, } +VMLS_LANE:6:result_uint32x2 [] = { ffffc1d9, ffffc1da, } +VMLS_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_LANE:8:result_float32x2 [] = { c420c687 -0x1.418d0ep+9 -643.102, c4208687 -0x1.410d0ep+9 -642.102, } +VMLS_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:10:result_int16x8 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, ffffc1dd, ffffc1de, ffffc1df, 
ffffc1e0, } +VMLS_LANE:11:result_int32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_LANE:14:result_uint16x8 [] = { c1d9, c1da, c1db, c1dc, c1dd, c1de, c1df, c1e0, } +VMLS_LANE:15:result_uint32x4 [] = { ffffc1d9, ffffc1da, ffffc1db, ffffc1dc, } +VMLS_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_LANE:17:result_float32x4 [] = { c4223168 -0x1.4462dp+9 -648.772, c421f168 -0x1.43e2dp+9 -647.772, c421b168 -0x1.4362dp+9 -646.772, c4217168 -0x1.42e2dp+9 -645.772, } + +VMLA_N output: +VMLA_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:1:result_int16x4 [] = { 595, 596, 597, 598, } +VMLA_N:2:result_int32x2 [] = { b3a, b3b, } +VMLA_N:3:result_int64x1 [] = { 3333333333333333, } +VMLA_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:5:result_uint16x4 [] = { 10df, 10e0, 10e1, 10e2, } +VMLA_N:6:result_uint32x2 [] = { 1684, 1685, } +VMLA_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLA_N:8:result_float32x2 [] = { 4497deb8 0x1.2fbd7p+10 1214.96, 4497feb8 0x1.2ffd7p+10 1215.96, } +VMLA_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:10:result_int16x8 [] = { 1c29, 1c2a, 1c2b, 1c2c, 1c2d, 1c2e, 1c2f, 1c30, } +VMLA_N:11:result_int32x4 [] = { 21ce, 21cf, 21d0, 21d1, } +VMLA_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLA_N:14:result_uint16x8 [] = { 2773, 2774, 2775, 2776, 2777, 2778, 2779, 277a, } +VMLA_N:15:result_uint32x4 [] = { 2d18, 2d19, 2d1a, 2d1b, } +VMLA_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLA_N:17:result_float32x4 [] = { 4568087b 0x1.d010f6p+11 3712.53, 4568187b 0x1.d030f6p+11 3713.53, 4568287b 0x1.d050f6p+11 3714.53, 
4568387b 0x1.d070f6p+11 3715.53, } + +VMLS_N output: +VMLS_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:1:result_int16x4 [] = { fffffa4b, fffffa4c, fffffa4d, fffffa4e, } +VMLS_N:2:result_int32x2 [] = { fffff4a6, fffff4a7, } +VMLS_N:3:result_int64x1 [] = { 3333333333333333, } +VMLS_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:5:result_uint16x4 [] = { ef01, ef02, ef03, ef04, } +VMLS_N:6:result_uint32x2 [] = { ffffe95c, ffffe95d, } +VMLS_N:7:result_uint64x1 [] = { 3333333333333333, } +VMLS_N:8:result_float32x2 [] = { c49bdeb8 -0x1.37bd7p+10 -1246.96, c49bbeb8 -0x1.377d7p+10 -1245.96, } +VMLS_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:10:result_int16x8 [] = { ffffe3b7, ffffe3b8, ffffe3b9, ffffe3ba, ffffe3bb, ffffe3bc, ffffe3bd, ffffe3be, } +VMLS_N:11:result_int32x4 [] = { ffffde12, ffffde13, ffffde14, ffffde15, } +VMLS_N:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VMLS_N:14:result_uint16x8 [] = { d86d, d86e, d86f, d870, d871, d872, d873, d874, } +VMLS_N:15:result_uint32x4 [] = { ffffd2c8, ffffd2c9, ffffd2ca, ffffd2cb, } +VMLS_N:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMLS_N:17:result_float32x4 [] = { c56a087b -0x1.d410f6p+11 -3744.53, c569f87b -0x1.d3f0f6p+11 -3743.53, c569e87b -0x1.d3d0f6p+11 -3742.53, c569d87b -0x1.d3b0f6p+11 -3741.53, } + +VSLI_N output: +VSLI_N:0:result_int8x8 [] = { 20, 21, 22, 23, 24, 25, 26, 27, } +VSLI_N:1:result_int16x4 [] = { ffffffe0, ffffffe1, ffffffe2, ffffffe3, } +VSLI_N:2:result_int32x2 [] = { 6, 7, } +VSLI_N:3:result_int64x1 [] = { 64fffffff0, } +VSLI_N:4:result_uint8x8 [] = { 50, 51, 52, 53, 50, 51, 52, 53, } +VSLI_N:5:result_uint16x4 [] = { 7bf0, 7bf1, 7bf2, 7bf3, } +VSLI_N:6:result_uint32x2 [] = { 3ffffff0, 3ffffff1, } +VSLI_N:7:result_uint64x1 [] = { 10, } +VSLI_N:8:result_float32x2 [] = { 
33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSLI_N:9:result_int8x16 [] = { ffffffd0, ffffffd1, ffffffd2, ffffffd3, ffffffd4, ffffffd5, ffffffd6, ffffffd7, ffffffd8, ffffffd9, ffffffda, ffffffdb, ffffffdc, ffffffdd, ffffffde, ffffffdf, } +VSLI_N:10:result_int16x8 [] = { ffffff60, ffffff61, ffffff62, ffffff63, ffffff64, ffffff65, ffffff66, ffffff67, } +VSLI_N:11:result_int32x4 [] = { fe2ffff0, fe2ffff1, fe2ffff2, fe2ffff3, } +VSLI_N:12:result_int64x2 [] = { 18fff0, 18fff1, } +VSLI_N:13:result_uint8x16 [] = { 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, } +VSLI_N:14:result_uint16x8 [] = { 3ff0, 3ff1, 3ff2, 3ff3, 3ff4, 3ff5, 3ff6, 3ff7, } +VSLI_N:15:result_uint32x4 [] = { 1bfffff0, 1bfffff1, 1bfffff2, 1bfffff3, } +VSLI_N:16:result_uint64x2 [] = { 7ffffffffffff0, 7ffffffffffff1, } +VSLI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSRI_N output: +VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VSRI_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:2:result_int32x2 [] = { 80000001, 80000001, } +VSRI_N:3:result_int64x1 [] = { ffffffff00000000, } +VSRI_N:4:result_uint8x8 [] = { c5, c5, c5, c5, c5, c5, c5, c5, } +VSRI_N:5:result_uint16x4 [] = { ffc0, ffc0, ffc0, ffc0, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VSRI_N:7:result_uint64x1 [] = { e000000000000000, } +VSRI_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSRI_N:9:result_int8x16 [] = { fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, fffffff7, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:10:result_int16x8 [] = { fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, fffffffd, } 
+VSRI_N:11:result_int32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VSRI_N:12:result_int64x2 [] = { ffff000000000000, ffff000000000000, } +VSRI_N:13:result_uint8x16 [] = { e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, e1, } +VSRI_N:14:result_uint16x8 [] = { fff0, fff0, fff0, fff0, fff0, fff0, fff0, fff0, } +VSRI_N:15:result_uint32x4 [] = { fffffe00, fffffe00, fffffe00, fffffe00, } +VSRI_N:16:result_uint64x2 [] = { fffffffffffff800, fffffffffffff800, } +VSRI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTST/VTSTQ (signed input) output: +VTST/VTSTQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:3:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:4:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:5:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:6:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:7:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTST/VTSTQ:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:13:result_uint8x16 [] = { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:14:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:15:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:17:result_float32x4 [] 
= { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTST/VTSTQ (unsigned input) output: +VTST/VTSTQ:18:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:19:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTST/VTSTQ:20:result_int32x2 [] = { 33333333, 33333333, } +VTST/VTSTQ:21:result_int64x1 [] = { 3333333333333333, } +VTST/VTSTQ:22:result_uint8x8 [] = { 0, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:23:result_uint16x4 [] = { 0, ffff, 0, ffff, } +VTST/VTSTQ:24:result_uint32x2 [] = { 0, ffffffff, } +VTST/VTSTQ:25:result_uint64x1 [] = { 3333333333333333, } +VTST/VTSTQ:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTST/VTSTQ:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTST/VTSTQ:28:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTST/VTSTQ:29:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTST/VTSTQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:31:result_uint8x16 [] = { 0, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VTST/VTSTQ:32:result_uint16x8 [] = { 0, ffff, 0, ffff, ffff, ffff, ffff, ffff, } +VTST/VTSTQ:33:result_uint32x4 [] = { 0, ffffffff, 0, ffffffff, } +VTST/VTSTQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTST/VTSTQ:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VADDHN output: +VADDHN:0:result_int8x8 [] = { 32, 32, 32, 32, 32, 32, 32, 32, } +VADDHN:1:result_int16x4 [] = { 32, 32, 32, 32, } +VADDHN:2:result_int32x2 [] = { 18, 18, } +VADDHN:3:result_int64x1 [] = { 3333333333333333, } +VADDHN:4:result_uint8x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VADDHN:5:result_uint16x4 [] = { 37, 37, 37, 37, } 
+VADDHN:6:result_uint32x2 [] = { 3, 3, } +VADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VADDHN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VADDHN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDHN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VADDHN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VADDHN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VADDHN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRADDHN output: +VRADDHN:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:1:result_int16x4 [] = { 33, 33, 33, 33, } +VRADDHN:2:result_int32x2 [] = { 19, 19, } +VRADDHN:3:result_int64x1 [] = { 3333333333333333, } +VRADDHN:4:result_uint8x8 [] = { 4, 4, 4, 4, 4, 4, 4, 4, } +VRADDHN:5:result_uint16x4 [] = { 38, 38, 38, 38, } +VRADDHN:6:result_uint32x2 [] = { 4, 4, } +VRADDHN:7:result_uint64x1 [] = { 3333333333333333, } +VRADDHN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRADDHN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRADDHN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, } +VRADDHN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRADDHN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRADDHN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRADDHN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VADDL output: +VADDL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:2:result_int32x2 [] = { 33333333, 33333333, } +VADDL:3:result_int64x1 [] = { 3333333333333333, } +VADDL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VADDL:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDL:7:result_uint64x1 [] = { 3333333333333333, } +VADDL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VADDL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:10:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDL:11:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDL:12:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDL:14:result_uint16x8 [] = { 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1ea, } +VADDL:15:result_uint32x4 [] = { 1ffe1, 1ffe2, 1ffe3, 1ffe4, } +VADDL:16:result_uint64x2 [] = { 1ffffffe0, 1ffffffe1, } +VADDL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VADDW output: +VADDW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, 
} +VADDW:2:result_int32x2 [] = { 33333333, 33333333, } +VADDW:3:result_int64x1 [] = { 3333333333333333, } +VADDW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VADDW:6:result_uint32x2 [] = { 33333333, 33333333, } +VADDW:7:result_uint64x1 [] = { 3333333333333333, } +VADDW:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VADDW:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:10:result_int16x8 [] = { ffffffe3, ffffffe4, ffffffe5, ffffffe6, ffffffe7, ffffffe8, ffffffe9, ffffffea, } +VADDW:11:result_int32x4 [] = { ffffffe2, ffffffe3, ffffffe4, ffffffe5, } +VADDW:12:result_int64x2 [] = { ffffffffffffffe0, ffffffffffffffe1, } +VADDW:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VADDW:14:result_uint16x8 [] = { e3, e4, e5, e6, e7, e8, e9, ea, } +VADDW:15:result_uint32x4 [] = { ffe1, ffe2, ffe3, ffe4, } +VADDW:16:result_uint64x2 [] = { ffffffe0, ffffffe1, } +VADDW:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VHADD/VHADDQ output: +VHADD/VHADDQ:0:result_int8x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:1:result_int16x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VHADD/VHADDQ:2:result_int32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:4:result_uint8x8 [] = { f1, f2, f2, f3, f3, f4, f4, f5, } +VHADD/VHADDQ:5:result_uint16x4 [] = { fff0, fff1, fff1, fff2, } +VHADD/VHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VHADD/VHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VHADD/VHADDQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } 
+VHADD/VHADDQ:9:result_int8x16 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, } +VHADD/VHADDQ:10:result_int16x8 [] = { fffffff1, fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, } +VHADD/VHADDQ:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:13:result_uint8x16 [] = { f4, f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, } +VHADD/VHADDQ:14:result_uint16x8 [] = { fff1, fff1, fff2, fff2, fff3, fff3, fff4, fff4, } +VHADD/VHADDQ:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff1, fffffff2, } +VHADD/VHADDQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHADD/VHADDQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRHADD/VRHADDQ output: +VRHADD/VRHADDQ:0:result_int8x8 [] = { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:1:result_int16x4 [] = { fffffff1, fffffff2, fffffff2, fffffff3, } +VRHADD/VRHADDQ:2:result_int32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:3:result_int64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:4:result_uint8x8 [] = { f2, f2, f3, f3, f4, f4, f5, f5, } +VRHADD/VRHADDQ:5:result_uint16x4 [] = { fff1, fff1, fff2, fff2, } +VRHADD/VRHADDQ:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VRHADD/VRHADDQ:7:result_uint64x1 [] = { 3333333333333333, } +VRHADD/VRHADDQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRHADD/VRHADDQ:9:result_int8x16 [] = { fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, fffffff6, fffffff6, fffffff7, fffffff7, fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, } +VRHADD/VRHADDQ:10:result_int16x8 [] 
= { fffffff2, fffffff2, fffffff3, fffffff3, fffffff4, fffffff4, fffffff5, fffffff5, } +VRHADD/VRHADDQ:11:result_int32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:13:result_uint8x16 [] = { f5, f5, f6, f6, f7, f7, f8, f8, f9, f9, fa, fa, fb, fb, fc, fc, } +VRHADD/VRHADDQ:14:result_uint16x8 [] = { fff1, fff2, fff2, fff3, fff3, fff4, fff4, fff5, } +VRHADD/VRHADDQ:15:result_uint32x4 [] = { fffffff1, fffffff1, fffffff2, fffffff2, } +VRHADD/VRHADDQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRHADD/VRHADDQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VHSUB/VHSUBQ output: +VHSUB/VHSUBQ:0:result_int8x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:1:result_int16x4 [] = { ffffffff, ffffffff, 0, 0, } +VHSUB/VHSUBQ:2:result_int32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:3:result_int64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:4:result_uint8x8 [] = { fe, ff, ff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:5:result_uint16x4 [] = { ffff, 0, 0, 1, } +VHSUB/VHSUBQ:6:result_uint32x2 [] = { 0, 0, } +VHSUB/VHSUBQ:7:result_uint64x1 [] = { 3333333333333333, } +VHSUB/VHSUBQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VHSUB/VHSUBQ:9:result_int8x16 [] = { fffffffe, fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, } +VHSUB/VHSUBQ:10:result_int16x8 [] = { fffffffe, ffffffff, ffffffff, 0, 0, 1, 1, 2, } +VHSUB/VHSUBQ:11:result_int32x4 [] = { ffffffff, 0, 0, 1, } +VHSUB/VHSUBQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:13:result_uint8x16 [] = { fb, fc, fc, fd, fd, fe, fe, ff, ff, 0, 0, 1, 1, 2, 2, 3, } +VHSUB/VHSUBQ:14:result_uint16x8 [] = { ffff, ffff, 0, 0, 1, 1, 2, 2, } +VHSUB/VHSUBQ:15:result_uint32x4 [] = { ffffffff, 0, 0, 1, } 
+VHSUB/VHSUBQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VHSUB/VHSUBQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSUBL output: +VSUBL:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBL:3:result_int64x1 [] = { 3333333333333333, } +VSUBL:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBL:6:result_uint32x2 [] = { 33333333, 33333333, } +VSUBL:7:result_uint64x1 [] = { 3333333333333333, } +VSUBL:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSUBL:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:10:result_int16x8 [] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBL:11:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBL:12:result_int64x2 [] = { 0, 1, } +VSUBL:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBL:14:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, } +VSUBL:15:result_uint32x4 [] = { ffffffff, 0, 1, 2, } +VSUBL:16:result_uint64x2 [] = { 0, 1, } +VSUBL:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSUBW output: +VSUBW:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:2:result_int32x2 [] = { 33333333, 33333333, } +VSUBW:3:result_int64x1 [] = { 3333333333333333, } +VSUBW:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSUBW:6:result_uint32x2 [] = { 33333333, 33333333, } 
+VSUBW:7:result_uint64x1 [] = { 3333333333333333, } +VSUBW:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSUBW:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:10:result_int16x8 [] = { fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, } +VSUBW:11:result_int32x4 [] = { fffffffe, ffffffff, 0, 1, } +VSUBW:12:result_int64x2 [] = { 0, 1, } +VSUBW:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBW:14:result_uint16x8 [] = { fefd, fefe, feff, ff00, ff01, ff02, ff03, ff04, } +VSUBW:15:result_uint32x4 [] = { fffeffff, ffff0000, ffff0001, ffff0002, } +VSUBW:16:result_uint64x2 [] = { ffffffff00000000, ffffffff00000001, } +VSUBW:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSUBHN output: +VSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VSUBHN:2:result_int32x2 [] = { 17, 17, } +VSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VSUBHN:6:result_uint32x2 [] = { 2, 2, } +VSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VSUBHN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSUBHN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VSUBHN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSUBHN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VSUBHN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VSUBHN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VSUBHN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSUBHN output: +VRSUBHN:0:result_int8x8 [] = { 31, 31, 31, 31, 31, 31, 31, 31, } +VRSUBHN:1:result_int16x4 [] = { 31, 31, 31, 31, } +VRSUBHN:2:result_int32x2 [] = { 17, 17, } +VRSUBHN:3:result_int64x1 [] = { 3333333333333333, } +VRSUBHN:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VRSUBHN:5:result_uint16x4 [] = { 36, 36, 36, 36, } +VRSUBHN:6:result_uint32x2 [] = { 2, 2, } +VRSUBHN:7:result_uint64x1 [] = { 3333333333333333, } +VRSUBHN:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSUBHN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VRSUBHN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VRSUBHN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VRSUBHN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VRSUBHN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VMVN/VMVNQ output: +VMVN/VMVNQ:0:result_int8x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:1:result_int16x4 [] = { f, e, d, c, } +VMVN/VMVNQ:2:result_int32x2 [] = { f, e, } +VMVN/VMVNQ:3:result_int64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:4:result_uint8x8 
[] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:5:result_uint16x4 [] = { f, e, d, c, } +VMVN/VMVNQ:6:result_uint32x2 [] = { f, e, } +VMVN/VMVNQ:7:result_uint64x1 [] = { 3333333333333333, } +VMVN/VMVNQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VMVN/VMVNQ:9:result_int8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:10:result_int16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:11:result_int32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:13:result_uint8x16 [] = { f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, } +VMVN/VMVNQ:14:result_uint16x8 [] = { f, e, d, c, b, a, 9, 8, } +VMVN/VMVNQ:15:result_uint32x4 [] = { f, e, d, c, } +VMVN/VMVNQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VMVN/VMVNQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQMOVN overflow output: +VQMOVN:0:vqmovn_s16 Neon overflow 0 +VQMOVN:1:vqmovn_s32 Neon overflow 0 +VQMOVN:2:vqmovn_s64 Neon overflow 0 +VQMOVN:3:vqmovn_u16 Neon overflow 0 +VQMOVN:4:vqmovn_u32 Neon overflow 0 +VQMOVN:5:vqmovn_u64 Neon overflow 0 + +VQMOVN output: +VQMOVN:6:result_int8x8 [] = { 12, 12, 12, 12, 12, 12, 12, 12, } +VQMOVN:7:result_int16x4 [] = { 1278, 1278, 1278, 1278, } +VQMOVN:8:result_int32x2 [] = { 12345678, 12345678, } +VQMOVN:9:result_int64x1 [] = { 3333333333333333, } +VQMOVN:10:result_uint8x8 [] = { 82, 82, 82, 82, 82, 82, 82, 82, } +VQMOVN:11:result_uint16x4 [] = { 8765, 8765, 8765, 8765, } +VQMOVN:12:result_uint32x2 [] = { 87654321, 87654321, } +VQMOVN:13:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQMOVN:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } 
+VQMOVN:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQMOVN overflow output: +VQMOVN:24:vqmovn_s16 Neon overflow 1 +VQMOVN:25:vqmovn_s32 Neon overflow 1 +VQMOVN:26:vqmovn_s64 Neon overflow 1 +VQMOVN:27:vqmovn_u16 Neon overflow 1 +VQMOVN:28:vqmovn_u32 Neon overflow 1 +VQMOVN:29:vqmovn_u64 Neon overflow 1 + +VQMOVN output: +VQMOVN:30:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQMOVN:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQMOVN:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQMOVN:33:result_int64x1 [] = { 3333333333333333, } +VQMOVN:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQMOVN:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQMOVN:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQMOVN:37:result_uint64x1 [] = { 3333333333333333, } +VQMOVN:38:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQMOVN:39:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVN:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:41:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, } +VQMOVN:44:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVN:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVN:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVN:47:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQMOVUN overflow output: +VQMOVUN:0:vqmovun_s16 Neon overflow 0 +VQMOVUN:1:vqmovun_s32 Neon overflow 0 +VQMOVUN:2:vqmovun_s64 Neon overflow 0 + +VQMOVUN output: +VQMOVUN:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:5:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:6:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:7:result_uint8x8 [] = { 34, 34, 34, 34, 34, 34, 34, 34, } +VQMOVUN:8:result_uint16x4 [] = { 5678, 5678, 5678, 5678, } +VQMOVUN:9:result_uint32x2 [] = { 12345678, 12345678, } +VQMOVUN:10:result_uint64x1 [] = { 3333333333333333, } +VQMOVUN:11:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQMOVUN:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:20:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 
4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQMOVUN (negative input) overflow output: +VQMOVUN:21:vqmovun_s16 Neon overflow 1 +VQMOVUN:22:vqmovun_s32 Neon overflow 1 +VQMOVUN:23:vqmovun_s64 Neon overflow 1 + +VQMOVUN (negative input) output: +VQMOVUN:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQMOVUN:26:result_int32x2 [] = { 33333333, 33333333, } +VQMOVUN:27:result_int64x1 [] = { 3333333333333333, } +VQMOVUN:28:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQMOVUN:29:result_uint16x4 [] = { 0, 0, 0, 0, } +VQMOVUN:30:result_uint32x2 [] = { 0, 0, } +VQMOVUN:31:result_uint64x1 [] = { 3333333333333333, } +VQMOVUN:32:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQMOVUN:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQMOVUN:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQMOVUN:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQMOVUN:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQMOVUN:41:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N output: +VRSHR_N:0:result_int8x8 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, } +VRSHR_N:1:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:2:result_int32x2 [] = { fffffffc, fffffffc, } +VRSHR_N:3:result_int64x1 [] = { 0, } +VRSHR_N:4:result_uint8x8 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 
3e, } +VRSHR_N:5:result_uint16x4 [] = { 1ffe, 1ffe, 1ffe, 1ffe, } +VRSHR_N:6:result_uint32x2 [] = { 8000000, 8000000, } +VRSHR_N:7:result_uint64x1 [] = { 80000000, } +VRSHR_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:9:result_int8x16 [] = { fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, fffffffc, fffffffc, fffffffd, fffffffd, fffffffe, fffffffe, ffffffff, ffffffff, 0, } +VRSHR_N:10:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:11:result_int32x4 [] = { fffffffc, fffffffc, fffffffd, fffffffd, } +VRSHR_N:12:result_int64x2 [] = { 0, 0, } +VRSHR_N:13:result_uint8x16 [] = { 3c, 3c, 3d, 3d, 3d, 3d, 3e, 3e, 3e, 3e, 3f, 3f, 3f, 3f, 40, 40, } +VRSHR_N:14:result_uint16x8 [] = { 1ffe, 1ffe, 1ffe, 1ffe, 1fff, 1fff, 1fff, 1fff, } +VRSHR_N:15:result_uint32x4 [] = { 8000000, 8000000, 8000000, 8000000, } +VRSHR_N:16:result_uint64x2 [] = { 80000000, 80000000, } +VRSHR_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: max shift amount, positive input) output: +VRSHR_N:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:19:result_int16x4 [] = { 0, 0, 0, 0, } +VRSHR_N:20:result_int32x2 [] = { 0, 0, } +VRSHR_N:21:result_int64x1 [] = { 0, } +VRSHR_N:22:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:23:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSHR_N:24:result_uint32x2 [] = { 1, 1, } +VRSHR_N:25:result_uint64x1 [] = { 1, } +VRSHR_N:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSHR_N:29:result_int32x4 [] = { 0, 0, 0, 0, } +VRSHR_N:30:result_int64x2 [] = { 0, 0, } +VRSHR_N:31:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:32:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSHR_N:33:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSHR_N:34:result_uint64x2 [] = { 1, 1, } +VRSHR_N:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VRSHR_N:36:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:37:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSHR_N:38:result_int32x2 [] = { 40000000, 40000000, } +VRSHR_N:39:result_int64x1 [] = { 4000000000000000, } +VRSHR_N:40:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:41:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:42:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:43:result_uint64x1 [] = { 8000000000000000, } +VRSHR_N:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:45:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSHR_N:46:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSHR_N:47:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSHR_N:48:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSHR_N:49:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:50:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:51:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:52:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: shift by 3, positive input) output: +VRSHR_N:54:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 
10, 10, } +VRSHR_N:55:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSHR_N:56:result_int32x2 [] = { 10000000, 10000000, } +VRSHR_N:57:result_int64x1 [] = { 1000000000000000, } +VRSHR_N:58:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:59:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:60:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:61:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:63:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSHR_N:64:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSHR_N:65:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSHR_N:66:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSHR_N:67:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:68:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:69:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:70:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: shift by 1, with negative input) output: +VRSHR_N:72:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:73:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:74:result_int32x2 [] = { c0000000, c0000000, } +VRSHR_N:75:result_int64x1 [] = { c000000000000000, } +VRSHR_N:76:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:77:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSHR_N:78:result_uint32x2 [] = { 80000000, 80000000, } +VRSHR_N:79:result_uint64x1 [] = { 8000000000000000, } 
+VRSHR_N:80:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:81:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSHR_N:82:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSHR_N:83:result_int32x4 [] = { c0000000, c0000000, c0000000, c0000000, } +VRSHR_N:84:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSHR_N:85:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSHR_N:86:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSHR_N:87:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSHR_N:88:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSHR_N:89:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSHR_N (overflow test: shift by 3, with negative input) output: +VRSHR_N:90:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSHR_N:91:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:92:result_int32x2 [] = { f0000000, f0000000, } +VRSHR_N:93:result_int64x1 [] = { f000000000000000, } +VRSHR_N:94:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:95:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSHR_N:96:result_uint32x2 [] = { 20000000, 20000000, } +VRSHR_N:97:result_uint64x1 [] = { 2000000000000000, } +VRSHR_N:98:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSHR_N:99:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, 
fffffff0, } +VRSHR_N:100:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSHR_N:101:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSHR_N:102:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSHR_N:103:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSHR_N:104:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSHR_N:105:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSHR_N:106:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSHR_N:107:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N output: +VRSRA_N:0:result_int8x8 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VRSRA_N:2:result_int32x2 [] = { fffffffd, fffffffe, } +VRSRA_N:3:result_int64x1 [] = { fffffffffffffff0, } +VRSRA_N:4:result_uint8x8 [] = { 5, 6, 7, 8, 9, a, b, c, } +VRSRA_N:5:result_uint16x4 [] = { fffd, fffe, ffff, 0, } +VRSRA_N:6:result_uint32x2 [] = { fffffff4, fffffff5, } +VRSRA_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VRSRA_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:9:result_int8x16 [] = { fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 0, 1, 2, 3, 4, 5, 6, 7, 8, } +VRSRA_N:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:11:result_int32x4 [] = { fffffffd, fffffffe, ffffffff, 0, } +VRSRA_N:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:13:result_uint8x16 [] = { 5, 6, 7, 8, 9, a, b, c, d, e, f, 10, 11, 12, 13, 14, } +VRSRA_N:14:result_uint16x8 [] = { fffd, fffe, ffff, 0, 1, 2, 3, 4, 
} +VRSRA_N:15:result_uint32x4 [] = { fffffff4, fffffff5, fffffff6, fffffff7, } +VRSRA_N:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VRSRA_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by 1, positive input) output: +VRSRA_N:18:result_int8x8 [] = { 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:19:result_int16x4 [] = { 4000, 4000, 4000, 4000, } +VRSRA_N:20:result_int32x2 [] = { 40000000, 40000000, } +VRSRA_N:21:result_int64x1 [] = { 4000000000000000, } +VRSRA_N:22:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:23:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VRSRA_N:24:result_uint32x2 [] = { 80000000, 80000000, } +VRSRA_N:25:result_uint64x1 [] = { 8000000000000000, } +VRSRA_N:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:27:result_int8x16 [] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, } +VRSRA_N:28:result_int16x8 [] = { 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, } +VRSRA_N:29:result_int32x4 [] = { 40000000, 40000000, 40000000, 40000000, } +VRSRA_N:30:result_int64x2 [] = { 4000000000000000, 4000000000000000, } +VRSRA_N:31:result_uint8x16 [] = { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, } +VRSRA_N:32:result_uint16x8 [] = { 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, } +VRSRA_N:33:result_uint32x4 [] = { 80000000, 80000000, 80000000, 80000000, } +VRSRA_N:34:result_uint64x2 [] = { 8000000000000000, 8000000000000000, } +VRSRA_N:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by 3, positive input) output: +VRSRA_N:36:result_int8x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, } 
+VRSRA_N:37:result_int16x4 [] = { 1000, 1000, 1000, 1000, } +VRSRA_N:38:result_int32x2 [] = { 10000000, 10000000, } +VRSRA_N:39:result_int64x1 [] = { 1000000000000000, } +VRSRA_N:40:result_uint8x8 [] = { 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:41:result_uint16x4 [] = { 2000, 2000, 2000, 2000, } +VRSRA_N:42:result_uint32x2 [] = { 20000000, 20000000, } +VRSRA_N:43:result_uint64x1 [] = { 2000000000000000, } +VRSRA_N:44:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:45:result_int8x16 [] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, } +VRSRA_N:46:result_int16x8 [] = { 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, } +VRSRA_N:47:result_int32x4 [] = { 10000000, 10000000, 10000000, 10000000, } +VRSRA_N:48:result_int64x2 [] = { 1000000000000000, 1000000000000000, } +VRSRA_N:49:result_uint8x16 [] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, } +VRSRA_N:50:result_uint16x8 [] = { 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, } +VRSRA_N:51:result_uint32x4 [] = { 20000000, 20000000, 20000000, 20000000, } +VRSRA_N:52:result_uint64x2 [] = { 2000000000000000, 2000000000000000, } +VRSRA_N:53:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by max, positive input) output: +VRSRA_N:54:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:55:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:56:result_int32x2 [] = { 0, 0, } +VRSRA_N:57:result_int64x1 [] = { 0, } +VRSRA_N:58:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:59:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:60:result_uint32x2 [] = { 1, 1, } +VRSRA_N:61:result_uint64x1 [] = { 1, } +VRSRA_N:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:63:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:64:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:65:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:66:result_int64x2 [] = { 0, 0, } +VRSRA_N:67:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:68:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:69:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:70:result_uint64x2 [] = { 1, 1, } +VRSRA_N:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by 1, negative input) output: +VRSRA_N:72:result_int8x8 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:73:result_int16x4 [] = { ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:74:result_int32x2 [] = { c0000000, c0000000, } +VRSRA_N:75:result_int64x1 [] = { c000000000000000, } +VRSRA_N:76:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:77:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:78:result_uint32x2 [] = { 1, 1, } +VRSRA_N:79:result_uint64x1 [] = { 1, } +VRSRA_N:80:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:81:result_int8x16 [] = { ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, ffffffc0, } +VRSRA_N:82:result_int16x8 [] = { ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, ffffc000, } +VRSRA_N:83:result_int32x4 [] = { c0000000, c0000000, c0000000, c0000000, } +VRSRA_N:84:result_int64x2 [] = { c000000000000000, c000000000000000, } +VRSRA_N:85:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:86:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:87:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:88:result_uint64x2 [] = { 1, 1, } +VRSRA_N:89:result_float32x4 [] 
= { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:90:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:91:result_int16x4 [] = { fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:92:result_int32x2 [] = { f0000000, f0000000, } +VRSRA_N:93:result_int64x1 [] = { f000000000000000, } +VRSRA_N:94:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:95:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:96:result_uint32x2 [] = { 1, 1, } +VRSRA_N:97:result_uint64x1 [] = { 1, } +VRSRA_N:98:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:99:result_int8x16 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } +VRSRA_N:100:result_int16x8 [] = { fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, fffff000, } +VRSRA_N:101:result_int32x4 [] = { f0000000, f0000000, f0000000, f0000000, } +VRSRA_N:102:result_int64x2 [] = { f000000000000000, f000000000000000, } +VRSRA_N:103:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:104:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:105:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:106:result_uint64x2 [] = { 1, 1, } +VRSRA_N:107:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRSRA_N (checking overflow: shift by max, negative input) output: +VRSRA_N:108:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:109:result_int16x4 [] = { 0, 0, 0, 0, } +VRSRA_N:110:result_int32x2 [] = { 0, 0, } +VRSRA_N:111:result_int64x1 [] = { 0, } 
+VRSRA_N:112:result_uint8x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:113:result_uint16x4 [] = { 1, 1, 1, 1, } +VRSRA_N:114:result_uint32x2 [] = { 1, 1, } +VRSRA_N:115:result_uint64x1 [] = { 1, } +VRSRA_N:116:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VRSRA_N:117:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:118:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VRSRA_N:119:result_int32x4 [] = { 0, 0, 0, 0, } +VRSRA_N:120:result_int64x2 [] = { 0, 0, } +VRSRA_N:121:result_uint8x16 [] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:122:result_uint16x8 [] = { 1, 1, 1, 1, 1, 1, 1, 1, } +VRSRA_N:123:result_uint32x4 [] = { 1, 1, 1, 1, } +VRSRA_N:124:result_uint64x2 [] = { 1, 1, } +VRSRA_N:125:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VSHLL_N output: +VSHLL_N:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:2:result_int32x2 [] = { 33333333, 33333333, } +VSHLL_N:3:result_int64x1 [] = { 3333333333333333, } +VSHLL_N:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VSHLL_N:6:result_uint32x2 [] = { 33333333, 33333333, } +VSHLL_N:7:result_uint64x1 [] = { 3333333333333333, } +VSHLL_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSHLL_N:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:10:result_int16x8 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, ffffffe8, ffffffea, ffffffec, ffffffee, } +VSHLL_N:11:result_int32x4 [] = { ffffffe0, ffffffe2, ffffffe4, ffffffe6, } +VSHLL_N:12:result_int64x2 [] = { ffffffffffffff80, ffffffffffffff88, } +VSHLL_N:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VSHLL_N:14:result_uint16x8 [] = { 3c0, 3c4, 3c8, 3cc, 3d0, 3d4, 3d8, 3dc, } +VSHLL_N:15:result_uint32x4 [] = { fff00, fff10, fff20, fff30, } +VSHLL_N:16:result_uint64x2 [] = { 7ffffff80, 7ffffff88, } +VSHLL_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VPADDL/VPADDLQ output: +VPADDL/VPADDLQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:2:result_int32x2 [] = { ffffffe1, ffffffe5, } +VPADDL/VPADDLQ:3:result_int64x1 [] = { ffffffffffffffe1, } +VPADDL/VPADDLQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:5:result_uint16x4 [] = { 1e1, 1e5, 1e9, 1ed, } +VPADDL/VPADDLQ:6:result_uint32x2 [] = { 1ffe1, 1ffe5, } +VPADDL/VPADDLQ:7:result_uint64x1 [] = { 1ffffffe1, } +VPADDL/VPADDLQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VPADDL/VPADDLQ:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:10:result_int16x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, fffffff1, fffffff5, fffffff9, fffffffd, } +VPADDL/VPADDLQ:11:result_int32x4 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADDL/VPADDLQ:12:result_int64x2 [] = { ffffffffffffffe1, ffffffffffffffe5, } +VPADDL/VPADDLQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADDL/VPADDLQ:14:result_uint16x8 [] = { 1e1, 1e5, 1e9, 1ed, 1f1, 1f5, 1f9, 1fd, } +VPADDL/VPADDLQ:15:result_uint32x4 [] = { 1ffe1, 1ffe5, 1ffe9, 1ffed, } +VPADDL/VPADDLQ:16:result_uint64x2 [] = { 1ffffffe1, 1ffffffe5, } +VPADDL/VPADDLQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 
4.17233e-08, } + +VPADD output: +VPADD:0:result_int8x8 [] = { ffffffe1, ffffffe5, ffffffe9, ffffffed, ffffffe1, ffffffe5, ffffffe9, ffffffed, } +VPADD:1:result_int16x4 [] = { ffffffe1, ffffffe5, ffffffe1, ffffffe5, } +VPADD:2:result_int32x2 [] = { ffffffe1, ffffffe1, } +VPADD:3:result_int64x1 [] = { 3333333333333333, } +VPADD:4:result_uint8x8 [] = { e1, e5, e9, ed, e1, e5, e9, ed, } +VPADD:5:result_uint16x4 [] = { ffe1, ffe5, ffe1, ffe5, } +VPADD:6:result_uint32x2 [] = { ffffffe1, ffffffe1, } +VPADD:7:result_uint64x1 [] = { 3333333333333333, } +VPADD:8:result_float32x2 [] = { c1f80000 -0x1.fp+4 -31, c1f80000 -0x1.fp+4 -31, } +VPADD:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADD:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPADD:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPADD:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPADD:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VPADAL/VPADALQ output: +VPADAL/VPADALQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:1:result_int16x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:2:result_int32x2 [] = { ffffffd1, ffffffd6, } +VPADAL/VPADALQ:3:result_int64x1 [] = { ffffffffffffffd1, } +VPADAL/VPADALQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:5:result_uint16x4 [] = { 1d1, 1d6, 1db, 1e0, } +VPADAL/VPADALQ:6:result_uint32x2 [] = { 1ffd1, 1ffd6, } +VPADAL/VPADALQ:7:result_uint64x1 
[] = { 1ffffffd1, } +VPADAL/VPADALQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VPADAL/VPADALQ:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:10:result_int16x8 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, ffffffe5, ffffffea, ffffffef, fffffff4, } +VPADAL/VPADALQ:11:result_int32x4 [] = { ffffffd1, ffffffd6, ffffffdb, ffffffe0, } +VPADAL/VPADALQ:12:result_int64x2 [] = { ffffffffffffffd1, ffffffffffffffd6, } +VPADAL/VPADALQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPADAL/VPADALQ:14:result_uint16x8 [] = { 1d1, 1d6, 1db, 1e0, 1e5, 1ea, 1ef, 1f4, } +VPADAL/VPADALQ:15:result_uint32x4 [] = { 1ffd1, 1ffd6, 1ffdb, 1ffe0, } +VPADAL/VPADALQ:16:result_uint64x2 [] = { 1ffffffd1, 1ffffffd6, } +VPADAL/VPADALQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHLU_N/VQSHLUQ_N (negative input) overflow output: +VQSHLU_N/VQSHLUQ_N:0:vqshlu_n_s8 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:1:vqshlu_n_s16 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:2:vqshlu_n_s32 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:3:vqshlu_n_s64 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:4:vqshluq_n_s8 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:5:vqshluq_n_s16 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:6:vqshluq_n_s32 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:7:vqshluq_n_s64 Neon overflow 1 + +VQSHLU_N/VQSHLUQ_N (negative input) output: +VQSHLU_N/VQSHLUQ_N:8:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:9:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:10:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:11:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:12:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:13:result_uint16x4 [] = { 0, 0, 0, 0, } 
+VQSHLU_N/VQSHLUQ_N:14:result_uint32x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:15:result_uint64x1 [] = { 0, } +VQSHLU_N/VQSHLUQ_N:16:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHLU_N/VQSHLUQ_N:17:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:18:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:19:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:20:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:21:result_uint8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:22:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:23:result_uint32x4 [] = { 0, 0, 0, 0, } +VQSHLU_N/VQSHLUQ_N:24:result_uint64x2 [] = { 0, 0, } +VQSHLU_N/VQSHLUQ_N:25:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) overflow output: +VQSHLU_N/VQSHLUQ_N:26:vqshlu_n_s8 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:27:vqshlu_n_s16 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:28:vqshlu_n_s32 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:29:vqshlu_n_s64 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:30:vqshluq_n_s8 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:31:vqshluq_n_s16 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:32:vqshluq_n_s32 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:33:vqshluq_n_s64 Neon overflow 0 + +VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 1) output: +VQSHLU_N/VQSHLUQ_N:34:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:35:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:36:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:37:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:38:result_uint8x8 [] = { fe, fe, fe, fe, fe, fe, 
fe, fe, } +VQSHLU_N/VQSHLUQ_N:39:result_uint16x4 [] = { fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:40:result_uint32x2 [] = { fffffffe, fffffffe, } +VQSHLU_N/VQSHLUQ_N:41:result_uint64x1 [] = { fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:42:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHLU_N/VQSHLUQ_N:43:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:44:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:45:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:46:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:47:result_uint8x16 [] = { fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, fe, } +VQSHLU_N/VQSHLUQ_N:48:result_uint16x8 [] = { fffe, fffe, fffe, fffe, fffe, fffe, fffe, fffe, } +VQSHLU_N/VQSHLUQ_N:49:result_uint32x4 [] = { fffffffe, fffffffe, fffffffe, fffffffe, } +VQSHLU_N/VQSHLUQ_N:50:result_uint64x2 [] = { fffffffffffffffe, fffffffffffffffe, } +VQSHLU_N/VQSHLUQ_N:51:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) overflow output: +VQSHLU_N/VQSHLUQ_N:52:vqshlu_n_s8 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:53:vqshlu_n_s16 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:54:vqshlu_n_s32 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:55:vqshlu_n_s64 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:56:vqshluq_n_s8 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:57:vqshluq_n_s16 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:58:vqshluq_n_s32 Neon overflow 1 +VQSHLU_N/VQSHLUQ_N:59:vqshluq_n_s64 Neon overflow 1 + +VQSHLU_N/VQSHLUQ_N (check saturation/overflow: shift by 2) output: +VQSHLU_N/VQSHLUQ_N:60:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:61:result_int16x4 [] = { 3333, 3333, 3333, 
3333, } +VQSHLU_N/VQSHLUQ_N:62:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:63:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:64:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:65:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:66:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:67:result_uint64x1 [] = { ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:68:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHLU_N/VQSHLUQ_N:69:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:70:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:71:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:72:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:73:result_uint8x16 [] = { ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHLU_N/VQSHLUQ_N:74:result_uint16x8 [] = { ffff, ffff, ffff, ffff, ffff, ffff, ffff, ffff, } +VQSHLU_N/VQSHLUQ_N:75:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } +VQSHLU_N/VQSHLUQ_N:76:result_uint64x2 [] = { ffffffffffffffff, ffffffffffffffff, } +VQSHLU_N/VQSHLUQ_N:77:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHLU_N/VQSHLUQ_N overflow output: +VQSHLU_N/VQSHLUQ_N:78:vqshlu_n_s8 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:79:vqshlu_n_s16 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:80:vqshlu_n_s32 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:81:vqshlu_n_s64 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:82:vqshluq_n_s8 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:83:vqshluq_n_s16 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:84:vqshluq_n_s32 Neon overflow 0 +VQSHLU_N/VQSHLUQ_N:85:vqshluq_n_s64 Neon overflow 0 + +VQSHLU_N/VQSHLUQ_N output: 
+VQSHLU_N/VQSHLUQ_N:86:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:87:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:88:result_int32x2 [] = { 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:89:result_int64x1 [] = { 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:90:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VQSHLU_N/VQSHLUQ_N:91:result_uint16x4 [] = { 8, 8, 8, 8, } +VQSHLU_N/VQSHLUQ_N:92:result_uint32x2 [] = { 18, 18, } +VQSHLU_N/VQSHLUQ_N:93:result_uint64x1 [] = { 40, } +VQSHLU_N/VQSHLUQ_N:94:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHLU_N/VQSHLUQ_N:95:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHLU_N/VQSHLUQ_N:96:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHLU_N/VQSHLUQ_N:97:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHLU_N/VQSHLUQ_N:98:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHLU_N/VQSHLUQ_N:99:result_uint8x16 [] = { a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, a0, } +VQSHLU_N/VQSHLUQ_N:100:result_uint16x8 [] = { 180, 180, 180, 180, 180, 180, 180, 180, } +VQSHLU_N/VQSHLUQ_N:101:result_uint32x4 [] = { 380, 380, 380, 380, } +VQSHLU_N/VQSHLUQ_N:102:result_uint64x2 [] = { 800, 800, } +VQSHLU_N/VQSHLUQ_N:103:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCLZ/VCLZQ output: +VCLZ/VCLZQ:0:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VCLZ/VCLZQ:1:result_int16x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:2:result_int32x2 [] = { 11, 11, } +VCLZ/VCLZQ:3:result_int64x1 [] = { 3333333333333333, } +VCLZ/VCLZQ:4:result_uint8x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:5:result_uint16x4 [] = { 0, 0, 0, 0, } +VCLZ/VCLZQ:6:result_uint32x2 [] = { 5, 5, } +VCLZ/VCLZQ:7:result_uint64x1 [] = { 3333333333333333, } 
+VCLZ/VCLZQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCLZ/VCLZQ:9:result_int8x16 [] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, } +VCLZ/VCLZQ:10:result_int16x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:11:result_int32x4 [] = { 3, 3, 3, 3, } +VCLZ/VCLZQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:13:result_uint8x16 [] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, } +VCLZ/VCLZQ:14:result_uint16x8 [] = { d, d, d, d, d, d, d, d, } +VCLZ/VCLZQ:15:result_uint32x4 [] = { 1f, 1f, 1f, 1f, } +VCLZ/VCLZQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLZ/VCLZQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCLS/VCLSQ (positive input) output: +VCLS/VCLSQ:0:result_int8x8 [] = { 6, 6, 6, 6, 6, 6, 6, 6, } +VCLS/VCLSQ:1:result_int16x4 [] = { 2, 2, 2, 2, } +VCLS/VCLSQ:2:result_int32x2 [] = { 19, 19, } +VCLS/VCLSQ:3:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:4:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:7:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCLS/VCLSQ:9:result_int8x16 [] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:10:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:11:result_int32x4 [] = { 14, 14, 14, 14, } +VCLS/VCLSQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:15:result_uint32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VCLS/VCLSQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCLS/VCLSQ (positive input) output: +VCLS/VCLSQ:18:result_int8x8 [] = { 7, 7, 7, 7, 7, 7, 7, 7, } +VCLS/VCLSQ:19:result_int16x4 [] = { 1, 1, 1, 1, } +VCLS/VCLSQ:20:result_int32x2 [] = { 1, 1, } +VCLS/VCLSQ:21:result_int64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:22:result_uint8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:23:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:24:result_uint32x2 [] = { 33333333, 33333333, } +VCLS/VCLSQ:25:result_uint64x1 [] = { 3333333333333333, } +VCLS/VCLSQ:26:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCLS/VCLSQ:27:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCLS/VCLSQ:28:result_int16x8 [] = { 2, 2, 2, 2, 2, 2, 2, 2, } +VCLS/VCLSQ:29:result_int32x4 [] = { 0, 0, 0, 0, } +VCLS/VCLSQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VCLS/VCLSQ:32:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCLS/VCLSQ:33:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCLS/VCLSQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCLS/VCLSQ:35:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VCNT/VCNTQ output: +VCNT/VCNTQ:0:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, } +VCNT/VCNTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:2:result_int32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:3:result_int64x1 [] = { 3333333333333333, } 
+VCNT/VCNTQ:4:result_uint8x8 [] = { 3, 3, 3, 3, 3, 3, 3, 3, } +VCNT/VCNTQ:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:6:result_uint32x2 [] = { 33333333, 33333333, } +VCNT/VCNTQ:7:result_uint64x1 [] = { 3333333333333333, } +VCNT/VCNTQ:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VCNT/VCNTQ:9:result_int8x16 [] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } +VCNT/VCNTQ:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:13:result_uint8x16 [] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, } +VCNT/VCNTQ:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VCNT/VCNTQ:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VCNT/VCNTQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VCNT/VCNTQ:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRN_N overflow output: +VQSHRN_N:0:vqshrn_n_s16 Neon overflow 0 +VQSHRN_N:1:vqshrn_n_s32 Neon overflow 0 +VQSHRN_N:2:vqshrn_n_s64 Neon overflow 0 +VQSHRN_N:3:vqshrn_n_u16 Neon overflow 1 +VQSHRN_N:4:vqshrn_n_u32 Neon overflow 1 +VQSHRN_N:5:vqshrn_n_u64 Neon overflow 1 + +VQSHRN_N output: +VQSHRN_N:6:result_int8x8 [] = { fffffff8, fffffff8, fffffff9, fffffff9, fffffffa, fffffffa, fffffffb, fffffffb, } +VQSHRN_N:7:result_int16x4 [] = { fffffff8, fffffff8, fffffff9, fffffff9, } +VQSHRN_N:8:result_int32x2 [] = { fffffffc, fffffffc, } +VQSHRN_N:9:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:10:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:11:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:12:result_uint32x2 [] = { ffffffff, ffffffff, } 
+VQSHRN_N:13:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:14:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRN_N:15:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:16:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:17:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:18:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:19:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:20:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:21:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:22:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:23:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRN_N (check saturation: shift by 3) overflow output: +VQSHRN_N:24:vqshrn_n_s16 Neon overflow 1 +VQSHRN_N:25:vqshrn_n_s32 Neon overflow 1 +VQSHRN_N:26:vqshrn_n_s64 Neon overflow 1 +VQSHRN_N:27:vqshrn_n_u16 Neon overflow 1 +VQSHRN_N:28:vqshrn_n_u32 Neon overflow 1 +VQSHRN_N:29:vqshrn_n_u64 Neon overflow 1 + +VQSHRN_N (check saturation: shift by 3) output: +VQSHRN_N:30:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:31:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:32:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:33:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:34:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:35:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:36:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:37:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:38:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } 
+VQSHRN_N:39:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:40:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:41:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:42:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:43:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:44:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:45:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:46:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:47:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRN_N (check saturation: shift by max) overflow output: +VQSHRN_N:48:vqshrn_n_s16 Neon overflow 0 +VQSHRN_N:49:vqshrn_n_s32 Neon overflow 0 +VQSHRN_N:50:vqshrn_n_s64 Neon overflow 0 +VQSHRN_N:51:vqshrn_n_u16 Neon overflow 0 +VQSHRN_N:52:vqshrn_n_u32 Neon overflow 0 +VQSHRN_N:53:vqshrn_n_u64 Neon overflow 0 + +VQSHRN_N (check saturation: shift by max) output: +VQSHRN_N:54:result_int8x8 [] = { 7f, 7f, 7f, 7f, 7f, 7f, 7f, 7f, } +VQSHRN_N:55:result_int16x4 [] = { 7fff, 7fff, 7fff, 7fff, } +VQSHRN_N:56:result_int32x2 [] = { 7fffffff, 7fffffff, } +VQSHRN_N:57:result_int64x1 [] = { 3333333333333333, } +VQSHRN_N:58:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRN_N:59:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRN_N:60:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRN_N:61:result_uint64x1 [] = { 3333333333333333, } +VQSHRN_N:62:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRN_N:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:64:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 
3333, 3333, 3333, } +VQSHRN_N:65:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRN_N:68:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRN_N:69:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRN_N:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRN_N:71:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VPMAX output: +VPMAX:0:result_int8x8 [] = { fffffff1, fffffff3, fffffff5, fffffff7, fffffff1, fffffff3, fffffff5, fffffff7, } +VPMAX:1:result_int16x4 [] = { fffffff1, fffffff3, fffffff1, fffffff3, } +VPMAX:2:result_int32x2 [] = { fffffff1, fffffff1, } +VPMAX:3:result_int64x1 [] = { 3333333333333333, } +VPMAX:4:result_uint8x8 [] = { f1, f3, f5, f7, f1, f3, f5, f7, } +VPMAX:5:result_uint16x4 [] = { fff1, fff3, fff1, fff3, } +VPMAX:6:result_uint32x2 [] = { fffffff1, fffffff1, } +VPMAX:7:result_uint64x1 [] = { 3333333333333333, } +VPMAX:8:result_float32x2 [] = { c1700000 -0x1.ep+3 -15, c1700000 -0x1.ep+3 -15, } +VPMAX:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMAX:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMAX:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMAX:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMAX:17:result_float32x4 [] = { 
33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VPMIN output: +VPMIN:0:result_int8x8 [] = { fffffff0, fffffff2, fffffff4, fffffff6, fffffff0, fffffff2, fffffff4, fffffff6, } +VPMIN:1:result_int16x4 [] = { fffffff0, fffffff2, fffffff0, fffffff2, } +VPMIN:2:result_int32x2 [] = { fffffff0, fffffff0, } +VPMIN:3:result_int64x1 [] = { 3333333333333333, } +VPMIN:4:result_uint8x8 [] = { f0, f2, f4, f6, f0, f2, f4, f6, } +VPMIN:5:result_uint16x4 [] = { fff0, fff2, fff0, fff2, } +VPMIN:6:result_uint32x2 [] = { fffffff0, fffffff0, } +VPMIN:7:result_uint64x1 [] = { 3333333333333333, } +VPMIN:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1800000 -0x1p+4 -16, } +VPMIN:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VPMIN:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VPMIN:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VPMIN:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VPMIN:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRUN_N (negative input) overflow output: +VQSHRUN_N:0:vqshrun_n_s16 Neon overflow 1 +VQSHRUN_N:1:vqshrun_n_s32 Neon overflow 1 +VQSHRUN_N:2:vqshrun_n_s64 Neon overflow 1 + +VQSHRUN_N (negative input) output: +VQSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } 
+VQSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:11:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRUN_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:20:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRUN_N (check saturation/overflow) overflow output: +VQSHRUN_N:21:vqshrun_n_s16 Neon overflow 1 +VQSHRUN_N:22:vqshrun_n_s32 Neon overflow 1 +VQSHRUN_N:23:vqshrun_n_s64 Neon overflow 1 + +VQSHRUN_N (check saturation/overflow) output: +VQSHRUN_N:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:26:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:27:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:28:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQSHRUN_N:29:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQSHRUN_N:30:result_uint32x2 [] = { ffffffff, ffffffff, } +VQSHRUN_N:31:result_uint64x1 [] = { 3333333333333333, } 
+VQSHRUN_N:32:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRUN_N:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:41:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQSHRUN_N overflow output: +VQSHRUN_N:42:vqshrun_n_s16 Neon overflow 0 +VQSHRUN_N:43:vqshrun_n_s32 Neon overflow 1 +VQSHRUN_N:44:vqshrun_n_s64 Neon overflow 0 + +VQSHRUN_N output: +VQSHRUN_N:45:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:46:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQSHRUN_N:47:result_int32x2 [] = { 33333333, 33333333, } +VQSHRUN_N:48:result_int64x1 [] = { 3333333333333333, } +VQSHRUN_N:49:result_uint8x8 [] = { 48, 48, 48, 48, 48, 48, 48, 48, } +VQSHRUN_N:50:result_uint16x4 [] = { 0, 0, 0, 0, } +VQSHRUN_N:51:result_uint32x2 [] = { deadbe, deadbe, } +VQSHRUN_N:52:result_uint64x1 [] = { 3333333333333333, } +VQSHRUN_N:53:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQSHRUN_N:54:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:55:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:56:result_int32x4 [] = { 33333333, 
33333333, 33333333, 33333333, } +VQSHRUN_N:57:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:58:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQSHRUN_N:59:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQSHRUN_N:60:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQSHRUN_N:61:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQSHRUN_N:62:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N (negative input) overflow output: +VQRSHRUN_N:0:vqrshrun_n_s16 Neon overflow 0 +VQRSHRUN_N:1:vqrshrun_n_s32 Neon overflow 0 +VQRSHRUN_N:2:vqrshrun_n_s64 Neon overflow 1 + +VQRSHRUN_N (negative input) output: +VQRSHRUN_N:3:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:4:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:5:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:6:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:7:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:8:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:9:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:10:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:11:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:12:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:13:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:14:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:15:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:16:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:17:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VQRSHRUN_N:18:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:19:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:20:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N (check saturation/overflow: shift by 1) overflow output: +VQRSHRUN_N:21:vqrshrun_n_s16 Neon overflow 1 +VQRSHRUN_N:22:vqrshrun_n_s32 Neon overflow 1 +VQRSHRUN_N:23:vqrshrun_n_s64 Neon overflow 1 + +VQRSHRUN_N (check saturation/overflow: shift by 1) output: +VQRSHRUN_N:24:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:25:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:26:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:27:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:28:result_uint8x8 [] = { ff, ff, ff, ff, ff, ff, ff, ff, } +VQRSHRUN_N:29:result_uint16x4 [] = { ffff, ffff, ffff, ffff, } +VQRSHRUN_N:30:result_uint32x2 [] = { ffffffff, ffffffff, } +VQRSHRUN_N:31:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:32:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:33:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:34:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:35:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:36:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:37:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:38:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:39:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:40:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:41:result_float32x4 [] = { 33333333 0x1.666666p-25 
4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N (check saturation/overflow: shift by max, positive input) overflow output: +VQRSHRUN_N:42:vqrshrun_n_s16 Neon overflow 0 +VQRSHRUN_N:43:vqrshrun_n_s32 Neon overflow 0 +VQRSHRUN_N:44:vqrshrun_n_s64 Neon overflow 0 + +VQRSHRUN_N (check saturation/overflow: shift by max, positive input) output: +VQRSHRUN_N:45:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:46:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:47:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:48:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:49:result_uint8x8 [] = { 80, 80, 80, 80, 80, 80, 80, 80, } +VQRSHRUN_N:50:result_uint16x4 [] = { 8000, 8000, 8000, 8000, } +VQRSHRUN_N:51:result_uint32x2 [] = { 80000000, 80000000, } +VQRSHRUN_N:52:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:53:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:54:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:55:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:56:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:57:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:58:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:59:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:60:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:61:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:62:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N (check saturation/overflow: shift by max, negative 
input) overflow output: +VQRSHRUN_N:63:vqrshrun_n_s16 Neon overflow 1 +VQRSHRUN_N:64:vqrshrun_n_s32 Neon overflow 1 +VQRSHRUN_N:65:vqrshrun_n_s64 Neon overflow 1 + +VQRSHRUN_N (check saturation/overflow: shift by max, negative input) output: +VQRSHRUN_N:66:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:67:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:68:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:69:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:70:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VQRSHRUN_N:71:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:72:result_uint32x2 [] = { 0, 0, } +VQRSHRUN_N:73:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:74:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:75:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:76:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:77:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:78:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:79:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:80:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:81:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:82:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:83:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VQRSHRUN_N overflow output: +VQRSHRUN_N:84:vqrshrun_n_s16 Neon overflow 0 +VQRSHRUN_N:85:vqrshrun_n_s32 Neon overflow 1 +VQRSHRUN_N:86:vqrshrun_n_s64 Neon overflow 0 + +VQRSHRUN_N output: +VQRSHRUN_N:87:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:88:result_int16x4 [] 
= { 3333, 3333, 3333, 3333, } +VQRSHRUN_N:89:result_int32x2 [] = { 33333333, 33333333, } +VQRSHRUN_N:90:result_int64x1 [] = { 3333333333333333, } +VQRSHRUN_N:91:result_uint8x8 [] = { 49, 49, 49, 49, 49, 49, 49, 49, } +VQRSHRUN_N:92:result_uint16x4 [] = { 0, 0, 0, 0, } +VQRSHRUN_N:93:result_uint32x2 [] = { deadbf, deadbf, } +VQRSHRUN_N:94:result_uint64x1 [] = { 3333333333333333, } +VQRSHRUN_N:95:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VQRSHRUN_N:96:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:97:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:98:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:99:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:100:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VQRSHRUN_N:101:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VQRSHRUN_N:102:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VQRSHRUN_N:103:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VQRSHRUN_N:104:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VST2_LANE/VST2Q_LANE chunk 0 output: +VST2_LANE/VST2Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:4:result_uint8x8 [] = { f0, f1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:5:result_uint16x4 [] = { fff0, fff1, 0, 0, } +VST2_LANE/VST2Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST2_LANE/VST2Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } 
+VST2_LANE/VST2Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VST2_LANE/VST2Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:10:result_int16x8 [] = { fffffff0, fffffff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:11:result_int32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:14:result_uint16x8 [] = { fff0, fff1, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, 0, 0, } +VST2_LANE/VST2Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST2_LANE/VST2Q_LANE chunk 1 output: +VST2_LANE/VST2Q_LANE:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:19:result_int16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:20:result_int32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:24:result_uint32x2 [] = { 0, 0, } +VST2_LANE/VST2Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VST2_LANE/VST2Q_LANE:26:result_float32x2 [] = { 0 0x0p+0 0, 0 0x0p+0 0, } +VST2_LANE/VST2Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST2_LANE/VST2Q_LANE:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:29:result_int32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, } +VST2_LANE/VST2Q_LANE:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VST2_LANE/VST2Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST2_LANE/VST2Q_LANE:35:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST3_LANE/VST3Q_LANE chunk 0 output: +VST3_LANE/VST3Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, 0, } +VST3_LANE/VST3Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST3_LANE/VST3Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VST3_LANE/VST3Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:14:result_uint16x8 [] = { fff0, fff1, fff2, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, 0, } +VST3_LANE/VST3Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, 0 0x0p+0 0, } + +VST3_LANE/VST3Q_LANE chunk 1 output: 
+VST3_LANE/VST3Q_LANE:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:19:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:20:result_int32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:24:result_uint32x2 [] = { fffffff2, 0, } +VST3_LANE/VST3Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, 0 0x0p+0 0, } +VST3_LANE/VST3Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:29:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:35:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST3_LANE/VST3Q_LANE chunk 2 output: +VST3_LANE/VST3Q_LANE:36:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:37:result_int16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:38:result_int32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:39:result_int64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:40:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:41:result_uint16x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:42:result_uint32x2 [] = { 0, 0, } +VST3_LANE/VST3Q_LANE:43:result_uint64x1 [] = { 3333333333333333, } +VST3_LANE/VST3Q_LANE:44:result_float32x2 [] = { 0 0x0p+0 0, 0 
0x0p+0 0, } +VST3_LANE/VST3Q_LANE:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:46:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:47:result_int32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST3_LANE/VST3Q_LANE:50:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:51:result_uint32x4 [] = { 0, 0, 0, 0, } +VST3_LANE/VST3Q_LANE:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST3_LANE/VST3Q_LANE:53:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST4_LANE/VST4Q_LANE chunk 0 output: +VST4_LANE/VST4Q_LANE:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:2:result_int32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:3:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:4:result_uint8x8 [] = { f0, f1, f2, f3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VST4_LANE/VST4Q_LANE:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VST4_LANE/VST4Q_LANE:7:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:8:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VST4_LANE/VST4Q_LANE:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:17:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } + +VST4_LANE/VST4Q_LANE chunk 1 output: +VST4_LANE/VST4Q_LANE:18:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:19:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:20:result_int32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:21:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:22:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:23:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:24:result_uint32x2 [] = { fffffff2, fffffff3, } +VST4_LANE/VST4Q_LANE:25:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:26:result_float32x2 [] = { c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VST4_LANE/VST4Q_LANE:27:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:28:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:29:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:31:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:32:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:33:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:35:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST4_LANE/VST4Q_LANE chunk 2 output: +VST4_LANE/VST4Q_LANE:36:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } 
+VST4_LANE/VST4Q_LANE:37:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:38:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:39:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:40:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:41:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:42:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:43:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:44:result_float32x2 [] = { 0 0x0p+0 0, 0 0x0p+0 0, } +VST4_LANE/VST4Q_LANE:45:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:46:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:47:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:49:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:50:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:51:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:53:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VST4_LANE/VST4Q_LANE chunk 3 output: +VST4_LANE/VST4Q_LANE:54:result_int8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:55:result_int16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:56:result_int32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:57:result_int64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:58:result_uint8x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:59:result_uint16x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:60:result_uint32x2 [] = { 0, 0, } +VST4_LANE/VST4Q_LANE:61:result_uint64x1 [] = { 3333333333333333, } +VST4_LANE/VST4Q_LANE:62:result_float32x2 [] = { 0 0x0p+0 0, 0 0x0p+0 0, } +VST4_LANE/VST4Q_LANE:63:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:64:result_int16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:65:result_int32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:67:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VST4_LANE/VST4Q_LANE:68:result_uint16x8 [] = { 0, 0, 0, 0, 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:69:result_uint32x4 [] = { 0, 0, 0, 0, } +VST4_LANE/VST4Q_LANE:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VST4_LANE/VST4Q_LANE:71:result_float32x4 [] = { 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, 0 0x0p+0 0, } + +VTBL1 output: +VTBL1:0:result_int8x8 [] = { 0, fffffff2, fffffff2, fffffff2, 0, 0, fffffff2, fffffff2, } +VTBL1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL1:3:result_int64x1 [] = { 3333333333333333, } +VTBL1:4:result_uint8x8 [] = { 0, f3, f3, f3, 0, 0, f3, f3, } +VTBL1:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL1:7:result_uint64x1 [] = { 3333333333333333, } +VTBL1:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBL1:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL1:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL1:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL1:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL1:17:result_float32x4 [] = { 33333333 0x1.666666p-25 
4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBL2 output: +VTBL2:0:result_int8x8 [] = { fffffff6, fffffff3, fffffff3, fffffff3, 0, 0, fffffff3, fffffff3, } +VTBL2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL2:3:result_int64x1 [] = { 3333333333333333, } +VTBL2:4:result_uint8x8 [] = { f6, f5, f5, f5, 0, 0, f5, f5, } +VTBL2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL2:7:result_uint64x1 [] = { 3333333333333333, } +VTBL2:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBL2:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL2:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL2:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL2:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL2:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBL3 output: +VTBL3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 0, fffffff4, fffffff4, } +VTBL3:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL3:3:result_int64x1 [] = { 3333333333333333, } +VTBL3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, 0, f7, f7, } +VTBL3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } 
+VTBL3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL3:7:result_uint64x1 [] = { 3333333333333333, } +VTBL3:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBL3:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL3:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL3:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL3:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL3:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBL4 output: +VTBL4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 0, fffffff5, fffffff5, } +VTBL4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBL4:3:result_int64x1 [] = { 3333333333333333, } +VTBL4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, 0, f9, f9, } +VTBL4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBL4:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBL4:7:result_uint64x1 [] = { 3333333333333333, } +VTBL4:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBL4:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } 
+VTBL4:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBL4:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBL4:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBL4:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBL4:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBX1 output: +VTBX1:0:result_int8x8 [] = { 33, fffffff2, fffffff2, fffffff2, 33, 33, fffffff2, fffffff2, } +VTBX1:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX1:3:result_int64x1 [] = { 3333333333333333, } +VTBX1:4:result_uint8x8 [] = { cc, f3, f3, f3, cc, cc, f3, f3, } +VTBX1:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX1:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX1:7:result_uint64x1 [] = { 3333333333333333, } +VTBX1:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBX1:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX1:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX1:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX1:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX1:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBX2 output: +VTBX2:0:result_int8x8 [] 
= { fffffff6, fffffff3, fffffff3, fffffff3, 33, 33, fffffff3, fffffff3, } +VTBX2:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX2:3:result_int64x1 [] = { 3333333333333333, } +VTBX2:4:result_uint8x8 [] = { f6, f5, f5, f5, cc, cc, f5, f5, } +VTBX2:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX2:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX2:7:result_uint64x1 [] = { 3333333333333333, } +VTBX2:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBX2:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX2:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX2:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX2:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX2:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBX3 output: +VTBX3:0:result_int8x8 [] = { fffffff8, fffffff4, fffffff4, fffffff4, ffffffff, 33, fffffff4, fffffff4, } +VTBX3:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX3:3:result_int64x1 [] = { 3333333333333333, } +VTBX3:4:result_uint8x8 [] = { f8, f7, f7, f7, ff, cc, f7, f7, } +VTBX3:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX3:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX3:7:result_uint64x1 [] = { 3333333333333333, } +VTBX3:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 
0x1.666666p-25 4.17233e-08, } +VTBX3:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX3:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX3:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX3:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX3:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VTBX4 output: +VTBX4:0:result_int8x8 [] = { fffffffa, fffffff5, fffffff5, fffffff5, 3, 33, fffffff5, fffffff5, } +VTBX4:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:2:result_int32x2 [] = { 33333333, 33333333, } +VTBX4:3:result_int64x1 [] = { 3333333333333333, } +VTBX4:4:result_uint8x8 [] = { fa, f9, f9, f9, 3, cc, f9, f9, } +VTBX4:5:result_uint16x4 [] = { 3333, 3333, 3333, 3333, } +VTBX4:6:result_uint32x2 [] = { 33333333, 33333333, } +VTBX4:7:result_uint64x1 [] = { 3333333333333333, } +VTBX4:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VTBX4:9:result_int8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:10:result_int16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } +VTBX4:11:result_int32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:12:result_int64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:13:result_uint8x16 [] = { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, } +VTBX4:14:result_uint16x8 [] = { 3333, 3333, 3333, 3333, 3333, 3333, 3333, 3333, } 
+VTBX4:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, } +VTBX4:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, } +VTBX4:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + +VRECPE/VRECPEQ (positive input) output: +VRECPE/VRECPEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRECPE/VRECPEQ:1:result_uint32x4 [] = { bf000000, bf000000, bf000000, bf000000, } +VRECPE/VRECPEQ:2:result_float32x2 [] = { 3f068000 0x1.0dp-1 0.525391, 3f068000 0x1.0dp-1 0.525391, } +VRECPE/VRECPEQ:3:result_float32x4 [] = { 3c030000 0x1.06p-7 0.00799561, 3c030000 0x1.06p-7 0.00799561, 3c030000 0x1.06p-7 0.00799561, 3c030000 0x1.06p-7 0.00799561, } + +VRECPE/VRECPEQ (negative input) output: +VRECPE/VRECPEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRECPE/VRECPEQ:5:result_uint32x4 [] = { ee800000, ee800000, ee800000, ee800000, } +VRECPE/VRECPEQ:6:result_float32x2 [] = { bdcc8000 -0x1.99p-4 -0.0998535, bdcc8000 -0x1.99p-4 -0.0998535, } +VRECPE/VRECPEQ:7:result_float32x4 [] = { bc030000 -0x1.06p-7 -0.00799561, bc030000 -0x1.06p-7 -0.00799561, bc030000 -0x1.06p-7 -0.00799561, bc030000 -0x1.06p-7 -0.00799561, } + +VRSQRTE/VRSQRTEQ output: +VRSQRTE/VRSQRTEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VRSQRTE/VRSQRTEQ:1:result_uint32x4 [] = { 9c800000, 9c800000, 9c800000, 9c800000, } +VRSQRTE/VRSQRTEQ:2:result_float32x2 [] = { 3e8e8000 0x1.1dp-2 0.27832, 3e8e8000 0x1.1dp-2 0.27832, } +VRSQRTE/VRSQRTEQ:3:result_float32x4 [] = { 3e700000 0x1.ep-3 0.234375, 3e700000 0x1.ep-3 0.234375, 3e700000 0x1.ep-3 0.234375, 3e700000 0x1.ep-3 0.234375, } + +VRSQRTE/VRSQRTEQ (2) output: +VRSQRTE/VRSQRTEQ:4:result_uint32x2 [] = { 80000000, 80000000, } +VRSQRTE/VRSQRTEQ:5:result_uint32x4 [] = { ae800000, ae800000, ae800000, ae800000, } + +VRSQRTE/VRSQRTEQ (3) output: +VRSQRTE/VRSQRTEQ:6:result_uint32x2 [] = { b4800000, b4800000, } 
+VRSQRTE/VRSQRTEQ:7:result_uint32x4 [] = { ed000000, ed000000, ed000000, ed000000, } + + +Integer (non-NEON) intrinsics +__clz(0xffffffff) = 0 +__clz(0x7fffffff) = 1 +__clz(0x3fffffff) = 2 +__clz(0x1fffffff) = 3 +__clz(0xfffffff) = 4 +__clz(0x7ffffff) = 5 +__clz(0x3ffffff) = 6 +__clz(0x1ffffff) = 7 +__clz(0xffffff) = 8 +__clz(0x7fffff) = 9 +__clz(0x3fffff) = 10 +__clz(0x1fffff) = 11 +__clz(0xfffff) = 12 +__clz(0x7ffff) = 13 +__clz(0x3ffff) = 14 +__clz(0x1ffff) = 15 +__clz(0xffff) = 16 +__clz(0x7fff) = 17 +__clz(0x3fff) = 18 +__clz(0x1fff) = 19 +__clz(0xfff) = 20 +__clz(0x7ff) = 21 +__clz(0x3ff) = 22 +__clz(0x1ff) = 23 +__clz(0xff) = 24 +__clz(0x7f) = 25 +__clz(0x3f) = 26 +__clz(0x1f) = 27 +__clz(0xf) = 28 +__clz(0x7) = 29 +__clz(0x3) = 30 +__clz(0x1) = 31 +__clz(0) = 32 +__qadd(0x1, 0x2) = 0x3 sat 0 +__qadd(0xffffffff, 0xfffffffe) = 0xfffffffd sat 0 +__qadd(0xffffffff, 0x2) = 0x1 sat 0 +__qadd(0x7000, 0x7000) = 0xe000 sat 0 +__qadd(0x8fff, 0x8fff) = 0x11ffe sat 0 +__qadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +__qadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +__qdbl(0x1) = 0x2 sat 0 +__qdbl(0x70000000) = 0x7fffffff sat 1 +__qdbl(0x8fffffff) = 0x80000000 sat 1 +__qdbl(0xefffffff) = 0xdffffffe sat 0 +__qsub(0x1, 0x2) = 0xffffffff sat 0 +__qsub(0xffffffff, 0xfffffffe) = 0x1 sat 0 +__qsub(0xffffffff, 0x2) = 0xfffffffd sat 0 +__qsub(0x7000, 0xffff9000) = 0xe000 sat 0 +__qsub(0x8fff, 0xffff7001) = 0x11ffe sat 0 +__qsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +__qsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +__qsub(0, 0x80000000) = 0x7fffffff sat 1 +__rbit(0x12345678) = 0x1e6a2c48 +__rev(0x12345678) = 0x78563412 +__ssat(0x12345678, 30) = 0x12345678 sat 0 +__ssat(0x12345678, 19) = 0x3ffff sat 1 +__ssat(0x87654321, 29) = 0xf0000000 sat 1 +__ssat(0x87654321, 12) = 0xfffff800 sat 1 +__ssat(0x87654321, 32) = 0x87654321 sat 0 +__ssat(0x87654321, 1) = 0xffffffff sat 1 +__usat(0x12345678, 30) = 0x12345678 sat 0 +__usat(0x12345678, 19) = 0x7ffff sat 1 +__usat(0x87654321, 
29) = 0 sat 1 +__usat(0x87654321, 12) = 0 sat 1 +__usat(0x87654321, 31) = 0 sat 1 +__usat(0x87654321, 0) = 0 sat 1 + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:0:result_uint32x2 [] = { ffffffff, 0, } +VCAGE/VCAGEQ:1:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, 0, } + +VCAGE/VCAGEQ output: +VCAGE/VCAGEQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGE/VCAGEQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:0:result_uint32x2 [] = { ffffffff, ffffffff, } +VCALE/VCALEQ:1:result_uint32x4 [] = { 0, 0, ffffffff, ffffffff, } + +VCALE/VCALEQ output: +VCALE/VCALEQ:2:result_uint32x2 [] = { 0, 0, } +VCALE/VCALEQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:0:result_uint32x2 [] = { 0, 0, } +VCAGT/VCAGTQ:1:result_uint32x4 [] = { ffffffff, ffffffff, 0, 0, } + +VCAGT/VCAGTQ output: +VCAGT/VCAGTQ:2:result_uint32x2 [] = { ffffffff, ffffffff, } +VCAGT/VCAGTQ:3:result_uint32x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:0:result_uint32x2 [] = { 0, ffffffff, } +VCALT/VCALTQ:1:result_uint32x4 [] = { 0, 0, 0, ffffffff, } + +VCALT/VCALTQ output: +VCALT/VCALTQ:2:result_uint32x2 [] = { 0, 0, } +VCALT/VCALTQ:3:result_uint32x4 [] = { 0, 0, 0, 0, } + +VCVT/VCVTQ output: +VCVT/VCVTQ:0:result_float32x2 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, } +VCVT/VCVTQ:1:result_float32x2 [] = { 4f800000 0x1p+32 4.29497e+09, 4f800000 0x1p+32 4.29497e+09, } +VCVT/VCVTQ:2:result_float32x4 [] = { c1800000 -0x1p+4 -16, c1700000 -0x1.ep+3 -15, c1600000 -0x1.cp+3 -14, c1500000 -0x1.ap+3 -13, } +VCVT/VCVTQ:3:result_float32x4 [] = { 4f800000 0x1p+32 4.29497e+09, 4f800000 0x1p+32 4.29497e+09, 4f800000 0x1p+32 4.29497e+09, 4f800000 0x1p+32 4.29497e+09, } +VCVT/VCVTQ:4:result_int32x2 [] = { fffffff1, 5, } +VCVT/VCVTQ:5:result_uint32x2 [] = { 0, 5, } +VCVT/VCVTQ:6:result_int32x4 [] = { fffffff0, fffffff1, fffffff1, 5, } +VCVT/VCVTQ:7:result_uint32x4 [] = { 0, 0, 0, 5, } + 
+VCVT_N/VCVTQ_N output: +VCVT_N/VCVTQ_N:8:result_float32x2 [] = { c0800000 -0x1p+2 -4, c0700000 -0x1.ep+1 -3.75, } +VCVT_N/VCVTQ_N:9:result_float32x2 [] = { 4c000000 0x1p+25 3.35544e+07, 4c000000 0x1p+25 3.35544e+07, } +VCVT_N/VCVTQ_N:10:result_float32x4 [] = { b2800000 -0x1p-26 -1.49012e-08, b2700000 -0x1.ep-27 -1.39698e-08, b2600000 -0x1.cp-27 -1.30385e-08, b2500000 -0x1.ap-27 -1.21072e-08, } +VCVT_N/VCVTQ_N:11:result_float32x4 [] = { 49800000 0x1p+20 1.04858e+06, 49800000 0x1p+20 1.04858e+06, 49800000 0x1p+20 1.04858e+06, 49800000 0x1p+20 1.04858e+06, } +VCVT_N/VCVTQ_N:12:result_int32x2 [] = { ff0b3333, 54cccd, } +VCVT_N/VCVTQ_N:13:result_uint32x2 [] = { 0, 15, } +VCVT_N/VCVTQ_N:14:result_int32x4 [] = { fffe0000, fffe2000, fffe1667, a999, } +VCVT_N/VCVTQ_N:15:result_uint32x4 [] = { 0, 0, 0, a, } + +VCVT/VCVTQ (check rounding) output: +VCVT/VCVTQ:16:result_int32x2 [] = { a, a, } +VCVT/VCVTQ:17:result_uint32x2 [] = { a, a, } +VCVT/VCVTQ:18:result_int32x4 [] = { 7d, 7d, 7d, 7d, } +VCVT/VCVTQ:19:result_uint32x4 [] = { 7d, 7d, 7d, 7d, } + +VCVT_N/VCVTQ_N (check rounding) output: +VCVT_N/VCVTQ_N:20:result_int32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:21:result_uint32x2 [] = { a66666, a66666, } +VCVT_N/VCVTQ_N:22:result_int32x4 [] = { fbccc, fbccc, fbccc, fbccc, } +VCVT_N/VCVTQ_N:23:result_uint32x4 [] = { fbccc, fbccc, fbccc, fbccc, } + +VCVT_N/VCVTQ_N (check saturation) output: +VCVT_N/VCVTQ_N:24:result_int32x2 [] = { 7fffffff, 7fffffff, } +VCVT_N/VCVTQ_N:25:result_int32x4 [] = { 7fffffff, 7fffffff, 7fffffff, 7fffffff, } + +VRECPS/VRECPSQ output: +VRECPS/VRECPSQ:0:result_float32x2 [] = { c2e19eb7 -0x1.c33d6ep+6 -112.81, c2e19eb7 -0x1.c33d6ep+6 -112.81, } +VRECPS/VRECPSQ:1:result_float32x4 [] = { c1db851f -0x1.b70a3ep+4 -27.44, c1db851f -0x1.b70a3ep+4 -27.44, c1db851f -0x1.b70a3ep+4 -27.44, c1db851f -0x1.b70a3ep+4 -27.44, } + +VRSQRTS/VRSQRTSQ output: +VRSQRTS/VRSQRTSQ:0:result_float32x2 [] = { c2796b84 -0x1.f2d708p+5 -62.355, c2796b84 -0x1.f2d708p+5 -62.355, } 
+VRSQRTS/VRSQRTSQ:1:result_float32x4 [] = { c0e4a3d8 -0x1.c947bp+2 -7.145, c0e4a3d8 -0x1.c947bp+2 -7.145, c0e4a3d8 -0x1.c947bp+2 -7.145, c0e4a3d8 -0x1.c947bp+2 -7.145, } + + +DSP (non-NEON) intrinsics +qadd(0x1, 0x2) = 0x3 sat 0 +qadd(0xffffffff, 0xfffffffe) = 0xfffffffd sat 0 +qadd(0xffffffff, 0x2) = 0x1 sat 0 +qadd(0x7000, 0x7000) = 0xe000 sat 0 +qadd(0x8fff, 0x8fff) = 0x11ffe sat 0 +qadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +qadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +qsub(0x1, 0x2) = 0xffffffff sat 0 +qsub(0xffffffff, 0xfffffffe) = 0x1 sat 0 +qsub(0xffffffff, 0x2) = 0xfffffffd sat 0 +qsub(0x7000, 0xffff9000) = 0xe000 sat 0 +qsub(0x8fff, 0xffff7001) = 0x11ffe sat 0 +qsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +qsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +qsub(0, 0x80000000) = 0x7fffffff sat 1 +qdadd(0x1, 0x2) = 0x5 sat 0 +qdadd(0xffffffff, 0xfffffffe) = 0xfffffffb sat 0 +qdadd(0xffffffff, 0x2) = 0x3 sat 0 +qdadd(0x7000, 0x7000) = 0x15000 sat 0 +qdadd(0x8fff, 0x8fff) = 0x1affd sat 0 +qdadd(0x70000000, 0x70000000) = 0x7fffffff sat 1 +qdadd(0, 0x70000000) = 0x7fffffff sat 1 +qdadd(0x8fffffff, 0x8fffffff) = 0x80000000 sat 1 +qdadd(0, 0x8fffffff) = 0x80000000 sat 1 +qdsub(0x1, 0x2) = 0xfffffffd sat 0 +qdsub(0xffffffff, 0xfffffffe) = 0x3 sat 0 +qdsub(0xffffffff, 0x2) = 0xfffffffb sat 0 +qdsub(0x7000, 0xffff9000) = 0x15000 sat 0 +qdsub(0x8fff, 0xffff7001) = 0x1affd sat 0 +qdsub(0x70000000, 0x90000000) = 0x7fffffff sat 1 +qdsub(0, 0x90000000) = 0x7fffffff sat 1 +qdsub(0x8fffffff, 0x70000001) = 0x80000000 sat 1 +qdsub(0, 0x70000001) = 0x80000001 sat 1 +smulbb(0x12345678, 0x12345678) = 0x1d34d840 +smulbt(0x12345678, 0x12345678) = 0x6260060 +smultb(0x12345678, 0x12345678) = 0x6260060 +smultt(0x12345678, 0x12345678) = 0x14b5a90 +smulbb(0xf123f456, 0xf123f456) = 0x880ce4 +smulbt(0xf123f456, 0xf123f456) = 0xad5dc2 +smultb(0xf123f456, 0xf123f456) = 0xad5dc2 +smultt(0xf123f456, 0xf123f456) = 0xdceac9 +smlabb(0x12345678, 0x12345678, 0x1020304) = 0x1e36db44 
+smlabt(0x12345678, 0x12345678, 0x1020304) = 0x7280364 +smlatb(0x12345678, 0x12345678, 0x1020304) = 0x7280364 +smlatt(0x12345678, 0x12345678, 0x1020304) = 0x24d5d94 +smlabb(0xf123f456, 0xf123f456, 0x1020304) = 0x18a0fe8 +smlabt(0xf123f456, 0xf123f456, 0x1020304) = 0x1af60c6 +smlatb(0xf123f456, 0xf123f456, 0x1020304) = 0x1af60c6 +smlatt(0xf123f456, 0xf123f456, 0x1020304) = 0x1deedcd +smlalbb(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xb7f1b730 +smlalbt(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xa0e2df50 +smlaltb(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780xa0e2df50 +smlaltt(&0x9abcdef0, &0x12345678, 0x12345678, 0x12345678) = 0x123456780x9c083980 +smlalbb(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b44ebd4 +smlalbt(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b6a3cb2 +smlaltb(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b6a3cb2 +smlaltt(&0x9abcdef0, &0x12345678, 0xf123f456, 0xf123f456) = 0x123456780x9b99c9b9 +smlalbb(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlalbt(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlaltb(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smlaltt(&0xffffffff, &0x12345678, 0x7fff7fff, 0x7fff7fff) = 0x123456790x3fff0000 +smulwb(0x12345678, 0x12345678) = 0x6261d94 +smulwt(0x12345678, 0x12345678) = 0x14b60b6 +smulwb(0xf123f456, 0xf123f456) = 0xad52a0 +smulwt(0xf123f456, 0xf123f456) = 0xdcdc99 +smlawb(0x12345678, 0x12345678, 0x1020304) = 0x7282098 +smlawt(0x12345678, 0x12345678, 0x1020304) = 0x24d63ba +smlawb(0xf123f456, 0xf123f456, 0x1020304) = 0x1af55a4 +smlawt(0xf123f456, 0xf123f456, 0X1020304) = 0x1dedf9d + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=0 and input Carry=0 +Checking saturate with input Overflow=0 and input Carry=0 +saturate(0x1) = 0x1 overflow 0 carry 0 +saturate(0xffffffff) = 0xffffffff overflow 0 carry 0 
+saturate(0x8000) = 0x7fff overflow 1 carry 0 +saturate(0xffff8000) = 0xffff8000 overflow 0 carry 0 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 0 +add(0x1, 0x1) = 0x2 overflow 0 carry 0 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 0 carry 0 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 0 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 0 +sub(0x1, 0x1) = 0 overflow 0 carry 0 +sub(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +sub(0x4e20, 0x4e20) = 0 overflow 0 carry 0 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 0 carry 0 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 0 +abs_s(0x1) = 0x1 overflow 0 carry 0 +abs_s(0xffffffff) = 0x1 overflow 0 carry 0 +abs_s(0xffff8000) = 0x7fff overflow 0 carry 0 +shl(0x1, 1) = 0x2 overflow 0 carry 0 +shl(0xa, 1) = 0x14 overflow 0 carry 0 +shl(0xfff, 10) = 0x7fff overflow 1 carry 0 +shl(0xfff, 20) = 0x7fff overflow 1 carry 0 +shl(0x1, -1) = 0 overflow 0 carry 0 +shl(0x14, -1) = 0xa overflow 0 carry 0 +shl(0xfff, -10) = 0x3 overflow 0 carry 0 +shl(0xfff, -64) = 0 overflow 0 carry 0 +shr(0x1, -1) = 0x2 overflow 0 carry 0 +shr(0xa, -1) = 0x14 overflow 0 carry 0 +shr(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr(0x1, 1) = 0 overflow 0 carry 0 +shr(0x14, 1) = 0xa overflow 0 carry 0 +shr(0xfff, 10) = 0x3 overflow 0 carry 0 +shr(0xfff, 64) = 0 overflow 0 carry 0 +mult(0x2, 0x2) = 0 overflow 0 carry 0 +mult(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +L_mult(0x2, 0x2) = 0x8 overflow 0 carry 0 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 0 carry 0 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +negate(0x1) = 0xffffffff overflow 0 carry 0 +negate(0xffffffff) = 0x1 overflow 0 carry 0 +negate(0xffff8000) = 0x7fff overflow 0 carry 0 +extract_h(0x1) = 0 overflow 0 carry 0 +extract_h(0xffffffff) = 0xffffffff overflow 0 carry 0 +extract_h(0xffff8000) = 0xffffffff overflow 0 carry 0 +extract_h(0x12345678) 
= 0x1234 overflow 0 carry 0 +extract_l(0x1) = 0x1 overflow 0 carry 0 +extract_l(0xffffffff) = 0xffffffff overflow 0 carry 0 +extract_l(0xffff8000) = 0xffff8000 overflow 0 carry 0 +extract_l(0x43218765) = 0xffff8765 overflow 0 carry 0 +round(0x1) = 0 overflow 0 carry 0 +round(0xffffffff) = 0 overflow 0 carry 0 +round(0xffff8000) = 0 overflow 0 carry 0 +round(0x43218765) = 0x4322 overflow 0 carry 0 +round(0x87654321) = 0xffff8765 overflow 0 carry 0 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 0 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 0 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 0 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 0 +L_add(0x1, 0x2) = 0x3 overflow 0 carry 0 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 0 +L_add(0xffffffff, 0x2) = 0x1 overflow 0 carry 0 +L_add(0x7000, 0x7000) = 0xe000 overflow 0 carry 0 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 0 carry 0 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 0 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 0 +L_sub(0x1, 0x2) = 0xffffffff overflow 0 carry 0 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 0 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 0 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 0 carry 0 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 0 carry 0 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 0 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 0 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 0 +Checking L_add_c with input Overflow=0 and input Carry=0 +L_add_c(0x1, 0x2) = 0x3 overflow 0 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 1 +L_add_c(0xffffffff, 
0x2) = 0x1 overflow 0 carry 1 +L_add_c(0x7000, 0x7000) = 0xe000 overflow 0 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11ffe overflow 0 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000000 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1ffffffe overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8ffffffe overflow 0 carry 1 +L_sub_c(0x1, 0x2) = 0xfffffffe overflow 0 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0 overflow 0 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffc overflow 0 carry 1 +L_sub_c(0x7000, 0x7000) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x8fff, 0x8fff) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x70000000, 0x70000000) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x8fffffff, 0x8fffffff) = 0xffffffff overflow 0 carry 0 +L_sub_c(0x1, 0x80000000) = 0x80000000 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x7fffffff overflow 1 carry 1 +Checking L_macNs with input Overflow=0 and input Carry=0 +L_macNs(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001233 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +Checking L_msuNs with input Overflow=0 and input Carry=0 +L_msuNs(0x1234, 0x2, 0x2) = 0x122b overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1231 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 0 +negate(0xffffffff) = 0x1 overflow 0 carry 0 +negate(0xffff8000) = 0x7fff overflow 0 carry 0 +mult_r(0x2, 0x2) = 0 overflow 0 carry 0 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +norm_s(0x1) = 0xe overflow 0 carry 0 +norm_s(0xffffffff) = 0xf overflow 0 carry 0 +norm_s(0xffff8000) = 0 overflow 0 carry 0 +norm_s(0x2ee0) = 0x1 overflow 0 carry 0 
+norm_l(0x1) = 0x1e overflow 0 carry 0 +norm_l(0xffffffff) = 0x1f overflow 0 carry 0 +norm_l(0xffff8000) = 0x10 overflow 0 carry 0 +norm_l(0x2ee0) = 0x11 overflow 0 carry 0 +norm_l(0x123456) = 0xa overflow 0 carry 0 +norm_l(0xabcdef) = 0x7 overflow 0 carry 0 +L_shl(0x1, 1) = 0x2 overflow 0 carry 0 +L_shl(0xa, 1) = 0x14 overflow 0 carry 0 +L_shl(0xfff, 10) = 0x3ffc00 overflow 0 carry 0 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 0 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 0 carry 0 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 0 +L_shl(0x1, -1) = 0 overflow 0 carry 0 +L_shl(0x14, -1) = 0xa overflow 0 carry 0 +L_shl(0xfff, -10) = 0x3 overflow 0 carry 0 +L_shl(0xfff, -64) = 0 overflow 0 carry 0 +L_shl(0x12345678, -10) = 0x48d15 overflow 0 carry 0 +L_shl(0x12345678, -64) = 0 overflow 0 carry 0 +L_shr(0x1, -1) = 0x2 overflow 0 carry 0 +L_shr(0xa, -1) = 0x14 overflow 0 carry 0 +L_shr(0xfff, -10) = 0x3ffc00 overflow 0 carry 0 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr(0x1, 1) = 0 overflow 0 carry 0 +L_shr(0x14, 1) = 0xa overflow 0 carry 0 +L_shr(0xfff, 10) = 0x3 overflow 0 carry 0 +L_shr(0xfff, 64) = 0 overflow 0 carry 0 +L_shr(0x12345678, 10) = 0x48d15 overflow 0 carry 0 +L_shr(0x12345678, 64) = 0 overflow 0 carry 0 +shr_r(0x1, -1) = 0x2 overflow 0 carry 0 +shr_r(0xa, -1) = 0x14 overflow 0 carry 0 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr_r(0x1, 1) = 0x1 overflow 0 carry 0 +shr_r(0x14, 1) = 0xa overflow 0 carry 0 +shr_r(0xfff, 10) = 0x4 overflow 0 carry 0 +shr_r(0xfff, 64) = 0 overflow 0 carry 0 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 0 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0x123456, 0x244, 0x522) = 0x29 
overflow 0 carry 0 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 0 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 0 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 0 carry 0 +L_deposit_h(0x1) = 0x10000 overflow 0 carry 0 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 0 carry 0 +L_deposit_h(0xffff8000) = 0x80000000 overflow 0 carry 0 +L_deposit_h(0x1234) = 0x12340000 overflow 0 carry 0 +L_deposit_l(0x1) = 0x1 overflow 0 carry 0 +L_deposit_l(0xffffffff) = 0xffffffff overflow 0 carry 0 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 0 carry 0 +L_deposit_l(0x1234) = 0x1234 overflow 0 carry 0 +L_shr_r(0x1, -1) = 0x2 overflow 0 carry 0 +L_shr_r(0xa, -1) = 0x14 overflow 0 carry 0 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 0 carry 0 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x1, 1) = 0x1 overflow 0 carry 0 +L_shr_r(0x14, 1) = 0xa overflow 0 carry 0 +L_shr_r(0xfff, 10) = 0x4 overflow 0 carry 0 +L_shr_r(0xfff, 64) = 0 overflow 0 carry 0 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 0 carry 0 +L_shr_r(0x12345678, 64) = 0 overflow 0 carry 0 +L_abs(0x1) = 0x1 overflow 0 carry 0 +L_abs(0xffffffff) = 0x1 overflow 0 carry 0 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 0 +L_sat(0x1) = 0x1 overflow 0 carry 0 +L_sat(0xffffffff) = 0xffffffff overflow 0 carry 0 +L_sat(0xffff8000) = 0xffff8000 overflow 0 carry 0 +L_sat(0x8000) = 0x8000 overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 0 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 0 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=0 and input Carry=1 +Checking saturate with input Overflow=0 and input Carry=1 +saturate(0x1) = 0x1 overflow 0 carry 1 +saturate(0xffffffff) = 0xffffffff 
overflow 0 carry 1 +saturate(0x8000) = 0x7fff overflow 1 carry 1 +saturate(0xffff8000) = 0xffff8000 overflow 0 carry 1 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 1 +add(0x1, 0x1) = 0x2 overflow 0 carry 1 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 0 carry 1 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 1 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 1 +sub(0x1, 0x1) = 0 overflow 0 carry 1 +sub(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +sub(0x4e20, 0x4e20) = 0 overflow 0 carry 1 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 0 carry 1 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 1 +abs_s(0x1) = 0x1 overflow 0 carry 1 +abs_s(0xffffffff) = 0x1 overflow 0 carry 1 +abs_s(0xffff8000) = 0x7fff overflow 0 carry 1 +shl(0x1, 1) = 0x2 overflow 0 carry 1 +shl(0xa, 1) = 0x14 overflow 0 carry 1 +shl(0xfff, 10) = 0x7fff overflow 1 carry 1 +shl(0xfff, 20) = 0x7fff overflow 1 carry 1 +shl(0x1, -1) = 0 overflow 0 carry 1 +shl(0x14, -1) = 0xa overflow 0 carry 1 +shl(0xfff, -10) = 0x3 overflow 0 carry 1 +shl(0xfff, -64) = 0 overflow 0 carry 1 +shr(0x1, -1) = 0x2 overflow 0 carry 1 +shr(0xa, -1) = 0x14 overflow 0 carry 1 +shr(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr(0x1, 1) = 0 overflow 0 carry 1 +shr(0x14, 1) = 0xa overflow 0 carry 1 +shr(0xfff, 10) = 0x3 overflow 0 carry 1 +shr(0xfff, 64) = 0 overflow 0 carry 1 +mult(0x2, 0x2) = 0 overflow 0 carry 1 +mult(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +L_mult(0x2, 0x2) = 0x8 overflow 0 carry 1 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 0 carry 1 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 1 +negate(0xffffffff) = 0x1 overflow 0 carry 1 +negate(0xffff8000) = 0x7fff overflow 0 carry 1 +extract_h(0x1) = 0 overflow 0 carry 1 +extract_h(0xffffffff) = 0xffffffff overflow 0 carry 1 +extract_h(0xffff8000) = 0xffffffff overflow 0 carry 1 
+extract_h(0x12345678) = 0x1234 overflow 0 carry 1 +extract_l(0x1) = 0x1 overflow 0 carry 1 +extract_l(0xffffffff) = 0xffffffff overflow 0 carry 1 +extract_l(0xffff8000) = 0xffff8000 overflow 0 carry 1 +extract_l(0x43218765) = 0xffff8765 overflow 0 carry 1 +round(0x1) = 0 overflow 0 carry 1 +round(0xffffffff) = 0 overflow 0 carry 1 +round(0xffff8000) = 0 overflow 0 carry 1 +round(0x43218765) = 0x4322 overflow 0 carry 1 +round(0x87654321) = 0xffff8765 overflow 0 carry 1 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 1 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 1 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 1 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 1 +L_add(0x1, 0x2) = 0x3 overflow 0 carry 1 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 0 carry 1 +L_add(0xffffffff, 0x2) = 0x1 overflow 0 carry 1 +L_add(0x7000, 0x7000) = 0xe000 overflow 0 carry 1 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 0 carry 1 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 1 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 1 +L_sub(0x1, 0x2) = 0xffffffff overflow 0 carry 1 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 1 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 1 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 0 carry 1 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 0 carry 1 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 1 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 1 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 1 +Checking L_add_c with input Overflow=0 and input Carry=1 +L_add_c(0x1, 0x2) = 0x4 overflow 0 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffe overflow 0 carry 
1 +L_add_c(0xffffffff, 0x2) = 0x2 overflow 0 carry 1 +L_add_c(0x7000, 0x7000) = 0xe001 overflow 0 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11fff overflow 0 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000001 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1fffffff overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8fffffff overflow 0 carry 1 +L_sub_c(0x1, 0x2) = 0xffffffff overflow 0 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0x1 overflow 0 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffd overflow 0 carry 1 +L_sub_c(0x7000, 0x7000) = 0 overflow 0 carry 1 +L_sub_c(0x8fff, 0x8fff) = 0 overflow 0 carry 1 +L_sub_c(0x70000000, 0x70000000) = 0 overflow 0 carry 1 +L_sub_c(0x8fffffff, 0x8fffffff) = 0 overflow 0 carry 1 +L_sub_c(0x1, 0x80000000) = 0x80000001 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x80000000 overflow 0 carry 1 +Checking L_macNs with input Overflow=0 and input Carry=1 +L_macNs(0x1234, 0x2, 0x2) = 0x123d overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1237 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +Checking L_msuNs with input Overflow=0 and input Carry=1 +L_msuNs(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x80000000 overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 0 carry 1 +negate(0xffffffff) = 0x1 overflow 0 carry 1 +negate(0xffff8000) = 0x7fff overflow 0 carry 1 +mult_r(0x2, 0x2) = 0 overflow 0 carry 1 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +norm_s(0x1) = 0xe overflow 0 carry 1 +norm_s(0xffffffff) = 0xf overflow 0 carry 1 +norm_s(0xffff8000) = 0 overflow 0 carry 1 +norm_s(0x2ee0) = 0x1 overflow 0 carry 1 +norm_l(0x1) = 0x1e 
overflow 0 carry 1 +norm_l(0xffffffff) = 0x1f overflow 0 carry 1 +norm_l(0xffff8000) = 0x10 overflow 0 carry 1 +norm_l(0x2ee0) = 0x11 overflow 0 carry 1 +norm_l(0x123456) = 0xa overflow 0 carry 1 +norm_l(0xabcdef) = 0x7 overflow 0 carry 1 +L_shl(0x1, 1) = 0x2 overflow 0 carry 1 +L_shl(0xa, 1) = 0x14 overflow 0 carry 1 +L_shl(0xfff, 10) = 0x3ffc00 overflow 0 carry 1 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 1 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 0 carry 1 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 1 +L_shl(0x1, -1) = 0 overflow 0 carry 1 +L_shl(0x14, -1) = 0xa overflow 0 carry 1 +L_shl(0xfff, -10) = 0x3 overflow 0 carry 1 +L_shl(0xfff, -64) = 0 overflow 0 carry 1 +L_shl(0x12345678, -10) = 0x48d15 overflow 0 carry 1 +L_shl(0x12345678, -64) = 0 overflow 0 carry 1 +L_shr(0x1, -1) = 0x2 overflow 0 carry 1 +L_shr(0xa, -1) = 0x14 overflow 0 carry 1 +L_shr(0xfff, -10) = 0x3ffc00 overflow 0 carry 1 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr(0x1, 1) = 0 overflow 0 carry 1 +L_shr(0x14, 1) = 0xa overflow 0 carry 1 +L_shr(0xfff, 10) = 0x3 overflow 0 carry 1 +L_shr(0xfff, 64) = 0 overflow 0 carry 1 +L_shr(0x12345678, 10) = 0x48d15 overflow 0 carry 1 +L_shr(0x12345678, 64) = 0 overflow 0 carry 1 +shr_r(0x1, -1) = 0x2 overflow 0 carry 1 +shr_r(0xa, -1) = 0x14 overflow 0 carry 1 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr_r(0x1, 1) = 0x1 overflow 0 carry 1 +shr_r(0x14, 1) = 0xa overflow 0 carry 1 +shr_r(0xfff, 10) = 0x4 overflow 0 carry 1 +shr_r(0xfff, 64) = 0 overflow 0 carry 1 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 1 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0x123456, 0x244, 0x522) = 0x29 overflow 0 carry 1 
+msu_r(0x1234, 0x2, 0x2) = 0 overflow 0 carry 1 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 0 carry 1 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 0 carry 1 +L_deposit_h(0x1) = 0x10000 overflow 0 carry 1 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 0 carry 1 +L_deposit_h(0xffff8000) = 0x80000000 overflow 0 carry 1 +L_deposit_h(0x1234) = 0x12340000 overflow 0 carry 1 +L_deposit_l(0x1) = 0x1 overflow 0 carry 1 +L_deposit_l(0xffffffff) = 0xffffffff overflow 0 carry 1 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 0 carry 1 +L_deposit_l(0x1234) = 0x1234 overflow 0 carry 1 +L_shr_r(0x1, -1) = 0x2 overflow 0 carry 1 +L_shr_r(0xa, -1) = 0x14 overflow 0 carry 1 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 0 carry 1 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x1, 1) = 0x1 overflow 0 carry 1 +L_shr_r(0x14, 1) = 0xa overflow 0 carry 1 +L_shr_r(0xfff, 10) = 0x4 overflow 0 carry 1 +L_shr_r(0xfff, 64) = 0 overflow 0 carry 1 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 0 carry 1 +L_shr_r(0x12345678, 64) = 0 overflow 0 carry 1 +L_abs(0x1) = 0x1 overflow 0 carry 1 +L_abs(0xffffffff) = 0x1 overflow 0 carry 1 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 1 +L_sat(0x1) = 0x1 overflow 0 carry 1 +L_sat(0xffffffff) = 0xffffffff overflow 0 carry 1 +L_sat(0xffff8000) = 0xffff8000 overflow 0 carry 1 +L_sat(0x8000) = 0x8000 overflow 0 carry 1 +div_s(0x1, 0x1) = 0x7fff overflow 0 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 0 carry 1 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=1 and input Carry=0 +Checking saturate with input Overflow=1 and input Carry=0 +saturate(0x1) = 0x1 overflow 1 carry 0 +saturate(0xffffffff) = 0xffffffff overflow 1 carry 0 
+saturate(0x8000) = 0x7fff overflow 1 carry 0 +saturate(0xffff8000) = 0xffff8000 overflow 1 carry 0 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 0 +add(0x1, 0x1) = 0x2 overflow 1 carry 0 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 1 carry 0 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 0 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 0 +sub(0x1, 0x1) = 0 overflow 1 carry 0 +sub(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +sub(0x4e20, 0x4e20) = 0 overflow 1 carry 0 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 1 carry 0 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 0 +abs_s(0x1) = 0x1 overflow 1 carry 0 +abs_s(0xffffffff) = 0x1 overflow 1 carry 0 +abs_s(0xffff8000) = 0x7fff overflow 1 carry 0 +shl(0x1, 1) = 0x2 overflow 1 carry 0 +shl(0xa, 1) = 0x14 overflow 1 carry 0 +shl(0xfff, 10) = 0x7fff overflow 1 carry 0 +shl(0xfff, 20) = 0x7fff overflow 1 carry 0 +shl(0x1, -1) = 0 overflow 1 carry 0 +shl(0x14, -1) = 0xa overflow 1 carry 0 +shl(0xfff, -10) = 0x3 overflow 1 carry 0 +shl(0xfff, -64) = 0 overflow 1 carry 0 +shr(0x1, -1) = 0x2 overflow 1 carry 0 +shr(0xa, -1) = 0x14 overflow 1 carry 0 +shr(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr(0x1, 1) = 0 overflow 1 carry 0 +shr(0x14, 1) = 0xa overflow 1 carry 0 +shr(0xfff, 10) = 0x3 overflow 1 carry 0 +shr(0xfff, 64) = 0 overflow 1 carry 0 +mult(0x2, 0x2) = 0 overflow 1 carry 0 +mult(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +L_mult(0x2, 0x2) = 0x8 overflow 1 carry 0 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 1 carry 0 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +negate(0x1) = 0xffffffff overflow 1 carry 0 +negate(0xffffffff) = 0x1 overflow 1 carry 0 +negate(0xffff8000) = 0x7fff overflow 1 carry 0 +extract_h(0x1) = 0 overflow 1 carry 0 +extract_h(0xffffffff) = 0xffffffff overflow 1 carry 0 +extract_h(0xffff8000) = 0xffffffff overflow 1 carry 0 +extract_h(0x12345678) 
= 0x1234 overflow 1 carry 0 +extract_l(0x1) = 0x1 overflow 1 carry 0 +extract_l(0xffffffff) = 0xffffffff overflow 1 carry 0 +extract_l(0xffff8000) = 0xffff8000 overflow 1 carry 0 +extract_l(0x43218765) = 0xffff8765 overflow 1 carry 0 +round(0x1) = 0 overflow 1 carry 0 +round(0xffffffff) = 0 overflow 1 carry 0 +round(0xffff8000) = 0 overflow 1 carry 0 +round(0x43218765) = 0x4322 overflow 1 carry 0 +round(0x87654321) = 0xffff8765 overflow 1 carry 0 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 1 carry 0 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 1 carry 0 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 0 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 0 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 1 carry 0 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 1 carry 0 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 0 +L_add(0x1, 0x2) = 0x3 overflow 1 carry 0 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 0 +L_add(0xffffffff, 0x2) = 0x1 overflow 1 carry 0 +L_add(0x7000, 0x7000) = 0xe000 overflow 1 carry 0 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 1 carry 0 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 0 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 0 +L_sub(0x1, 0x2) = 0xffffffff overflow 1 carry 0 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 0 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 0 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 1 carry 0 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 1 carry 0 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 0 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 0 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 0 +Checking L_add_c with input Overflow=1 and input Carry=0 +L_add_c(0x1, 0x2) = 0x3 overflow 1 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 1 +L_add_c(0xffffffff, 
0x2) = 0x1 overflow 1 carry 1 +L_add_c(0x7000, 0x7000) = 0xe000 overflow 1 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11ffe overflow 1 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000000 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1ffffffe overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8ffffffe overflow 1 carry 1 +L_sub_c(0x1, 0x2) = 0xfffffffe overflow 1 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0 overflow 1 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffc overflow 1 carry 1 +L_sub_c(0x7000, 0x7000) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x8fff, 0x8fff) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x70000000, 0x70000000) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x8fffffff, 0x8fffffff) = 0xffffffff overflow 1 carry 0 +L_sub_c(0x1, 0x80000000) = 0x80000000 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x7fffffff overflow 1 carry 1 +Checking L_macNs with input Overflow=1 and input Carry=0 +L_macNs(0x1234, 0x2, 0x2) = 0x123c overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001233 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +Checking L_msuNs with input Overflow=1 and input Carry=0 +L_msuNs(0x1234, 0x2, 0x2) = 0x122b overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1231 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 0 +negate(0xffffffff) = 0x1 overflow 1 carry 0 +negate(0xffff8000) = 0x7fff overflow 1 carry 0 +mult_r(0x2, 0x2) = 0 overflow 1 carry 0 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +norm_s(0x1) = 0xe overflow 1 carry 0 +norm_s(0xffffffff) = 0xf overflow 1 carry 0 +norm_s(0xffff8000) = 0 overflow 1 carry 0 +norm_s(0x2ee0) = 0x1 overflow 1 carry 0 
+norm_l(0x1) = 0x1e overflow 1 carry 0 +norm_l(0xffffffff) = 0x1f overflow 1 carry 0 +norm_l(0xffff8000) = 0x10 overflow 1 carry 0 +norm_l(0x2ee0) = 0x11 overflow 1 carry 0 +norm_l(0x123456) = 0xa overflow 1 carry 0 +norm_l(0xabcdef) = 0x7 overflow 1 carry 0 +L_shl(0x1, 1) = 0x2 overflow 1 carry 0 +L_shl(0xa, 1) = 0x14 overflow 1 carry 0 +L_shl(0xfff, 10) = 0x3ffc00 overflow 1 carry 0 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 0 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 1 carry 0 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 0 +L_shl(0x1, -1) = 0 overflow 1 carry 0 +L_shl(0x14, -1) = 0xa overflow 1 carry 0 +L_shl(0xfff, -10) = 0x3 overflow 1 carry 0 +L_shl(0xfff, -64) = 0 overflow 1 carry 0 +L_shl(0x12345678, -10) = 0x48d15 overflow 1 carry 0 +L_shl(0x12345678, -64) = 0 overflow 1 carry 0 +L_shr(0x1, -1) = 0x2 overflow 1 carry 0 +L_shr(0xa, -1) = 0x14 overflow 1 carry 0 +L_shr(0xfff, -10) = 0x3ffc00 overflow 1 carry 0 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr(0x1, 1) = 0 overflow 1 carry 0 +L_shr(0x14, 1) = 0xa overflow 1 carry 0 +L_shr(0xfff, 10) = 0x3 overflow 1 carry 0 +L_shr(0xfff, 64) = 0 overflow 1 carry 0 +L_shr(0x12345678, 10) = 0x48d15 overflow 1 carry 0 +L_shr(0x12345678, 64) = 0 overflow 1 carry 0 +shr_r(0x1, -1) = 0x2 overflow 1 carry 0 +shr_r(0xa, -1) = 0x14 overflow 1 carry 0 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 0 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 0 +shr_r(0x1, 1) = 0x1 overflow 1 carry 0 +shr_r(0x14, 1) = 0xa overflow 1 carry 0 +shr_r(0xfff, 10) = 0x4 overflow 1 carry 0 +shr_r(0xfff, 64) = 0 overflow 1 carry 0 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 0 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 0 +mac_r(0x123456, 0x244, 0x522) = 0x29 
overflow 1 carry 0 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 0 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 0 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 0 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 1 carry 0 +L_deposit_h(0x1) = 0x10000 overflow 1 carry 0 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 1 carry 0 +L_deposit_h(0xffff8000) = 0x80000000 overflow 1 carry 0 +L_deposit_h(0x1234) = 0x12340000 overflow 1 carry 0 +L_deposit_l(0x1) = 0x1 overflow 1 carry 0 +L_deposit_l(0xffffffff) = 0xffffffff overflow 1 carry 0 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 1 carry 0 +L_deposit_l(0x1234) = 0x1234 overflow 1 carry 0 +L_shr_r(0x1, -1) = 0x2 overflow 1 carry 0 +L_shr_r(0xa, -1) = 0x14 overflow 1 carry 0 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 1 carry 0 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 0 +L_shr_r(0x1, 1) = 0x1 overflow 1 carry 0 +L_shr_r(0x14, 1) = 0xa overflow 1 carry 0 +L_shr_r(0xfff, 10) = 0x4 overflow 1 carry 0 +L_shr_r(0xfff, 64) = 0 overflow 1 carry 0 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 1 carry 0 +L_shr_r(0x12345678, 64) = 0 overflow 1 carry 0 +L_abs(0x1) = 0x1 overflow 1 carry 0 +L_abs(0xffffffff) = 0x1 overflow 1 carry 0 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 0 +L_sat(0x1) = 0x7fffffff overflow 0 carry 0 +L_sat(0xffffffff) = 0x7fffffff overflow 0 carry 0 +L_sat(0xffff8000) = 0x7fffffff overflow 0 carry 0 +L_sat(0x8000) = 0x7fffffff overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 1 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 0 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 0 + + +DSP FNS (non-NEON/ITU) intrinsics with input Overflow=1 and input Carry=1 +Checking saturate with input Overflow=1 and input Carry=1 +saturate(0x1) = 0x1 overflow 1 carry 1 +saturate(0xffffffff) = 
0xffffffff overflow 1 carry 1 +saturate(0x8000) = 0x7fff overflow 1 carry 1 +saturate(0xffff8000) = 0xffff8000 overflow 1 carry 1 +saturate(0xffff7fff) = 0xffff8000 overflow 1 carry 1 +add(0x1, 0x1) = 0x2 overflow 1 carry 1 +add(0xffffffff, 0xffffffff) = 0xfffffffe overflow 1 carry 1 +add(0x4e20, 0x4e20) = 0x7fff overflow 1 carry 1 +add(0xffffb1e0, 0xffffb1e0) = 0xffff8000 overflow 1 carry 1 +sub(0x1, 0x1) = 0 overflow 1 carry 1 +sub(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +sub(0x4e20, 0x4e20) = 0 overflow 1 carry 1 +sub(0xffffb1e0, 0xffffb1e0) = 0 overflow 1 carry 1 +sub(0, 0xffff8000) = 0x7fff overflow 1 carry 1 +abs_s(0x1) = 0x1 overflow 1 carry 1 +abs_s(0xffffffff) = 0x1 overflow 1 carry 1 +abs_s(0xffff8000) = 0x7fff overflow 1 carry 1 +shl(0x1, 1) = 0x2 overflow 1 carry 1 +shl(0xa, 1) = 0x14 overflow 1 carry 1 +shl(0xfff, 10) = 0x7fff overflow 1 carry 1 +shl(0xfff, 20) = 0x7fff overflow 1 carry 1 +shl(0x1, -1) = 0 overflow 1 carry 1 +shl(0x14, -1) = 0xa overflow 1 carry 1 +shl(0xfff, -10) = 0x3 overflow 1 carry 1 +shl(0xfff, -64) = 0 overflow 1 carry 1 +shr(0x1, -1) = 0x2 overflow 1 carry 1 +shr(0xa, -1) = 0x14 overflow 1 carry 1 +shr(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr(0x1, 1) = 0 overflow 1 carry 1 +shr(0x14, 1) = 0xa overflow 1 carry 1 +shr(0xfff, 10) = 0x3 overflow 1 carry 1 +shr(0xfff, 64) = 0 overflow 1 carry 1 +mult(0x2, 0x2) = 0 overflow 1 carry 1 +mult(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mult(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +L_mult(0x2, 0x2) = 0x8 overflow 1 carry 1 +L_mult(0xffffffff, 0xffffffff) = 0x2 overflow 1 carry 1 +L_mult(0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 1 +negate(0xffffffff) = 0x1 overflow 1 carry 1 +negate(0xffff8000) = 0x7fff overflow 1 carry 1 +extract_h(0x1) = 0 overflow 1 carry 1 +extract_h(0xffffffff) = 0xffffffff overflow 1 carry 1 +extract_h(0xffff8000) = 0xffffffff overflow 1 
carry 1 +extract_h(0x12345678) = 0x1234 overflow 1 carry 1 +extract_l(0x1) = 0x1 overflow 1 carry 1 +extract_l(0xffffffff) = 0xffffffff overflow 1 carry 1 +extract_l(0xffff8000) = 0xffff8000 overflow 1 carry 1 +extract_l(0x43218765) = 0xffff8765 overflow 1 carry 1 +round(0x1) = 0 overflow 1 carry 1 +round(0xffffffff) = 0 overflow 1 carry 1 +round(0xffff8000) = 0 overflow 1 carry 1 +round(0x43218765) = 0x4322 overflow 1 carry 1 +round(0x87654321) = 0xffff8765 overflow 1 carry 1 +L_mac(0x1234, 0x2, 0x2) = 0x123c overflow 1 carry 1 +L_mac(0x1234, 0xffffffff, 0xffffffff) = 0x1236 overflow 1 carry 1 +L_mac(0x1234, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +L_mac(0xffffffff, 0xffff8000, 0xffff8000) = 0x7ffffffe overflow 1 carry 1 +L_msu(0x1234, 0x2, 0x2) = 0x122c overflow 1 carry 1 +L_msu(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 1 carry 1 +L_msu(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 1 +L_msu(0x1, 0xffff8000, 0xffff8000) = 0x80000002 overflow 1 carry 1 +L_add(0x1, 0x2) = 0x3 overflow 1 carry 1 +L_add(0xffffffff, 0xfffffffe) = 0xfffffffd overflow 1 carry 1 +L_add(0xffffffff, 0x2) = 0x1 overflow 1 carry 1 +L_add(0x7000, 0x7000) = 0xe000 overflow 1 carry 1 +L_add(0x8fff, 0x8fff) = 0x11ffe overflow 1 carry 1 +L_add(0x70000000, 0x70000000) = 0x7fffffff overflow 1 carry 1 +L_add(0x8fffffff, 0x8fffffff) = 0x80000000 overflow 1 carry 1 +L_sub(0x1, 0x2) = 0xffffffff overflow 1 carry 1 +L_sub(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 1 +L_sub(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 1 +L_sub(0x7000, 0xffff9000) = 0xe000 overflow 1 carry 1 +L_sub(0x8fff, 0xffff7001) = 0x11ffe overflow 1 carry 1 +L_sub(0x70000000, 0x90000000) = 0x7fffffff overflow 1 carry 1 +L_sub(0x8fffffff, 0x70000001) = 0x80000000 overflow 1 carry 1 +L_sub(0, 0x80000000) = 0x7fffffff overflow 1 carry 1 +Checking L_add_c with input Overflow=1 and input Carry=1 +L_add_c(0x1, 0x2) = 0x4 overflow 1 carry 0 +L_add_c(0xffffffff, 0xfffffffe) = 0xfffffffe overflow 
1 carry 1 +L_add_c(0xffffffff, 0x2) = 0x2 overflow 1 carry 1 +L_add_c(0x7000, 0x7000) = 0xe001 overflow 1 carry 0 +L_add_c(0x8fff, 0x8fff) = 0x11fff overflow 1 carry 0 +L_add_c(0x70000000, 0x70000000) = 0xe0000001 overflow 1 carry 0 +L_add_c(0x8fffffff, 0x8fffffff) = 0x1fffffff overflow 1 carry 1 +L_add_c(0x8fffffff, 0xffffffff) = 0x8fffffff overflow 1 carry 1 +L_sub_c(0x1, 0x2) = 0xffffffff overflow 1 carry 0 +L_sub_c(0xffffffff, 0xfffffffe) = 0x1 overflow 1 carry 1 +L_sub_c(0xffffffff, 0x2) = 0xfffffffd overflow 1 carry 1 +L_sub_c(0x7000, 0x7000) = 0 overflow 1 carry 1 +L_sub_c(0x8fff, 0x8fff) = 0 overflow 1 carry 1 +L_sub_c(0x70000000, 0x70000000) = 0 overflow 1 carry 1 +L_sub_c(0x8fffffff, 0x8fffffff) = 0 overflow 1 carry 1 +L_sub_c(0x1, 0x80000000) = 0x80000001 overflow 1 carry 0 +L_sub_c(0xffffffff, 0x7fffffff) = 0x80000000 overflow 1 carry 1 +Checking L_macNs with input Overflow=1 and input Carry=1 +L_macNs(0x1234, 0x2, 0x2) = 0x123d overflow 0 carry 0 +L_macNs(0x1234, 0xffffffff, 0xffffffff) = 0x1237 overflow 0 carry 0 +L_macNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001234 overflow 1 carry 0 +L_macNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fffffff overflow 1 carry 1 +Checking L_msuNs with input Overflow=1 and input Carry=1 +L_msuNs(0x1234, 0x2, 0x2) = 0x122c overflow 0 carry 1 +L_msuNs(0x1234, 0xffffffff, 0xffffffff) = 0x1232 overflow 0 carry 1 +L_msuNs(0x1234, 0xffff8000, 0xffff8000) = 0x80001235 overflow 1 carry 0 +L_msuNs(0xffffffff, 0xffff8000, 0xffff8000) = 0x80000000 overflow 1 carry 1 +negate(0x1) = 0xffffffff overflow 1 carry 1 +negate(0xffffffff) = 0x1 overflow 1 carry 1 +negate(0xffff8000) = 0x7fff overflow 1 carry 1 +mult_r(0x2, 0x2) = 0 overflow 1 carry 1 +mult_r(0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mult_r(0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +norm_s(0x1) = 0xe overflow 1 carry 1 +norm_s(0xffffffff) = 0xf overflow 1 carry 1 +norm_s(0xffff8000) = 0 overflow 1 carry 1 +norm_s(0x2ee0) = 0x1 overflow 1 carry 1 +norm_l(0x1) 
= 0x1e overflow 1 carry 1 +norm_l(0xffffffff) = 0x1f overflow 1 carry 1 +norm_l(0xffff8000) = 0x10 overflow 1 carry 1 +norm_l(0x2ee0) = 0x11 overflow 1 carry 1 +norm_l(0x123456) = 0xa overflow 1 carry 1 +norm_l(0xabcdef) = 0x7 overflow 1 carry 1 +L_shl(0x1, 1) = 0x2 overflow 1 carry 1 +L_shl(0xa, 1) = 0x14 overflow 1 carry 1 +L_shl(0xfff, 10) = 0x3ffc00 overflow 1 carry 1 +L_shl(0xfff, 20) = 0x7fffffff overflow 1 carry 1 +L_shl(0x12345678, 2) = 0x48d159e0 overflow 1 carry 1 +L_shl(0x12345678, 40) = 0x7fffffff overflow 1 carry 1 +L_shl(0x1, -1) = 0 overflow 1 carry 1 +L_shl(0x14, -1) = 0xa overflow 1 carry 1 +L_shl(0xfff, -10) = 0x3 overflow 1 carry 1 +L_shl(0xfff, -64) = 0 overflow 1 carry 1 +L_shl(0x12345678, -10) = 0x48d15 overflow 1 carry 1 +L_shl(0x12345678, -64) = 0 overflow 1 carry 1 +L_shr(0x1, -1) = 0x2 overflow 1 carry 1 +L_shr(0xa, -1) = 0x14 overflow 1 carry 1 +L_shr(0xfff, -10) = 0x3ffc00 overflow 1 carry 1 +L_shr(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr(0x1, 1) = 0 overflow 1 carry 1 +L_shr(0x14, 1) = 0xa overflow 1 carry 1 +L_shr(0xfff, 10) = 0x3 overflow 1 carry 1 +L_shr(0xfff, 64) = 0 overflow 1 carry 1 +L_shr(0x12345678, 10) = 0x48d15 overflow 1 carry 1 +L_shr(0x12345678, 64) = 0 overflow 1 carry 1 +shr_r(0x1, -1) = 0x2 overflow 1 carry 1 +shr_r(0xa, -1) = 0x14 overflow 1 carry 1 +shr_r(0xfff, -10) = 0x7fff overflow 1 carry 1 +shr_r(0xfff, -20) = 0x7fff overflow 1 carry 1 +shr_r(0x1, 1) = 0x1 overflow 1 carry 1 +shr_r(0x14, 1) = 0xa overflow 1 carry 1 +shr_r(0xfff, 10) = 0x4 overflow 1 carry 1 +shr_r(0xfff, 64) = 0 overflow 1 carry 1 +mac_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 1 +mac_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +mac_r(0x1234, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0xffffffff, 0xffff8000, 0xffff8000) = 0x7fff overflow 1 carry 1 +mac_r(0x123456, 0x244, 0x522) = 0x29 overflow 1 
carry 1 +msu_r(0x1234, 0x2, 0x2) = 0 overflow 1 carry 1 +msu_r(0x1234, 0xffffffff, 0xffffffff) = 0 overflow 1 carry 1 +msu_r(0x1234, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x1, 0xffff8000, 0xffff8000) = 0xffff8000 overflow 1 carry 1 +msu_r(0x123456, 0x321, 0x243) = 0x4 overflow 1 carry 1 +L_deposit_h(0x1) = 0x10000 overflow 1 carry 1 +L_deposit_h(0xffffffff) = 0xffff0000 overflow 1 carry 1 +L_deposit_h(0xffff8000) = 0x80000000 overflow 1 carry 1 +L_deposit_h(0x1234) = 0x12340000 overflow 1 carry 1 +L_deposit_l(0x1) = 0x1 overflow 1 carry 1 +L_deposit_l(0xffffffff) = 0xffffffff overflow 1 carry 1 +L_deposit_l(0xffff8000) = 0xffff8000 overflow 1 carry 1 +L_deposit_l(0x1234) = 0x1234 overflow 1 carry 1 +L_shr_r(0x1, -1) = 0x2 overflow 1 carry 1 +L_shr_r(0xa, -1) = 0x14 overflow 1 carry 1 +L_shr_r(0xfff, -10) = 0x3ffc00 overflow 1 carry 1 +L_shr_r(0xfff, -20) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -10) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x12345678, -40) = 0x7fffffff overflow 1 carry 1 +L_shr_r(0x1, 1) = 0x1 overflow 1 carry 1 +L_shr_r(0x14, 1) = 0xa overflow 1 carry 1 +L_shr_r(0xfff, 10) = 0x4 overflow 1 carry 1 +L_shr_r(0xfff, 64) = 0 overflow 1 carry 1 +L_shr_r(0x12345678, 10) = 0x48d16 overflow 1 carry 1 +L_shr_r(0x12345678, 64) = 0 overflow 1 carry 1 +L_abs(0x1) = 0x1 overflow 1 carry 1 +L_abs(0xffffffff) = 0x1 overflow 1 carry 1 +L_abs(0x80000000) = 0x7fffffff overflow 1 carry 1 +L_sat(0x1) = 0x80000000 overflow 0 carry 0 +L_sat(0xffffffff) = 0x80000000 overflow 0 carry 0 +L_sat(0xffff8000) = 0x80000000 overflow 0 carry 0 +L_sat(0x8000) = 0x80000000 overflow 0 carry 0 +div_s(0x1, 0x1) = 0x7fff overflow 1 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 1 +div_s(0x2710, 0x4e20) = 0x4000 overflow 1 carry 1 diff --git a/ref_dsp.c b/ref_dsp.c new file mode 100644 index 0000000..85de61e --- /dev/null +++ b/ref_dsp.c @@ -0,0 +1,411 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <stdio.h> +#include <stdint.h> + +#ifdef __arm__ +#include <armdsp.h> +#else +#include "stm-armdsp.h" +#endif +#include <dspfns.h> /* For Overflow */ + +extern FILE* ref_file; + +void exec_dsp (void) +{ + int32_t svar1, svar2, sacc, sres; + int32_t lo, hi; + + + fprintf(ref_file, "\n\nDSP (non-NEON) intrinsics\n"); + + /* qadd */ + /* int32_t qadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0x7000; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qadd(svar1, svar2); + fprintf(ref_file, "qadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + /* qsub */ + /* int32_t qsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = 
qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x80000000; + Overflow = 0; + sres = qsub(svar1, svar2); + fprintf(ref_file, "qsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + + /* qdadd */ + /* int32_t qdadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0x7000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, 
svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x70000000; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = qdadd(svar1, svar2); + fprintf(ref_file, "qdadd(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + /* qdsub */ + /* int32_t qdsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x90000000; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + svar1 = 0; + svar2 = 0x70000001; + Overflow = 
0; + sres = qdsub(svar1, svar2); + fprintf(ref_file, "qdsub(%#x, %#x) = %#x sat %d\n", svar1, svar2, sres, Overflow); + + + /* smulbb, smulbt, smultb, smultt */ + /* int32_t smulbb(int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smulbb(svar1, svar2); + fprintf(ref_file, "smulbb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulbt(svar1, svar2); + fprintf(ref_file, "smulbt(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultb(svar1, svar2); + fprintf(ref_file, "smultb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultt(svar1, svar2); + fprintf(ref_file, "smultt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smulbb(svar1, svar2); + fprintf(ref_file, "smulbb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulbt(svar1, svar2); + fprintf(ref_file, "smulbt(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultb(svar1, svar2); + fprintf(ref_file, "smultb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smultt(svar1, svar2); + fprintf(ref_file, "smultt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + + /* smlabb, smlabt, smlatb, smlatt */ + /* int32_t smlabb(int32_t val1, int32_t val2, int32_t acc); */ + sacc = 0x01020304; + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smlabb(svar1, svar2, sacc); + fprintf(ref_file, "smlabb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlabt(svar1, svar2, sacc); + fprintf(ref_file, "smlabt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatb(svar1, svar2, sacc); + fprintf(ref_file, "smlatb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatt(svar1, svar2, sacc); + fprintf(ref_file, "smlatt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smlabb(svar1, svar2, sacc); + fprintf(ref_file, "smlabb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlabt(svar1, svar2, sacc); + fprintf(ref_file, "smlabt(%#x, %#x, %#x) = %#x\n", 
svar1, svar2, sacc, sres); + sres = smlatb(svar1, svar2, sacc); + fprintf(ref_file, "smlatb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlatt(svar1, svar2, sacc); + fprintf(ref_file, "smlatt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + + /* smlalbb, smlalbt, smlaltb, smlaltt */ + /* int32_t smlalbb(int32_t *lo, int32_t *hi, int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlalbt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0x9ABCDEF0; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, 
"%#x%#x\n", hi, lo); + + svar1 = 0x7FFF7FFF; + svar2 = 0x7FFF7FFF; + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlalbb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlalbt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlalbt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlaltb(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltb(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + hi = 0x12345678; + lo = 0xFFFFFFFF; + fprintf(ref_file, "smlaltt(&%#x, &%#x, %#x, %#x) = ", lo, hi, svar1, svar2); + smlaltt(&lo, &hi, svar1, svar2); + fprintf(ref_file, "%#x%#x\n", hi, lo); + + + /* smulwb, smulwt */ + /* int32_t smulwb(int32_t val1, int32_t val2); */ + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smulwb(svar1, svar2); + fprintf(ref_file, "smulwb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulwt(svar1, svar2); + fprintf(ref_file, "smulwt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smulwb(svar1, svar2); + fprintf(ref_file, "smulwb(%#x, %#x) = %#x\n", svar1, svar2, sres); + sres = smulwt(svar1, svar2); + fprintf(ref_file, "smulwt(%#x, %#x) = %#x\n", svar1, svar2, sres); + + + /* smlawb, smlawt */ + /* int32_t smlawb(int32_t val1, int32_t val2, int32_t acc); */ + sacc = 0x01020304; + svar1 = 0x12345678; + svar2 = 0x12345678; + sres = smlawb(svar1, svar2, sacc); + fprintf(ref_file, "smlawb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlawt(svar1, svar2, sacc); + fprintf(ref_file, "smlawt(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + + svar1 = 0xF123F456; + svar2 = 0xF123F456; + sres = smlawb(svar1, svar2, sacc); + fprintf(ref_file, "smlawb(%#x, %#x, %#x) = %#x\n", svar1, svar2, sacc, sres); + sres = smlawt(svar1, svar2, sacc); 
+ fprintf(ref_file, "smlawt(%#x, %#x, %#X) = %#x\n", svar1, svar2, sacc, sres); + +} diff --git a/ref_dspfns.c b/ref_dspfns.c new file mode 100644 index 0000000..5680779 --- /dev/null +++ b/ref_dspfns.c @@ -0,0 +1,1493 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <stdio.h> +#include <stdint.h> + +#ifdef __arm__ +#include <dspfns.h> +#else +#include <dspfns.h> +#endif + +extern FILE* ref_file; + +#define __xSTR(X) #X +#define __STR(X) __xSTR(X) + +#define FN(X) X, __STR(X) + +typedef int32_t func32_32_32_ptr(int32_t, int32_t); +typedef int16_t func16_32_ptr(int32_t); +typedef int32_t func32_32_16_16_ptr(int32_t, int16_t, int16_t); + +void test_16_fn_32(func16_32_ptr func, char* func_name, + int init_Overflow, int init_Carry) +{ + int32_t svar32_a; + int16_t svar16_a; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32769; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = func(svar32_a); + fprintf(ref_file, "%s(%#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, Overflow, Carry); +} + +void test_32_fn_32_32(func32_32_32_ptr func, char* func_name, + int init_Overflow, int init_Carry) +{ + int32_t svar32_a, svar32_b, svar32_c; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar32_a = 1; + 
svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0xFFFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = func(svar32_a, svar32_b); + fprintf(ref_file, "%s(%#x, %#x) = %#x overflow %d carry %d\n", + 
func_name, svar32_a, svar32_b, svar32_c, Overflow, Carry); +} + +void test_32_fn_32_16_16(func32_32_16_16_ptr func, char* func_name, + int init_Overflow, int init_Carry) +{ + int32_t svar32_a, svar32_b; + int16_t svar16_a, svar16_b; + + fprintf(ref_file, "Checking %s with input Overflow=%d and input Carry=%d\n", + func_name, init_Overflow, init_Carry); + + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = 0; + Carry = init_Carry; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = 0; + Carry = init_Carry; + svar32_a = -1; + svar32_b = func(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "%s(%#x, %#x, %#x) = %#x overflow %d carry %d\n", + func_name, svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); +} + +void exec_dspfns1 (int init_Overflow, int init_Carry) +{ + int32_t svar32_a, svar32_b, svar32_c; + int16_t svar16_a, svar16_b, svar16_c; + + + fprintf(ref_file, "\n\nDSP FNS (non-NEON/ITU) intrinsics with input Overflow=%d and input Carry=%d\n", init_Overflow, init_Carry); + + /* saturate */ + /* int16_t saturate(int32_t x) */ + test_16_fn_32(FN(saturate), init_Overflow, init_Carry); + + + /* add */ + /* int16_t add(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + 
svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -20000; + svar16_b = -20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = add(svar16_a, svar16_b); + fprintf(ref_file, "add(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* sub */ + /* int16_t sub(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -20000; + svar16_b = -20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0; + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = 
sub(svar16_a, svar16_b); + fprintf(ref_file, "sub(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* abs_s */ + /* int16_t abs_s(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = abs_s(svar16_b); + fprintf(ref_file, "abs_s(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + + /* shl */ + /* int16_t shl(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, 
svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shl(svar16_a, svar16_b); + fprintf(ref_file, "shl(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* shr */ + /* int16_t shr(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, 
Carry); + + svar16_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr(svar16_a, svar16_b); + fprintf(ref_file, "shr(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* mult */ + /* int16_t mult(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult(svar16_a, svar16_b); + fprintf(ref_file, "mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* L_mult */ + /* int32_t L_mult(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar32_a, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d 
carry %d\n", svar16_a, svar16_b, svar32_a, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_mult(svar16_a, svar16_b); + fprintf(ref_file, "L_mult(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar32_a, Overflow, Carry); + + + /* negate */ + /* int16_t negate(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = negate(svar16_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar16_b, svar16_a, Overflow, Carry); + + + /* extract_h */ + /* int16_t extract_h(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x12345678; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_h(svar32_a); + fprintf(ref_file, "extract_h(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* extract_l */ + /* int16_t extract_l(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + 
svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x43218765; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = extract_l(svar32_a); + fprintf(ref_file, "extract_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* round */ + /* int16_t round(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x43218765; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x87654321; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = round(svar32_a); + fprintf(ref_file, "round(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* L_mac */ + /* int32_t L_mac(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + 
Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = -1; + svar32_b = L_mac(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_mac(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + + /* L_msu */ + /* int32_t L_msu(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + svar32_a = 1; + 
Overflow = init_Overflow; + Carry = init_Carry; + svar32_b = L_msu(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "L_msu(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar32_b, Overflow, Carry); + + + /* L_add */ + /* int32_t L_add(int32_t val1, int32_t val2); */ + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_add(svar32_a, svar32_b); + fprintf(ref_file, "L_add(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, 
svar32_b, svar32_c, Overflow, Carry); + + /* L_sub */ + /* int32_t L_sub(int32_t val1, int32_t val2); */ + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0xFFFF9000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0xFFFF7001; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x90000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x70000001; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0; + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_sub(svar32_a, svar32_b); + 
fprintf(ref_file, "L_sub(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + + /* L_add_c */ + /* int32_t L_add_c(int32_t val1, int32_t val2); */ + test_32_fn_32_32(FN(L_add_c), init_Overflow, init_Carry); + + + + /* L_sub_c */ + /* int32_t L_sub_c(int32_t val1, int32_t val2); */ +#undef MYFN +#define MYFN L_sub_c + svar32_a = 1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = -2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = -1; + svar32_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x7000; + svar32_b = 0x7000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFF; + svar32_b = 0x8FFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x70000000; + svar32_b = 0x70000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x8FFFFFFF; + svar32_b = 0x8FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = 
MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0x1; + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFFFFFFF; + svar32_b = 0x7FFFFFFF; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = MYFN(svar32_a, svar32_b); + fprintf(ref_file, __STR(MYFN) "(%#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar32_b, svar32_c, Overflow, Carry); + + + /* L_macNs */ + /* int32_t L_macNs(int32_t acc, int16_t x, int16_t y) */ + test_32_fn_32_16_16(FN(L_macNs), init_Overflow, init_Carry); + + /* L_msuNs */ + /* int32_t L_msuNs(int32_t acc, int16_t x, int16_t y) */ + test_32_fn_32_16_16(FN(L_msuNs), init_Overflow, init_Carry); + + + /* negate */ + /* int32_t negate(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = negate(svar32_b); + fprintf(ref_file, "negate(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* mult_r */ + /* int16_t mult_r(int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = 
init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mult_r(svar16_a, svar16_b); + fprintf(ref_file, "mult_r(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* norm_s */ + /* int16_t norm_s(int32_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 12000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_s(svar32_a); + fprintf(ref_file, "norm_s(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* norm_l */ + /* int16_t norm_l(int16_t x) */ + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 12000; + Overflow = init_Overflow; + 
Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + svar32_a = 0xABCDEF; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_a = norm_l(svar32_a); + fprintf(ref_file, "norm_l(%#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, Overflow, Carry); + + + /* L_shl */ + /* int32_t L_shl(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 2; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, 
"L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shl(svar32_a, svar16_b); + fprintf(ref_file, "L_shl(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* L_shr */ + /* int32_t L_shr(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + 
fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x 
overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr(svar32_a, svar16_b); + fprintf(ref_file, "L_shr(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* shr_r */ + /* int16_t shr_r(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, 
%d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = shr_r(svar16_a, svar16_b); + fprintf(ref_file, "shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* mac_r */ + /* int16_t mac_r(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = -1; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x244; + svar16_b = 0x522; + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = mac_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "mac_r(%#x, %#x, %#x) = %#x overflow %d carry 
%d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* msu_r */ + /* int32_t msu_r(int32_t acc, int16_t x, int16_t y) */ + svar16_a = 2; + svar16_b = 2; + svar32_a = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = -1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x8000; + svar16_b = 0x8000; + svar32_a = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 0x321; + svar16_b = 0x243; + svar32_a = 0x123456; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = msu_r(svar32_a, svar16_a, svar16_b); + fprintf(ref_file, "msu_r(%#x, %#x, %#x) = %#x overflow %d carry %d\n", svar32_a, svar16_a, svar16_b, svar16_c, Overflow, Carry); + + + /* L_deposit_h */ + /* int32_t L_deposit_h(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", 
svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_h(svar16_b); + fprintf(ref_file, "L_deposit_h(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + + /* L_deposit_l */ + /* int32_t L_deposit_l(int16_t x) */ + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + svar16_b = 0x1234; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_deposit_l(svar16_b); + fprintf(ref_file, "L_deposit_l(%#x) = %#x overflow %d carry %d\n", svar16_b, svar32_a, Overflow, Carry); + + + /* L_shr_r */ + /* int32_t L_shr_r(int32_t x, int16_t y) */ + svar32_a = 1; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 10; + svar16_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -10; + Overflow = 
init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = -20; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = -40; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 20; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 10; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0xFFF; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 10; + Overflow = 
init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + svar32_a = 0x12345678; + svar16_b = 64; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_c = L_shr_r(svar32_a, svar16_b); + fprintf(ref_file, "L_shr_r(%#x, %d) = %#x overflow %d carry %d\n", svar32_a, svar16_b, svar32_c, Overflow, Carry); + + + /* L_abs */ + /* int32_t L_abs(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = 0x80000000; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_abs(svar32_b); + fprintf(ref_file, "L_abs(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* L_sat */ + /* int32_t L_sat(int32_t x) */ + svar32_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -1; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = -32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + svar32_b = 32768; + Overflow = init_Overflow; + Carry = init_Carry; + svar32_a = L_sat(svar32_b); + fprintf(ref_file, "L_sat(%#x) = %#x overflow %d carry %d\n", svar32_b, svar32_a, Overflow, Carry); + + + /* div_s */ + /* int16_t 
div_s(int16_t x, int16_t y) */ + svar16_a = 1; + svar16_b = 1; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + svar16_a = 10000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + + /* NOTE(review): this third div_s case repeats the previous
   div_s(10000, 20000) test verbatim — looks like a copy-paste that was
   meant to use different operands; kept as-is because the committed
   reference output (ref-rvct.txt) depends on it. */ + svar16_a = 10000; + svar16_b = 20000; + Overflow = init_Overflow; + Carry = init_Carry; + svar16_c = div_s(svar16_a, svar16_b); + fprintf(ref_file, "div_s(%#x, %#x) = %#x overflow %d carry %d\n", svar16_a, svar16_b, svar16_c, Overflow, Carry); + +} + +/* Driver: runs the DSP-intrinsics test list (exec_dspfns1) once for each
   of the four combinations of its two arguments — presumably the initial
   Overflow/Carry seeds, given how init_Overflow/init_Carry are used above
   — so every test is exercised under all four flag-initialization states. */ +void exec_dspfns(void) +{ + /* Only Overflow is cleared here, not Carry — NOTE(review): confirm
     intentional; each individual test above re-seeds both flags from
     init_Overflow/init_Carry before calling the intrinsic anyway. */ + Overflow = 0; + + exec_dspfns1(0, 0); + exec_dspfns1(0, 1); + exec_dspfns1(1, 0); + exec_dspfns1(1, 1); +} diff --git a/ref_integer.c b/ref_integer.c new file mode 100644 index 0000000..0b6151a --- /dev/null +++ b/ref_integer.c @@ -0,0 +1,279 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#include <stdio.h> +#include <stdint.h> + +#ifndef __arm__ +#error Target not supported +#endif +#include <dspfns.h> /* For Overflow */ + +extern FILE* ref_file; + +void exec_integer (void) +{ + int i; + uint32_t uvar, ures; + int32_t svar1, svar2, sres; + + uint8_t clz; + + fprintf(ref_file, "\n\nInteger (non-NEON) intrinsics\n"); + + /* __clz */ + /* uint8_t __clz(uint32_t val); */ + uvar = 0xFFFFFFFF; + for(i=0; i<=32; i++) { + clz = __clz(uvar); + fprintf(ref_file, "__clz(%#x) = %d\n", (unsigned int)uvar, clz); + uvar >>= 1; + } + + /* __qadd */ + /* int32_t __qadd(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x7000; + svar2 = 0x7000; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFF; + svar2 = 0x8FFF; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x70000000; + Overflow = 0; + sres = 
__qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x8FFFFFFF; + Overflow = 0; + sres = __qadd(svar1, svar2); + fprintf(ref_file, "__qadd(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + /* __qdbl */ + /* int32_t __qdbl(int32_t val); */ + svar1 = 1; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0xEFFFFFFF; + Overflow = 0; + sres = __qdbl(svar1); + fprintf(ref_file, "__qdbl(%#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + /* __qsub */ + /* int32_t __qsub(int32_t val1, int32_t val2); */ + svar1 = 1; + svar2 = 2; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = -2; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = -1; + svar2 = 2; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x7000; + svar2 = 0xFFFF9000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + 
svar1 = 0x8FFF; + svar2 = 0xFFFF7001; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x70000000; + svar2 = 0x90000000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0x8FFFFFFF; + svar2 = 0x70000001; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + svar1 = 0; + svar2 = 0x80000000; + Overflow = 0; + sres = __qsub(svar1, svar2); + fprintf(ref_file, "__qsub(%#x, %#x) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)svar2, (unsigned int)sres, Overflow); + + /* __rbit */ + /* uint32_t __rbit(uint32_t val); */ + uvar = 0x12345678; + ures = __rbit(uvar); + fprintf(ref_file, "__rbit(%#x) = %#x\n", + (unsigned int)uvar, (unsigned int)ures); + + /* __rev */ + /* uint32_t __rev(uint32_t val); */ + uvar = 0x12345678; + ures = __rev(uvar); + fprintf(ref_file, "__rev(%#x) = %#x\n", + (unsigned int)uvar, (unsigned int)ures); + + /* __ssat */ + /* int32_t __ssat(int32_t val, uint32_t sat); */ + svar1 = 0x12345678; + Overflow = 0; + sres = __ssat(svar1, 30); + fprintf(ref_file, "__ssat(%#x, 30) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x12345678; + Overflow = 0; + sres = __ssat(svar1, 19); + fprintf(ref_file, "__ssat(%#x, 19) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 29); + fprintf(ref_file, "__ssat(%#x, 29) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 12); + fprintf(ref_file, "__ssat(%#x, 12) = %#x sat %d\n", + (unsigned int)svar1, (unsigned 
int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 32); + fprintf(ref_file, "__ssat(%#x, 32) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __ssat(svar1, 1); + fprintf(ref_file, "__ssat(%#x, 1) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + /* __usat */ + /* int32_t __usat(uint32_t val, uint32_t sat); */ + svar1 = 0x12345678; + Overflow = 0; + sres = __usat(svar1, 30); + fprintf(ref_file, "__usat(%#x, 30) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x12345678; + Overflow = 0; + sres = __usat(svar1, 19); + fprintf(ref_file, "__usat(%#x, 19) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 29); + fprintf(ref_file, "__usat(%#x, 29) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 12); + fprintf(ref_file, "__usat(%#x, 12) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 31); + fprintf(ref_file, "__usat(%#x, 31) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); + + svar1 = 0x87654321; + Overflow = 0; + sres = __usat(svar1, 0); + fprintf(ref_file, "__usat(%#x, 0) = %#x sat %d\n", + (unsigned int)svar1, (unsigned int)sres, Overflow); +} diff --git a/ref_v_binary_op.c b/ref_v_binary_op.c new file mode 100644 index 0000000..c09e710 --- /dev/null +++ b/ref_v_binary_op.c @@ -0,0 +1,88 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for binary operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. 
*/ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + TEST_VDUP(vector2, , int, s, 8, 8, 2); + TEST_VDUP(vector2, , int, s, 16, 4, -4); + TEST_VDUP(vector2, , int, s, 32, 2, 3); + TEST_VDUP(vector2, , int, s, 64, 1, 100); + TEST_VDUP(vector2, , uint, u, 8, 8, 20); + TEST_VDUP(vector2, , uint, u, 16, 4, 30); + TEST_VDUP(vector2, , uint, u, 32, 2, 40); + TEST_VDUP(vector2, , uint, u, 64, 1, 2); + TEST_VDUP(vector2, q, int, s, 8, 16, -10); + TEST_VDUP(vector2, q, int, s, 16, 8, -20); + TEST_VDUP(vector2, q, int, s, 32, 4, -30); + TEST_VDUP(vector2, q, int, s, 64, 2, 24); + TEST_VDUP(vector2, q, uint, u, 8, 16, 12); + TEST_VDUP(vector2, q, uint, u, 16, 8, 3); + TEST_VDUP(vector2, q, uint, u, 32, 4, 55); + TEST_VDUP(vector2, q, uint, u, 64, 2, 3); + + /* Apply a binary operator named INSN_NAME */ + TEST_MACRO_ALL_VARIANTS_1_5(TEST_BINARY_OP, INSN_NAME); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_binary_sat_op.c b/ref_v_binary_sat_op.c new file mode 100644 index 0000000..5608104 --- /dev/null +++ b/ref_v_binary_sat_op.c @@ -0,0 +1,108 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated 
documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for binary saturating operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector1,vector2), then store the result. 
*/ +#define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + +#define TEST_BINARY_SAT_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector1" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + 
TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_comp_f_op.c b/ref_v_comp_f_op.c new file mode 100644 index 0000000..9ec7e32 --- /dev/null +++ b/ref_v_comp_f_op.c @@ -0,0 +1,87 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + int i; + + /* Basic test: y=vcomp(x,x), then store the result. */ +#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N)) + +#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ + TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used for vector + comparison */ + TEST_VDUP(vector2, , float, f, 32, 2, -16.0); + TEST_VDUP(vector2, q, float, f, 32, 4, -14.0); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + TEST_VDUP(vector2, , float, f, 32, 2, -10.0); + TEST_VDUP(vector2, q, float, f, 32, 4, 10.0); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_v_comp_op.c b/ref_v_comp_op.c new file mode 100644 index 0000000..6def17d --- /dev/null +++ b/ref_v_comp_op.c @@ -0,0 +1,178 @@ +/* + 
+Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + int i; + + /* Basic test: y=vcomp(x,x), then store the result. 
*/ +#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N)) + +#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ + TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + /* No need for 64 bits elements */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, int, 8, 16); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, uint, 8, 16); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* There is no 64 bits variant, we can't use the generic initializer */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + 
TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used for vector + comparison */ + TEST_VDUP(vector2, , int, s, 8, 8, -10); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + TEST_VDUP(vector2, , int, s, 32, 2, -16); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xF3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xFFF2); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF1); + TEST_VDUP(vector2, , float, f, 32, 2, -15.0); + + TEST_VDUP(vector2, q, int, s, 8, 16, -4); + TEST_VDUP(vector2, q, int, s, 16, 8, -10); + TEST_VDUP(vector2, q, int, s, 32, 4, -14); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xF4); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xFFF6); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF2); + TEST_VDUP(vector2, q, float, f, 32, 4, -14.0); + + /* The same result buffers are used multiple times, so output them + before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + TEST_VCOMP(INSN_NAME, , int, s, uint, 8, 8); + TEST_VCOMP(INSN_NAME, , int, s, uint, 16, 4); + TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2); + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, , uint, u, uint, 8, 8); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 16, 4); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2); + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + 
TEST_VCOMP(INSN_NAME, q, int, s, uint, 8, 16); + TEST_VCOMP(INSN_NAME, q, int, s, uint, 16, 8); + TEST_VCOMP(INSN_NAME, q, int, s, uint, 32, 4); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 8, 16); + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 16, 8); + TEST_VCOMP(INSN_NAME, q, uint, u, uint, 32, 4); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + /* Extra tests to have 100% coverage on all the variants */ + TEST_VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); + TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VDUP(vector2, , int, s, 32, 2, -15); + TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + + TEST_VDUP(vector2, , float, f, 32, 2, -16.0); + TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); +} diff --git a/ref_v_unary_op.c b/ref_v_unary_op.c new file mode 100644 index 0000000..63d68e7 --- /dev/null +++ b/ref_v_unary_op.c @@ -0,0 +1,91 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, 
buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_v_unary_sat_op.c b/ref_v_unary_sat_op.c new file mode 100644 index 0000000..e854068 --- /dev/null +++ b/ref_v_unary_sat_op.c @@ -0,0 +1,95 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +/* Template file for unary saturating operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + +#define TEST_UNARY_SAT_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, 
s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_vaba.c b/ref_vaba.c new file mode 100644 index 0000000..bcbd68f --- /dev/null +++ b/ref_vaba.c @@ -0,0 +1,125 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VABA/VABAQ" +void exec_vaba (void) +{ + /* Basic test: v4=vaba(v1,v2,v3), then store the result. 
*/ +#define TEST_VABA(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_VABA_VAR(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, int, 8, 16); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 8, 16); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4) + + DECL_VABA_VAR(vector1); + DECL_VABA_VAR(vector2); + DECL_VABA_VAR(vector3); + DECL_VABA_VAR(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector1, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, 1); + TEST_VDUP(vector2, , int, s, 16, 4, -13); + TEST_VDUP(vector2, , int, s, 32, 2, 8); + TEST_VDUP(vector2, , uint, u, 8, 8, 1); + TEST_VDUP(vector2, , uint, u, 16, 4, 13); + TEST_VDUP(vector2, , uint, u, 32, 2, 8); + TEST_VDUP(vector2, q, int, s, 8, 16, 10); + TEST_VDUP(vector2, q, int, s, 16, 8, -12); + 
TEST_VDUP(vector2, q, int, s, 32, 4, 32); + TEST_VDUP(vector2, q, uint, u, 8, 16, 10); + TEST_VDUP(vector2, q, uint, u, 16, 8, 12); + TEST_VDUP(vector2, q, uint, u, 32, 4, 32); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector3, , int, s, 8, 8, -5); + TEST_VDUP(vector3, , int, s, 16, 4, 25); + TEST_VDUP(vector3, , int, s, 32, 2, -40); + TEST_VDUP(vector3, , uint, u, 8, 8, 100); + TEST_VDUP(vector3, , uint, u, 16, 4, 2340); + TEST_VDUP(vector3, , uint, u, 32, 2, 0xffffffff); + TEST_VDUP(vector3, q, int, s, 8, 16, -100); + TEST_VDUP(vector3, q, int, s, 16, 8, -3000); + TEST_VDUP(vector3, q, int, s, 32, 4, 10000); + TEST_VDUP(vector3, q, uint, u, 8, 16, 2); + TEST_VDUP(vector3, q, uint, u, 16, 8, 3); + TEST_VDUP(vector3, q, uint, u, 32, 4, 4); + + TEST_VABA(, int, s, 8, 8); + TEST_VABA(, int, s, 16, 4); + TEST_VABA(, int, s, 32, 2); + TEST_VABA(, uint, u, 8, 8); + TEST_VABA(, uint, u, 16, 4); + TEST_VABA(, uint, u, 32, 2); + TEST_VABA(q, int, s, 8, 16); + TEST_VABA(q, int, s, 16, 8); + TEST_VABA(q, int, s, 32, 4); + TEST_VABA(q, uint, u, 8, 16); + TEST_VABA(q, uint, u, 16, 8); + TEST_VABA(q, uint, u, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vabal.c b/ref_vabal.c new file mode 100644 index 0000000..274901b --- /dev/null +++ b/ref_vabal.c @@ -0,0 +1,103 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VABAL" +void exec_vabal (void) +{ + /* Basic test: v4=vabal(v1,v2,v3), then store the result. */ +#define TEST_VABAL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vabal_##T2##W(VECT_VAR(vector1, T1, W2, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_VABAL_VAR_LONG(VAR) \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, int, 64, 2); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, uint, 64, 2) + +#define DECL_VABAL_VAR_SHORT(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2) + + DECL_VABAL_VAR_LONG(vector1); + DECL_VABAL_VAR_SHORT(vector2); + DECL_VABAL_VAR_SHORT(vector3); + DECL_VABAL_VAR_LONG(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + 
TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 64, 2); + + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, 1); + TEST_VDUP(vector2, , int, s, 16, 4, -13); + TEST_VDUP(vector2, , int, s, 32, 2, 8); + TEST_VDUP(vector2, , uint, u, 8, 8, 1); + TEST_VDUP(vector2, , uint, u, 16, 4, 13); + TEST_VDUP(vector2, , uint, u, 32, 2, 8); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector3, , int, s, 8, 8, -5); + TEST_VDUP(vector3, , int, s, 16, 4, 25); + TEST_VDUP(vector3, , int, s, 32, 2, -40); + TEST_VDUP(vector3, , uint, u, 8, 8, 100); + TEST_VDUP(vector3, , uint, u, 16, 4, 2340); + TEST_VDUP(vector3, , uint, u, 32, 2, 0xffffffff); + + TEST_VABAL(int, s, 8, 16, 8); + TEST_VABAL(int, s, 16, 32, 4); + TEST_VABAL(int, s, 32, 64, 2); + TEST_VABAL(uint, u, 8, 16, 8); + TEST_VABAL(uint, u, 16, 32, 4); + TEST_VABAL(uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vabd.c b/ref_vabd.c new file mode 100644 index 0000000..5b1420c --- /dev/null +++ b/ref_vabd.c @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VABD/VABDQ" +void exec_vabd (void) +{ + /* Basic test: v4=vabd(v1,v2), then store the result. */ +#define TEST_VABD(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vabd##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_VABD_VAR(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 8, 16); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 8, 16); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + DECL_VABD_VAR(vector1); + DECL_VABD_VAR(vector2); + DECL_VABD_VAR(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, 
int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, 1); + TEST_VDUP(vector2, , int, s, 16, 4, -13); + TEST_VDUP(vector2, , int, s, 32, 2, 8); + TEST_VDUP(vector2, , uint, u, 8, 8, 1); + TEST_VDUP(vector2, , uint, u, 16, 4, 13); + TEST_VDUP(vector2, , uint, u, 32, 2, 8); + TEST_VDUP(vector2, , float, f, 32, 2, 8.3); + TEST_VDUP(vector2, q, int, s, 8, 16, 10); + TEST_VDUP(vector2, q, int, s, 16, 8, -12); + TEST_VDUP(vector2, q, int, s, 32, 4, 32); + TEST_VDUP(vector2, q, uint, u, 8, 16, 10); + TEST_VDUP(vector2, q, uint, u, 16, 8, 12); + TEST_VDUP(vector2, q, uint, u, 32, 4, 32); + TEST_VDUP(vector2, q, float, f, 32, 4, 32.12); + + TEST_VABD(, int, s, 8, 8); + TEST_VABD(, int, s, 16, 4); + TEST_VABD(, int, s, 32, 2); + TEST_VABD(, uint, u, 8, 8); + TEST_VABD(, uint, u, 16, 4); + TEST_VABD(, uint, u, 32, 2); + TEST_VABD(, float, f, 32, 2); + TEST_VABD(q, int, s, 8, 16); + TEST_VABD(q, int, s, 16, 8); + TEST_VABD(q, int, s, 32, 4); + TEST_VABD(q, uint, u, 8, 16); + TEST_VABD(q, uint, u, 16, 8); + TEST_VABD(q, uint, u, 32, 4); + TEST_VABD(q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vabdl.c b/ref_vabdl.c new file mode 100644 index 0000000..a5d0d7f --- /dev/null +++ b/ref_vabdl.c @@ -0,0 +1,93 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following 
conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VABDL" +void exec_vabdl (void) +{ + /* Basic test: v4=vabdl(v1,v2), then store the result. */ +#define TEST_VABDL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vabdl_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_VABDL_VAR_LONG(VAR) \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, int, 64, 2); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, uint, 64, 2) + +#define DECL_VABDL_VAR_SHORT(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2) + + DECL_VABDL_VAR_SHORT(vector1); + DECL_VABDL_VAR_SHORT(vector2); + DECL_VABDL_VAR_LONG(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 
2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, 1); + TEST_VDUP(vector2, , int, s, 16, 4, -13); + TEST_VDUP(vector2, , int, s, 32, 2, 8); + TEST_VDUP(vector2, , uint, u, 8, 8, 1); + TEST_VDUP(vector2, , uint, u, 16, 4, 13); + TEST_VDUP(vector2, , uint, u, 32, 2, 8); + + TEST_VABDL(int, s, 8, 16, 8); + TEST_VABDL(int, s, 16, 32, 4); + TEST_VABDL(int, s, 32, 64, 2); + TEST_VABDL(uint, u, 8, 16, 8); + TEST_VABDL(uint, u, 16, 32, 4); + TEST_VABDL(uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vabs.c b/ref_vabs.c new file mode 100644 index 0000000..d8f1e2d --- /dev/null +++ b/ref_vabs.c @@ -0,0 +1,54 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vabs +#define TEST_MSG "VABS/VABSQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vabs_f32(void); +#define EXTRA_TESTS exec_vabs_f32 + +#include "ref_v_unary_op.c" + +void exec_vabs_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + TEST_VDUP(vector, , float, f, 32, 2, -2.3); + TEST_VDUP(vector, q, float, f, 32, 4, 3.4); + + TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vadd.c b/ref_vadd.c new file mode 100644 index 0000000..29d96c6 --- /dev/null +++ b/ref_vadd.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vadd +#define TEST_MSG "VADD/VADDQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vadd_f32(void); +#define EXTRA_TESTS exec_vadd_f32 + +#include "ref_v_binary_op.c" + +void exec_vadd_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + TEST_VDUP(vector, , float, f, 32, 2, 2.3); + TEST_VDUP(vector, q, float, f, 32, 4, 3.4); + + TEST_VDUP(vector2, , float, f, 32, 2, 4.5); + TEST_VDUP(vector2, q, float, f, 32, 4, 5.6); + + TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vaddhn.c b/ref_vaddhn.c new file mode 100644 index 0000000..284396a --- /dev/null +++ b/ref_vaddhn.c @@ -0,0 +1,86 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#include <stdint.h> + +#ifndef INSN_NAME +#define INSN_NAME vaddhn +#define TEST_MSG "VADDHN" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result. */ +#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + +#define TEST_VADDHN(INSN, T1, T2, W, W2, N) \ + TEST_VADDHN1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector1); + DECL_VARIABLE_128BITS_VARIANTS(vector2); + + clean_results (); + + /* Fill input vector1 and vector2 with arbitrary values */ + TEST_VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1)); + TEST_VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1)); + TEST_VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1)); + TEST_VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1)); + TEST_VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1)); + TEST_VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1)); + + TEST_VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX); + TEST_VDUP(vector2, q, int, s, 32, 4, 
(uint32_t)UINT16_MAX); + TEST_VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX); + TEST_VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX); + TEST_VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX); + TEST_VDUP(vector2, q, uint, u, 64, 2, (uint64_t)UINT32_MAX); + + TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8); + TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4); + TEST_VADDHN(INSN_NAME, int, s, 64, 32, 2); + TEST_VADDHN(INSN_NAME, uint, u, 16, 8, 8); + TEST_VADDHN(INSN_NAME, uint, u, 32, 16, 4); + TEST_VADDHN(INSN_NAME, uint, u, 64, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vaddl.c b/ref_vaddl.c new file mode 100644 index 0000000..74fce77 --- /dev/null +++ b/ref_vaddl.c @@ -0,0 +1,104 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vaddl +#define TEST_MSG "VADDL" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=vaddl(x,x), then store the result. */ +#define TEST_VADDL1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VADDL(INSN, T1, T2, W, W2, N) \ + TEST_VADDL1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, -13); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + TEST_VDUP(vector2, 
, int, s, 32, 2, -16); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xf3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xfff1); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xfffffff0); + + TEST_VADDL(INSN_NAME, int, s, 8, 16, 8); + TEST_VADDL(INSN_NAME, int, s, 16, 32, 4); + TEST_VADDL(INSN_NAME, int, s, 32, 64, 2); + TEST_VADDL(INSN_NAME, uint, u, 8, 16, 8); + TEST_VADDL(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDL(INSN_NAME, uint, u, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vaddw.c b/ref_vaddw.c new file mode 100644 index 0000000..324e843 --- /dev/null +++ b/ref_vaddw.c @@ -0,0 +1,104 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vaddw +#define TEST_MSG "VADDW" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=vaddw(x,x), then store the result. */ +#define TEST_VADDW1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W2, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VADDW(INSN, T1, T2, W, W2, N) \ + TEST_VADDW1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector2, , int, s, 8, 8, -13); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + 
TEST_VDUP(vector2, , int, s, 32, 2, -16); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xf3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xfff1); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xfffffff0); + + TEST_VADDW(INSN_NAME, int, s, 8, 16, 8); + TEST_VADDW(INSN_NAME, int, s, 16, 32, 4); + TEST_VADDW(INSN_NAME, int, s, 32, 64, 2); + TEST_VADDW(INSN_NAME, uint, u, 8, 16, 8); + TEST_VADDW(INSN_NAME, uint, u, 16, 32, 4); + TEST_VADDW(INSN_NAME, uint, u, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vand.c b/ref_vand.c new file mode 100644 index 0000000..57e9013 --- /dev/null +++ b/ref_vand.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vand +#define TEST_MSG "VAND/VANDQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vbic.c b/ref_vbic.c new file mode 100644 index 0000000..0d033ac --- /dev/null +++ b/ref_vbic.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vbic +#define TEST_MSG "VBIC/VBICQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vbsl.c b/ref_vbsl.c new file mode 100644 index 0000000..3bd8933 --- /dev/null +++ b/ref_vbsl.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VBSL/VBSLQ" +void exec_vbsl (void) +{ + /* Basic test: y=vbsl(unsigned_vec,x,x), then store the result. 
*/ +#define TEST_VBSL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vbsl##Q##_##T2##W(VECT_VAR(vector_first, T3, W, N), \ + VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_UNSIGNED_VARIANTS(vector_first); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose init value arbitrarily, will be used for vector + comparison. As we want different values for each type variant, we + can't use generic initialization macros. */ + TEST_VDUP(vector2, , int, s, 8, 8, -10); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + TEST_VDUP(vector2, , int, s, 32, 2, -30); + TEST_VDUP(vector2, , int, s, 64, 1, -33); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xF3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xFFF2); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); + TEST_VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3); + TEST_VDUP(vector2, , float, f, 32, 2, -30.3); + + TEST_VDUP(vector2, q, int, s, 8, 16, -10); + TEST_VDUP(vector2, q, int, s, 16, 8, -14); + TEST_VDUP(vector2, q, int, s, 32, 4, -30); + TEST_VDUP(vector2, q, int, s, 64, 2, -33); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xF3); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xFFF2); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF0); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3); + TEST_VDUP(vector2, q, float, f, 32, 4, -30.4); + + TEST_VDUP(vector_first, , uint, u, 8, 8, 0xF4); + TEST_VDUP(vector_first, , uint, u, 16, 4, 0xFFF6); + TEST_VDUP(vector_first, , uint, u, 32, 2, 0xFFFFFFF2); + TEST_VDUP(vector_first, , uint, u, 64, 1, 0xFFFFFFF2); + TEST_VDUP(vector_first, 
q, uint, u, 8, 16, 0xF4); + TEST_VDUP(vector_first, q, uint, u, 16, 8, 0xFFF6); + TEST_VDUP(vector_first, q, uint, u, 32, 4, 0xFFFFFFF2); + TEST_VDUP(vector_first, q, uint, u, 64, 2, 0xFFFFFFF2); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VBSL, uint); + TEST_VBSL(uint, , float, f, 32, 2); + TEST_VBSL(uint, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcage.c b/ref_vcage.c new file mode 100644 index 0000000..b9919f9 --- /dev/null +++ b/ref_vcage.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcage +#define TEST_MSG "VCAGE/VCAGEQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcagt.c b/ref_vcagt.c new file mode 100644 index 0000000..edb6fa0 --- /dev/null +++ b/ref_vcagt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcagt +#define TEST_MSG "VCAGT/VCAGTQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcale.c b/ref_vcale.c new file mode 100644 index 0000000..b221f79 --- /dev/null +++ b/ref_vcale.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcale +#define TEST_MSG "VCALE/VCALEQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vcalt.c b/ref_vcalt.c new file mode 100644 index 0000000..189a9ae --- /dev/null +++ b/ref_vcalt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcalt +#define TEST_MSG "VCALT/VCALTQ" + +#include "ref_v_comp_f_op.c" diff --git a/ref_vceq.c b/ref_vceq.c new file mode 100644 index 0000000..100f201 --- /dev/null +++ b/ref_vceq.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vceq +#define TEST_MSG "VCEQ/VCEQQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcge.c b/ref_vcge.c new file mode 100644 index 0000000..9e1fdea --- /dev/null +++ b/ref_vcge.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcge +#define TEST_MSG "VCGE/VCGEQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcgt.c b/ref_vcgt.c new file mode 100644 index 0000000..afd8fe5 --- /dev/null +++ b/ref_vcgt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcgt +#define TEST_MSG "VCGT/VCGTQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcle.c b/ref_vcle.c new file mode 100644 index 0000000..3da8d1a --- /dev/null +++ b/ref_vcle.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vcle +#define TEST_MSG "VCLE/VCLEQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vcls.c b/ref_vcls.c new file mode 100644 index 0000000..5c96097 --- /dev/null +++ b/ref_vcls.c @@ -0,0 +1,107 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcls +#define TEST_MSG "VCLS/VCLSQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, , int, s, 8, 8, 0x1); + TEST_VDUP(vector, , int, s, 16, 4, 0x1234); + TEST_VDUP(vector, , int, s, 32, 2, 0x34); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x678); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex2 (TEST_MSG, " (positive input)"); + + + /* Fill input vector with arbitrary values (negative) */ + TEST_VDUP(vector, , int, s, 8, 8, 0xFF); + TEST_VDUP(vector, , int, s, 16, 4, 0xC234); + TEST_VDUP(vector, , int, s, 32, 2, 0xDEAD0034); + TEST_VDUP(vector, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector, q, int, s, 16, 8, 0xE234); + TEST_VDUP(vector, q, int, s, 32, 4, 0xBEEF0678); + + /* Apply a unary operator 
named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex2 (TEST_MSG, " (negative input)"); +} diff --git a/ref_vclt.c b/ref_vclt.c new file mode 100644 index 0000000..ce974d9 --- /dev/null +++ b/ref_vclt.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vclt +#define TEST_MSG "VCLT/VCLTQ" + +#include "ref_v_comp_op.c" diff --git a/ref_vclz.c b/ref_vclz.c new file mode 100644 index 0000000..ff57226 --- /dev/null +++ b/ref_vclz.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vclz +#define TEST_MSG "VCLZ/VCLZQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, , int, s, 8, 8, 0x84); + TEST_VDUP(vector, , int, s, 16, 4, 0x1234); + TEST_VDUP(vector, , int, s, 32, 2, 0x5678); + TEST_VDUP(vector, , uint, u, 8, 8, 0x34); + TEST_VDUP(vector, , uint, u, 16, 4, 0x8234); + TEST_VDUP(vector, , uint, u, 32, 2, 0x7654321); + TEST_VDUP(vector, q, int, s, 8, 16, 0x34); + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x12345678); + TEST_VDUP(vector, q, uint, u, 8, 16, 0x13); + TEST_VDUP(vector, q, uint, u, 16, 8, 0x4); + TEST_VDUP(vector, q, uint, u, 
32, 4, 0x1); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4); + TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcnt.c b/ref_vcnt.c new file mode 100644 index 0000000..c680620 --- /dev/null +++ b/ref_vcnt.c @@ -0,0 +1,80 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcnt +#define TEST_MSG "VCNT/VCNTQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, uint, 8, 16); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, uint, 8, 16); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, , int, s, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 8, 8, 0x34); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xBD); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcombine.c b/ref_vcombine.c new file mode 100644 index 0000000..0aa3cbf --- /dev/null +++ b/ref_vcombine.c @@ -0,0 +1,77 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in 
the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VCOMBINE" +void exec_vcombine (void) +{ + /* Basic test: vec128=vcombine(vec64_a, vec64_b), then store the result. 
*/ +#define TEST_VCOMBINE(T1, T2, W, N, N2) \ + VECT_VAR(vector128, T1, W, N2) = \ + vcombine_##T2##W(VECT_VAR(vector64_a, T1, W, N), \ + VECT_VAR(vector64_b, T1, W, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vector128, T1, W, N2)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64_a); + DECL_VARIABLE_64BITS_VARIANTS(vector64_b); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_64BITS_VARIANTS_2_5(TEST_VLOAD, vector64_a, buffer); + + TEST_VLOAD(vector64_a, buffer, , float, f, 32, 2); + + TEST_VDUP(vector64_b, , int, s, 8, 8, 0x11); + TEST_VDUP(vector64_b, , int, s, 16, 4, 0x22); + TEST_VDUP(vector64_b, , int, s, 32, 2, 0x33); + TEST_VDUP(vector64_b, , int, s, 64, 1, 0x44); + TEST_VDUP(vector64_b, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector64_b, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector64_b, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector64_b, , uint, u, 64, 1, 0x88); + TEST_VDUP(vector64_b, , float, f, 32, 2, 3.3); + + clean_results (); + + TEST_VCOMBINE(int, s, 8, 8, 16); + TEST_VCOMBINE(int, s, 16, 4, 8); + TEST_VCOMBINE(int, s, 32, 2, 4); + TEST_VCOMBINE(int, s, 64, 1, 2); + TEST_VCOMBINE(uint, u, 8, 8, 16); + TEST_VCOMBINE(uint, u, 16, 4, 8); + TEST_VCOMBINE(uint, u, 32, 2, 4); + TEST_VCOMBINE(uint, u, 64, 1, 2); + TEST_VCOMBINE(float, f, 32, 2, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcreate.c b/ref_vcreate.c new file mode 100644 index 0000000..9f4ee4b --- /dev/null +++ b/ref_vcreate.c @@ -0,0 +1,99 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to 
whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vcreate +#define TEST_MSG "VCREATE" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=vcreate(x), then store the result. 
*/ +#define TEST_VCREATE(T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = vcreate_##T2##W(VECT_VAR(val, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + +#define DECL_VAL(VAR, T1, W, N) \ + uint64_t VECT_VAR(VAR, T1, W, N) + + DECL_VAL(val, int, 8, 8); + DECL_VAL(val, int, 16, 4); + DECL_VAL(val, int, 32, 2); + DECL_VAL(val, int, 64, 1); + DECL_VAL(val, float, 32, 2); + DECL_VAL(val, uint, 8, 8); + DECL_VAL(val, uint, 16, 4); + DECL_VAL(val, uint, 32, 2); + DECL_VAL(val, uint, 64, 1); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + + clean_results (); + + /* Initialize input values arbitrarily */ + VECT_VAR(val, int, 8, 8) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 16, 4) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 32, 2) = 0x123456789abcdef0LL; + VECT_VAR(val, int, 64, 1) = 0x123456789abcdef0LL; + VECT_VAR(val, float, 32, 2) = 0x123456789abcdef0LL; + VECT_VAR(val, uint, 8, 8) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 16, 4) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 32, 2) = 0x123456789abcdef0ULL; + VECT_VAR(val, uint, 64, 1) = 0x123456789abcdef0ULL; + + TEST_VCREATE(int, s, 8, 8); + TEST_VCREATE(int, s, 16, 4); + TEST_VCREATE(int, s, 32, 2); + TEST_VCREATE(float, f, 32, 2); + TEST_VCREATE(int, s, 64, 1); + TEST_VCREATE(uint, u, 8, 8); + TEST_VCREATE(uint, u, 16, 4); + TEST_VCREATE(uint, u, 32, 2); + TEST_VCREATE(uint, u, 64, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vcvt.c b/ref_vcvt.c new file mode 100644 index 0000000..45553d0 --- /dev/null +++ b/ref_vcvt.c @@ -0,0 +1,160 @@ +/* + 
+Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VCVT/VCVTQ" +void exec_vcvt (void) +{ + int i; + + /* Basic test: y=vcvt(x), then store the result. 
*/ +#define TEST_VCVT(Q, T1, T2, W, N, TS1, TS2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VCVT_FP(Q, T1, T2, W, N, TS1, TS2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VCVT_N(Q, T1, T2, W, N, TS1, TS2, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VCVT_N_FP(Q, T1, T2, W, N, TS1, TS2, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Make sure some elements have a fractional part, to exercise + integer conversions */ + TEST_VSET_LANE(vector, , float, f, 32, 2, 0, -15.3); + TEST_VSET_LANE(vector, , float, f, 32, 2, 1, 5.3); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, -15.3); + TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, 5.3); + + /* The same result buffers are used multiple times, so we output + them before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + + /* 
vcvt_f32_xx */ + TEST_VCVT_FP(, float, f, 32, 2, int, s); + TEST_VCVT_FP(, float, f, 32, 2, uint, u); + + /* vcvtq_f32_xx */ + TEST_VCVT_FP(q, float, f, 32, 4, int, s); + TEST_VCVT_FP(q, float, f, 32, 4, uint, u); + + /* vcvt_xx_f32 */ + TEST_VCVT(, int, s, 32, 2, float, f); + TEST_VCVT(, uint, u, 32, 2, float, f); + + /* vcvtq_xx_f32 */ + TEST_VCVT(q, int, s, 32, 4, float, f); + TEST_VCVT(q, uint, u, 32, 4, float, f); + + /* The same result buffers are used multiple times, so output them + before overwriting them */ +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + + /* vcvt_n_f32_xx */ + TEST_VCVT_N_FP(, float, f, 32, 2, int, s, 2); + TEST_VCVT_N_FP(, float, f, 32, 2, uint, u, 7); + + /* vcvtq_n_f32_xx */ + TEST_VCVT_N_FP(q, float, f, 32, 4, int, s, 30); + TEST_VCVT_N_FP(q, float, f, 32, 4, uint, u, 12); + + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 20); + TEST_VCVT_N(, uint, u, 32, 2, float, f, 2); + + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 13); + TEST_VCVT_N(q, uint, u, 32, 4, float, f, 1); + + /* Check rounding */ +#undef TEST_MSG +#define TEST_MSG "VCVT/VCVTQ" + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check rounding)"); + TEST_VDUP(vector, , float, f, 32, 2, 10.4); + TEST_VDUP(vector, q, float, f, 32, 4, 125.9); + /* vcvt_xx_f32 */ + TEST_VCVT(, int, s, 32, 2, float, f); + TEST_VCVT(, uint, u, 32, 2, float, f); + /* vcvtq_xx_f32 */ + TEST_VCVT(q, int, s, 32, 4, float, f); + TEST_VCVT(q, uint, u, 32, 4, float, f); + +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check rounding)"); + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 20); + TEST_VCVT_N(, uint, u, 32, 2, float, f, 20); + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 13); + TEST_VCVT_N(q, uint, u, 32, 4, float, f, 13); + +#undef TEST_MSG +#define TEST_MSG "VCVT_N/VCVTQ_N" + fprintf(ref_file, "\n%s output:\n", 
TEST_MSG " (check saturation)"); + /* vcvt_n_xx_f32 */ + TEST_VCVT_N(, int, s, 32, 2, float, f, 31); + /* vcvtq_n_xx_f32 */ + TEST_VCVT_N(q, int, s, 32, 4, float, f, 31); +} diff --git a/ref_vdup.c b/ref_vdup.c new file mode 100644 index 0000000..42085ab --- /dev/null +++ b/ref_vdup.c @@ -0,0 +1,109 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VDUP/VDUPQ" +void exec_vdup (void) +{ + int i; + + /* Basic test: vec=vdup(x), then store the result. */ +#undef TEST_VDUP +#define TEST_VDUP(Q, T1, T2, W, N) \ + VECT_VAR(vector, T1, W, N) = \ + vdup##Q##_n_##T2##W(VECT_VAR(buffer, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* Basic test: vec=vmov(x), then store the result. 
*/ +#define TEST_VMOV(Q, T1, T2, W, N) \ + VECT_VAR(vector, T1, W, N) = \ + vmov##Q##_n_##T2##W(VECT_VAR(buffer, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + for (i=0; i< 3; i++) { + clean_results (); + + TEST_VDUP(, int, s, 8, 8); + TEST_VDUP(, int, s, 16, 4); + TEST_VDUP(, int, s, 32, 2); + TEST_VDUP(, int, s, 64, 1); + TEST_VDUP(, uint, u, 8, 8); + TEST_VDUP(, uint, u, 16, 4); + TEST_VDUP(, uint, u, 32, 2); + TEST_VDUP(, uint, u, 64, 1); + TEST_VDUP(, float, f, 32, 2); + + TEST_VDUP(q, int, s, 8, 16); + TEST_VDUP(q, int, s, 16, 8); + TEST_VDUP(q, int, s, 32, 4); + TEST_VDUP(q, int, s, 64, 2); + TEST_VDUP(q, uint, u, 8, 16); + TEST_VDUP(q, uint, u, 16, 8); + TEST_VDUP(q, uint, u, 32, 4); + TEST_VDUP(q, uint, u, 64, 2); + TEST_VDUP(q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); + } + +#undef TEST_MSG +#define TEST_MSG "VMOV/VMOVQ" + for (i=0; i< 3; i++) { + clean_results (); + + TEST_VMOV(, int, s, 8, 8); + TEST_VMOV(, int, s, 16, 4); + TEST_VMOV(, int, s, 32, 2); + TEST_VMOV(, int, s, 64, 1); + TEST_VMOV(, uint, u, 8, 8); + TEST_VMOV(, uint, u, 16, 4); + TEST_VMOV(, uint, u, 32, 2); + TEST_VMOV(, uint, u, 64, 1); + TEST_VMOV(, float, f, 32, 2); + + TEST_VMOV(q, int, s, 8, 16); + TEST_VMOV(q, int, s, 16, 8); + TEST_VMOV(q, int, s, 32, 4); + TEST_VMOV(q, int, s, 64, 2); + TEST_VMOV(q, uint, u, 8, 16); + TEST_VMOV(q, uint, u, 16, 8); + TEST_VMOV(q, uint, u, 32, 4); + TEST_VMOV(q, uint, u, 64, 2); + TEST_VMOV(q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); + } +} diff --git a/ref_vdup_lane.c b/ref_vdup_lane.c new file mode 100644 index 0000000..e93969d --- /dev/null +++ b/ref_vdup_lane.c @@ -0,0 +1,77 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this 
software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VDUP_LANE/VDUP_LANEQ" +void exec_vdup_lane (void) +{ + /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. 
*/ +#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vdup##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N2), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + /* Input vector can only have 64 bits */ + DECL_VARIABLE_64BITS_VARIANTS(vector); + + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_64BITS_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + + /* Choose lane arbitrarily */ + TEST_VDUP_LANE(, int, s, 8, 8, 8, 1); + TEST_VDUP_LANE(, int, s, 16, 4, 4, 2); + TEST_VDUP_LANE(, int, s, 32, 2, 2, 1); + TEST_VDUP_LANE(, int, s, 64, 1, 1, 0); + TEST_VDUP_LANE(, uint, u, 8, 8, 8, 7); + TEST_VDUP_LANE(, uint, u, 16, 4, 4, 3); + TEST_VDUP_LANE(, uint, u, 32, 2, 2, 1); + TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); + TEST_VDUP_LANE(, float, f, 32, 2, 2, 1); + + TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2); + TEST_VDUP_LANE(q, int, s, 16, 8, 4, 3); + TEST_VDUP_LANE(q, int, s, 32, 4, 2, 1); + TEST_VDUP_LANE(q, int, s, 64, 2, 1, 0); + TEST_VDUP_LANE(q, uint, u, 8, 16, 8, 5); + TEST_VDUP_LANE(q, uint, u, 16, 8, 4, 1); + TEST_VDUP_LANE(q, uint, u, 32, 4, 2, 0); + TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); + TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_veor.c b/ref_veor.c new file mode 100644 index 0000000..50226ff --- /dev/null +++ b/ref_veor.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the 
Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME veor +#define TEST_MSG "VEOR/VEORQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vext.c b/ref_vext.c new file mode 100644 index 0000000..a14a5ac --- /dev/null +++ b/ref_vext.c @@ -0,0 +1,100 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VEXT/VEXTQ" +void exec_vext (void) +{ + /* vector_res = vext(vector1,vector2,offset), then store the result. */ +#define TEST_VEXT(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vext##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + TEST_VDUP(vector2, , float, f, 32, 2, 33.6); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + 
TEST_VDUP(vector2, q, float, f, 32, 4, 33.2); + + /* Choose arbitrary extract offsets */ + TEST_VEXT(, int, s, 8, 8, 7); + TEST_VEXT(, int, s, 16, 4, 3); + TEST_VEXT(, int, s, 32, 2, 1); + TEST_VEXT(, int, s, 64, 1, 0); + TEST_VEXT(, uint, u, 8, 8, 6); + TEST_VEXT(, uint, u, 16, 4, 2); + TEST_VEXT(, uint, u, 32, 2, 1); + TEST_VEXT(, uint, u, 64, 1, 0); + TEST_VEXT(, float, f, 32, 2, 1); + + TEST_VEXT(q, int, s, 8, 16, 14); + TEST_VEXT(q, int, s, 16, 8, 7); + TEST_VEXT(q, int, s, 32, 4, 3); + TEST_VEXT(q, int, s, 64, 2, 1); + TEST_VEXT(q, uint, u, 8, 16, 12); + TEST_VEXT(q, uint, u, 16, 8, 6); + TEST_VEXT(q, uint, u, 32, 4, 3); + TEST_VEXT(q, uint, u, 64, 2, 1); + TEST_VEXT(q, float, f, 32, 4, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vget_high.c b/ref_vget_high.c new file mode 100644 index 0000000..47ff9ee --- /dev/null +++ b/ref_vget_high.c @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VGET_HIGH" +void exec_vget_high (void) +{ + /* Basic test: vec64=vget_high(vec128), then store the result. */ +#define TEST_VGET_HIGH(T1, T2, W, N, N2) \ + VECT_VAR(vector64, T1, W, N) = \ + vget_high_##T2##W(VECT_VAR(vector128, T1, W, N2)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_128BITS_VARIANTS_2_5(TEST_VLOAD, vector128, buffer); + TEST_VLOAD(vector128, buffer, q, float, f, 32, 4); + + clean_results (); + + TEST_VGET_HIGH(int, s, 8, 8, 16); + TEST_VGET_HIGH(int, s, 16, 4, 8); + TEST_VGET_HIGH(int, s, 32, 2, 4); + TEST_VGET_HIGH(int, s, 64, 1, 2); + TEST_VGET_HIGH(uint, u, 8, 8, 16); + TEST_VGET_HIGH(uint, u, 16, 4, 8); + TEST_VGET_HIGH(uint, u, 32, 2, 4); + TEST_VGET_HIGH(uint, u, 64, 1, 2); + TEST_VGET_HIGH(float, f, 32, 2, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vget_lane.c b/ref_vget_lane.c new file mode 100644 index 0000000..e603f33 --- /dev/null +++ b/ref_vget_lane.c @@ -0,0 +1,93 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons 
to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vget_lane (void) +{ + /* vec=vget_lane(vec, lane), then store the result. */ +#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \ + VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)); \ + fprintf(ref_file, "%" PRIx##W ", ", VAR(var, T1, W)) + + /* Special variant for floating-point */ +#define TEST_VGET_LANE_F(Q, T1, T2, W, N, L) \ + VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)); \ + fprintf(ref_file, "%" PRIx##W ", ", *((uint##W##_t*)&VAR(var, T1, W))) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + /* Scalar variables */ + VAR_DECL(var, int, 8); + VAR_DECL(var, int, 16); + VAR_DECL(var, int, 32); + VAR_DECL(var, int, 64); + VAR_DECL(var, uint, 8); + VAR_DECL(var, uint, 16); + VAR_DECL(var, uint, 32); + VAR_DECL(var, uint, 64); + VAR_DECL(var, float, 32); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); 
+ TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + fprintf(ref_file, "\n%s output:\n", "VGET_LANE/VGETQ_LANE"); + + /* Choose lane arbitrarily */ + TEST_VGET_LANE(, int, s, 8, 8, 7); + TEST_VGET_LANE(, int, s, 16, 4, 3); + TEST_VGET_LANE(, int, s, 32, 2, 1); + TEST_VGET_LANE(, int, s, 64, 1, 0); + TEST_VGET_LANE(, uint, u, 8, 8, 6); + TEST_VGET_LANE(, uint, u, 16, 4, 2); + TEST_VGET_LANE(, uint, u, 32, 2, 1); + TEST_VGET_LANE(, uint, u, 64, 1, 0); + TEST_VGET_LANE_F(, float, f, 32, 2, 1); + + TEST_VGET_LANE(q, int, s, 8, 16, 15); + TEST_VGET_LANE(q, int, s, 16, 8, 5); + TEST_VGET_LANE(q, int, s, 32, 4, 3); + TEST_VGET_LANE(q, int, s, 64, 2, 1); + TEST_VGET_LANE(q, uint, u, 8, 16, 14); + TEST_VGET_LANE(q, uint, u, 16, 8, 6); + TEST_VGET_LANE(q, uint, u, 32, 4, 2); + TEST_VGET_LANE(q, uint, u, 64, 2, 1); + TEST_VGET_LANE_F(q, float, f, 32, 4, 3); + + fprintf(ref_file, "\n"); +} diff --git a/ref_vget_low.c b/ref_vget_low.c new file mode 100644 index 0000000..8a7170f --- /dev/null +++ b/ref_vget_low.c @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VGET_LOW" +void exec_vget_low (void) +{ + /* Basic test: vec64=vget_low(vec128), then store the result. */ +#define TEST_VGET_LOW(T1, T2, W, N, N2) \ + VECT_VAR(vector64, T1, W, N) = \ + vget_low_##T2##W(VECT_VAR(vector128, T1, W, N2)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_128BITS_VARIANTS_2_5(TEST_VLOAD, vector128, buffer); + TEST_VLOAD(vector128, buffer, q, float, f, 32, 4); + + clean_results (); + + TEST_VGET_LOW(int, s, 8, 8, 16); + TEST_VGET_LOW(int, s, 16, 4, 8); + TEST_VGET_LOW(int, s, 32, 2, 4); + TEST_VGET_LOW(int, s, 64, 1, 2); + TEST_VGET_LOW(uint, u, 8, 8, 16); + TEST_VGET_LOW(uint, u, 16, 4, 8); + TEST_VGET_LOW(uint, u, 32, 2, 4); + TEST_VGET_LOW(uint, u, 64, 1, 2); + TEST_VGET_LOW(float, f, 32, 2, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vhadd.c b/ref_vhadd.c new file mode 100644 index 0000000..f7ab21b --- /dev/null +++ b/ref_vhadd.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is 
+furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vhadd +#define TEST_MSG "VHADD/VHADDQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vhsub.c b/ref_vhsub.c new file mode 100644 index 0000000..859df59 --- /dev/null +++ b/ref_vhsub.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vhsub +#define TEST_MSG "VHSUB/VHSUBQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vld1.c b/ref_vld1.c new file mode 100644 index 0000000..ac4d91c --- /dev/null +++ b/ref_vld1.c @@ -0,0 +1,55 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1/VLD1Q" +void exec_vld1 (void) +{ + /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector) */ + /* This test actually tests vdl1 and vst1 at the same time */ +#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer); + + TEST_VLD1(vector, buffer, , float, f, 32, 2); + TEST_VLD1(vector, buffer, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vld1_dup.c b/ref_vld1_dup.c new file mode 100644 index 0000000..7dabecd --- /dev/null +++ b/ref_vld1_dup.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1_DUP/VLD1_DUPQ" +void exec_vld1_dup (void) +{ + int i; + + /* Fill vector with buffer item #i */ +#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = \ + vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + /* Try to read different places from the input buffer */ + for (i=0; i<3; i++) { + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer); + + TEST_VLD1_DUP(vector, buffer, , float, f, 32, 2); + TEST_VLD1_DUP(vector, buffer, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); + } +} diff --git a/ref_vld1_lane.c b/ref_vld1_lane.c new file mode 100644 index 0000000..7af7d03 --- /dev/null +++ b/ref_vld1_lane.c @@ -0,0 +1,102 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VLD1_LANE/VLD1_LANEQ" +void exec_vld1_lane (void) +{ + /* Fill vector_src with 0xAA, then load 1 lane */ +#define TEST_VLD1_LANE(Q, T1, T2, W, N, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, W/8*N); \ + VECT_VAR(vector_src, T1, W, N) = \ + vld1##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + VECT_VAR(vector, T1, W, N) = \ + vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_VAR(vector_src, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_src); + + ARRAY(buffer_src, int, 8, 8); + ARRAY(buffer_src, int, 16, 4); + ARRAY(buffer_src, int, 32, 2); + ARRAY(buffer_src, int, 64, 1); + ARRAY(buffer_src, uint, 8, 8); + ARRAY(buffer_src, uint, 16, 4); + ARRAY(buffer_src, uint, 32, 2); + ARRAY(buffer_src, uint, 64, 1); + ARRAY(buffer_src, float, 32, 2); + + ARRAY(buffer_src, int, 8, 16); + ARRAY(buffer_src, int, 16, 8); + ARRAY(buffer_src, int, 32, 4); + ARRAY(buffer_src, int, 64, 2); + ARRAY(buffer_src, uint, 8, 16); + ARRAY(buffer_src, uint, 16, 8); + ARRAY(buffer_src, uint, 32, 4); + ARRAY(buffer_src, uint, 64, 2); + ARRAY(buffer_src, float, 32, 4); + + clean_results (); + + /* Choose lane arbitrarily */ + TEST_VLD1_LANE(, int, s, 8, 8, 6); + 
TEST_VLD1_LANE(, int, s, 16, 4, 3); + TEST_VLD1_LANE(, int, s, 32, 2, 1); + TEST_VLD1_LANE(, int, s, 64, 1, 0); + TEST_VLD1_LANE(, uint, u, 8, 8, 7); + TEST_VLD1_LANE(, uint, u, 16, 4, 3); + TEST_VLD1_LANE(, uint, u, 32, 2, 1); + TEST_VLD1_LANE(, uint, u, 64, 1, 0); + TEST_VLD1_LANE(, float, f, 32, 2, 1); + + TEST_VLD1_LANE(q, int, s, 8, 16, 15); + TEST_VLD1_LANE(q, int, s, 16, 8, 5); + TEST_VLD1_LANE(q, int, s, 32, 4, 2); + TEST_VLD1_LANE(q, int, s, 64, 2, 1); + TEST_VLD1_LANE(q, uint, u, 8, 16, 12); + TEST_VLD1_LANE(q, uint, u, 16, 8, 6); + TEST_VLD1_LANE(q, uint, u, 32, 4, 2); + TEST_VLD1_LANE(q, uint, u, 64, 2, 0); + TEST_VLD1_LANE(q, float, f, 32, 4, 2); + +#ifndef __CC_ARM + /* Check runtime assertions. With RVCT, the check is performed at + compile-time */ + // TEST_VLD1_LANE(, int, s, 64, 1, 1); +#endif + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vldX.c b/ref_vldX.c new file mode 100644 index 0000000..e52131e --- /dev/null +++ b/ref_vldX.c @@ -0,0 +1,157 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ +#define TEST_VLDX(Q, T1, T2, W, N, X) \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant */ +#define DECL_ALL_VLDX(X) \ + DECL_VLDX(int, 8, 8, X); \ + DECL_VLDX(int, 16, 4, X); \ + DECL_VLDX(int, 32, 2, X); \ + DECL_VLDX(int, 64, 1, X); \ + DECL_VLDX(uint, 8, 8, X); \ + DECL_VLDX(uint, 16, 4, X); \ + DECL_VLDX(uint, 32, 2, X); \ + DECL_VLDX(uint, 64, 1, X); \ + 
DECL_VLDX(float, 32, 2, X); \ + DECL_VLDX(int, 8, 16, X); \ + DECL_VLDX(int, 16, 8, X); \ + DECL_VLDX(int, 32, 4, X); \ + DECL_VLDX(uint, 8, 16, X); \ + DECL_VLDX(uint, 16, 8, X); \ + DECL_VLDX(uint, 32, 4, X); \ + DECL_VLDX(float, 32, 4, X) + +#define TEST_ALL_VLDX(X) \ + TEST_VLDX(, int, s, 8, 8, X); \ + TEST_VLDX(, int, s, 16, 4, X); \ + TEST_VLDX(, int, s, 32, 2, X); \ + TEST_VLDX(, int, s, 64, 1, X); \ + TEST_VLDX(, uint, u, 8, 8, X); \ + TEST_VLDX(, uint, u, 16, 4, X); \ + TEST_VLDX(, uint, u, 32, 2, X); \ + TEST_VLDX(, uint, u, 64, 1, X); \ + TEST_VLDX(, float, f, 32, 2, X); \ + TEST_VLDX(q, int, s, 8, 16, X); \ + TEST_VLDX(q, int, s, 16, 8, X); \ + TEST_VLDX(q, int, s, 32, 4, X); \ + TEST_VLDX(q, uint, u, 8, 16, X); \ + TEST_VLDX(q, uint, u, 16, 8, X); \ + TEST_VLDX(q, uint, u, 32, 4, X); \ + TEST_VLDX(q, float, f, 32, 4, X) + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 16, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + + DECL_ALL_VLDX(2); + DECL_ALL_VLDX(3); + DECL_ALL_VLDX(4); + + /* Check vld2/vld2q */ + clean_results (); +#define TEST_MSG "VLD2/VLD2Q" + TEST_ALL_VLDX(2); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3/vld3q */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3/VLD3Q" + TEST_ALL_VLDX(3); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + 
TEST_ALL_EXTRA_CHUNKS(3, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vld4/vld4q */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4/VLD4Q" + TEST_ALL_VLDX(4); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vldX_dup.c b/ref_vldX_dup.c new file mode 100644 index 0000000..a65441e --- /dev/null +++ b/ref_vldX_dup.c @@ -0,0 +1,136 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX_dup (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX_DUP(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ + /* Fill vector with buffer item #i */ +#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_dup_##T2##W(VECT_VAR(buffer, T1, W, N)); \ + \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ +#define DECL_ALL_VLDX_DUP(X) \ + DECL_VLDX_DUP(int, 8, 8, X); \ + DECL_VLDX_DUP(int, 16, 4, X); \ + DECL_VLDX_DUP(int, 32, 2, X); \ + DECL_VLDX_DUP(int, 64, 1, X); \ + DECL_VLDX_DUP(uint, 8, 8, X); \ + DECL_VLDX_DUP(uint, 16, 4, X); \ + DECL_VLDX_DUP(uint, 32, 2, X); \ + DECL_VLDX_DUP(uint, 64, 1, X); \ + DECL_VLDX_DUP(float, 32, 2, X) + + +#define TEST_ALL_VLDX_DUP(X) \ + TEST_VLDX_DUP(, int, s, 8, 8, X); \ + TEST_VLDX_DUP(, int, s, 16, 4, X); \ + TEST_VLDX_DUP(, int, s, 32, 2, X); \ + TEST_VLDX_DUP(, int, s, 64, 1, X); \ + TEST_VLDX_DUP(, uint, 
u, 8, 8, X); \ + TEST_VLDX_DUP(, uint, u, 16, 4, X); \ + TEST_VLDX_DUP(, uint, u, 32, 2, X); \ + TEST_VLDX_DUP(, uint, u, 64, 1, X); \ + TEST_VLDX_DUP(, float, f, 32, 2, X) + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y) + + + DECL_ALL_VLDX_DUP(2); + DECL_ALL_VLDX_DUP(3); + DECL_ALL_VLDX_DUP(4); + + /* Check vld2_dup/vld2q_dup */ + clean_results (); +#define TEST_MSG "VLD2_DUP/VLD2Q_DUP" + TEST_ALL_VLDX_DUP(2); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(2, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3_dup/vld3q_dup */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3_DUP/VLD3Q_DUP" + TEST_ALL_VLDX_DUP(3); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(3, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vld4_dup/vld4q_dup */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4_DUP/VLD4Q_DUP" + TEST_ALL_VLDX_DUP(4); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(4, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vldX_lane.c b/ref_vldX_lane.c new file mode 100644 index 0000000..4fa4f57 --- /dev/null +++ b/ref_vldX_lane.c @@ -0,0 +1,170 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 
"Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vldX_lane (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VLDX_LANE(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. 
*/ +#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ + sizeof(VECT_VAR(buffer_src, T1, W, N))); \ + \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ + vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ + L); \ + vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))) + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant */ +#define DECL_ALL_VLDX_LANE(X) \ + DECL_VLDX_LANE(int, 8, 8, X); \ + DECL_VLDX_LANE(int, 16, 4, X); \ + DECL_VLDX_LANE(int, 32, 2, X); \ + DECL_VLDX_LANE(uint, 8, 8, X); \ + DECL_VLDX_LANE(uint, 16, 4, X); \ + DECL_VLDX_LANE(uint, 32, 2, X); \ + DECL_VLDX_LANE(int, 16, 8, X); \ + DECL_VLDX_LANE(int, 32, 4, X); \ + DECL_VLDX_LANE(uint, 16, 8, X); \ + DECL_VLDX_LANE(uint, 32, 4, X); \ + DECL_VLDX_LANE(float, 32, 2, X); \ + DECL_VLDX_LANE(float, 32, 4, X) + +#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] + + /* Use the same lanes regardless of the size of the array (X), for + simplicity */ +#define TEST_ALL_VLDX_LANE(X) \ + TEST_VLDX_LANE(, int, s, 8, 8, X, 7); \ + TEST_VLDX_LANE(, int, s, 16, 4, X, 2); \ + TEST_VLDX_LANE(, int, s, 32, 2, X, 0); \ + TEST_VLDX_LANE(, uint, u, 8, 8, X, 4); \ + TEST_VLDX_LANE(, uint, u, 16, 4, X, 3); \ + TEST_VLDX_LANE(, uint, u, 32, 2, X, 1); \ + TEST_VLDX_LANE(q, int, s, 16, 8, X, 6); \ + TEST_VLDX_LANE(q, int, s, 32, 
4, X, 2); \ + TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \ + TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \ + TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \ + TEST_VLDX_LANE(q, float, f, 32, 4, X, 2) + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + + /* Declare the temporary buffers / variables */ + DECL_ALL_VLDX_LANE(2); + DECL_ALL_VLDX_LANE(3); + DECL_ALL_VLDX_LANE(4); + + /* Define dummy input arrays, large enough for x4 vectors */ + DUMMY_ARRAY(buffer_src, int, 8, 8, 4); + DUMMY_ARRAY(buffer_src, int, 16, 4, 4); + DUMMY_ARRAY(buffer_src, int, 32, 2, 4); + DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); + DUMMY_ARRAY(buffer_src, int, 16, 8, 4); + DUMMY_ARRAY(buffer_src, int, 32, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); + DUMMY_ARRAY(buffer_src, float, 32, 2, 4); + DUMMY_ARRAY(buffer_src, float, 32, 4, 4); + + /* Check vld2_lane/vld2q_lane */ + clean_results (); +#define TEST_MSG "VLD2_LANE/VLD2Q_LANE" + TEST_ALL_VLDX_LANE(2); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(2, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vld3_lane/vld3q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD3_LANE/VLD3Q_LANE" + TEST_ALL_VLDX_LANE(3); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(3, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + + 
/* Check vld4_lane/vld4q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VLD4_LANE/VLD4Q_LANE" + TEST_ALL_VLDX_LANE(4); + dump_results_hex2 (TEST_MSG, " chunk 0"); + TEST_ALL_EXTRA_CHUNKS(4, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vmax.c b/ref_vmax.c new file mode 100644 index 0000000..a2a6b60 --- /dev/null +++ b/ref_vmax.c @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmax +#define TEST_MSG "VMAX/VMAXQ" +#endif + +/* Can't use the standard ref_v_binary_op.c template because vmax has + no 64 bits variant */ +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. */ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); +#ifndef NO_FLOAT_VARIANT + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); +#endif + + /* Choose init value arbitrarily, will be used as comparison value */ + TEST_VDUP(vector2, , int, s, 8, 8, -13); + TEST_VDUP(vector2, , int, s, 16, 4, -14); + TEST_VDUP(vector2, , int, s, 32, 2, -16); + TEST_VDUP(vector2, , uint, u, 8, 8, 0xf3); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xfff1); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xfffffff0); + TEST_VDUP(vector2, q, int, s, 8, 16, -12); + TEST_VDUP(vector2, q, int, s, 16, 8, -13); + TEST_VDUP(vector2, q, int, s, 32, 4, -15); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xf9); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xfff2); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1); +#ifndef NO_FLOAT_VARIANT + TEST_VDUP(vector2, , float, f, 32, 2, -15.5); + 
TEST_VDUP(vector2, q, float, f, 32, 4, -14.5); +#endif + +#ifndef NO_FLOAT_VARIANT +#define FLOAT_VARIANT(MACRO, VAR) \ + MACRO(VAR, , float, f, 32, 2); \ + MACRO(VAR, q, float, f, 32, 4) +#else +#define FLOAT_VARIANT(MACRO, VAR) +#endif + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR) \ + MACRO(VAR, , int, s, 8, 8); \ + MACRO(VAR, , int, s, 16, 4); \ + MACRO(VAR, , int, s, 32, 2); \ + MACRO(VAR, , uint, u, 8, 8); \ + MACRO(VAR, , uint, u, 16, 4); \ + MACRO(VAR, , uint, u, 32, 2); \ + MACRO(VAR, q, int, s, 8, 16); \ + MACRO(VAR, q, int, s, 16, 8); \ + MACRO(VAR, q, int, s, 32, 4); \ + MACRO(VAR, q, uint, u, 8, 16); \ + MACRO(VAR, q, uint, u, 16, 8); \ + MACRO(VAR, q, uint, u, 32, 4); \ + FLOAT_VARIANT(MACRO, VAR) + + /* Apply a binary operator named INSN_NAME */ + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmin.c b/ref_vmin.c new file mode 100644 index 0000000..496e3ae --- /dev/null +++ b/ref_vmin.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vmin +#define TEST_MSG "VMIN/VMINQ" + +#include "ref_vmax.c" diff --git a/ref_vmla.c b/ref_vmla.c new file mode 100644 index 0000000..2b2a42b --- /dev/null +++ b/ref_vmla.c @@ -0,0 +1,144 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + DECL_VARIABLE(vector2, T, W, N); \ + DECL_VARIABLE(vector3, T, W, N); \ + DECL_VARIABLE(vector_res, T, W, N) + + /* vector_res = OP(vector, vector3, vector4), + then store the result. */ +#define TEST_VMLX1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX(INSN, Q, T1, T2, W, N) \ + TEST_VMLX1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX(int, 8, 8); + DECL_VMLX(int, 16, 4); + DECL_VMLX(int, 32, 2); + DECL_VMLX(uint, 8, 8); + DECL_VMLX(uint, 16, 4); + DECL_VMLX(uint, 32, 2); + DECL_VMLX(float, 32, 2); + DECL_VMLX(int, 8, 16); + DECL_VMLX(int, 16, 8); + DECL_VMLX(int, 32, 4); + DECL_VMLX(uint, 8, 16); + DECL_VMLX(uint, 16, 8); + DECL_VMLX(uint, 32, 4); + DECL_VMLX(float, 32, 4); + + clean_results (); + + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + 
TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x44); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x66); + TEST_VDUP(vector2, , float, f, 32, 2, 33.1); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x77); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x88); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x99); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xAA); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xBB); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xCC); + TEST_VDUP(vector2, q, float, f, 32, 4, 99.2); + + TEST_VDUP(vector3, , int, s, 8, 8, 0xFF); + TEST_VDUP(vector3, , int, s, 16, 4, 0xEE); + TEST_VDUP(vector3, , int, s, 32, 2, 0xDD); + TEST_VDUP(vector3, , uint, u, 8, 8, 0xCC); + TEST_VDUP(vector3, , uint, u, 16, 4, 0xBB); + TEST_VDUP(vector3, , uint, u, 32, 2, 0xAA); + TEST_VDUP(vector3, , float, f, 32, 2, 10.23); + TEST_VDUP(vector3, q, int, s, 8, 16, 0x99); + TEST_VDUP(vector3, q, int, s, 16, 8, 0x88); + TEST_VDUP(vector3, q, int, s, 32, 4, 0x77); + TEST_VDUP(vector3, q, uint, u, 8, 16, 0x66); + TEST_VDUP(vector3, q, uint, u, 16, 8, 0x55); + TEST_VDUP(vector3, q, uint, u, 32, 4, 0x44); + TEST_VDUP(vector3, q, float, f, 32, 4, 77.8); + + TEST_VMLX(INSN_NAME, , int, s, 8, 8); + TEST_VMLX(INSN_NAME, , int, s, 16, 4); + TEST_VMLX(INSN_NAME, , int, s, 32, 2); + TEST_VMLX(INSN_NAME, , uint, u, 8, 8); + TEST_VMLX(INSN_NAME, , uint, u, 16, 4); + TEST_VMLX(INSN_NAME, , uint, u, 32, 2); + TEST_VMLX(INSN_NAME, , float, f, 32, 2); + TEST_VMLX(INSN_NAME, q, int, s, 8, 16); + TEST_VMLX(INSN_NAME, q, int, s, 16, 8); + TEST_VMLX(INSN_NAME, q, int, s, 32, 4); + TEST_VMLX(INSN_NAME, q, uint, u, 8, 16); + TEST_VMLX(INSN_NAME, q, uint, u, 16, 8); + TEST_VMLX(INSN_NAME, q, uint, u, 32, 4); + TEST_VMLX(INSN_NAME, q, float, f, 32, 4); + + 
dump_results_hex (TEST_MSG); +} diff --git a/ref_vmla_lane.c b/ref_vmla_lane.c new file mode 100644 index 0000000..b79d44c --- /dev/null +++ b/ref_vmla_lane.c @@ -0,0 +1,125 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX_LANE(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmlx_lane(vector, vector2, vector3, lane), + then store the result. */ +#define TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_LANE(INSN, Q, T1, T2, W, N, N2, V) \ + TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX_LANE(vector); + DECL_VMLX_LANE(vector2); + DECL_VMLX_LANE(vector_res); + + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector3, float, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 
4); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x55); + TEST_VDUP(vector2, , float, f, 32, 2, 55.3); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x55); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x55); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x55); + TEST_VDUP(vector2, q, float, f, 32, 4, 55.8); + + TEST_VDUP(vector3, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0xBB); + TEST_VDUP(vector3, , uint, u, 16, 4, 0xBB); + TEST_VDUP(vector3, , uint, u, 32, 2, 0xBB); + TEST_VDUP(vector3, , float, f, 32, 2, 11.34); + + /* Choose lane arbitrarily */ + TEST_VMLX_LANE(INSN_NAME, , int, s, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , int, s, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , float, f, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 16, 8, 4, 3); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 16, 8, 4, 2); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, float, f, 32, 4, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmla_n.c b/ref_vmla_n.c new file mode 100644 index 0000000..edcfc5e --- /dev/null +++ b/ref_vmla_n.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the 
Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmla +#define TEST_MSG "VMLA_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMLX_N(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, float, 32, 4); \ + DECL_VARIABLE(VAR, uint, 32, 4) + + /* vector_res = vmlx_n(vector, vector2, val), + then store the result. 
*/ +#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMLX_N(vector); + DECL_VMLX_N(vector2); + DECL_VMLX_N(vector_res); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x55); + TEST_VDUP(vector2, , float, f, 32, 2, 55.2); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x55); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x55); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x55); + TEST_VDUP(vector2, q, float, f, 32, 4, 55.9); + + /* Choose multiplier arbitrarily */ + TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 0x11); + TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 0x22); + TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 0x33); + TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 0x44); + TEST_VMLX_N(INSN_NAME, , float, f, 32, 2, 22.3); + TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 0x55); + TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 0x66); + TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 0x77); + TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 0x88); + 
TEST_VMLX_N(INSN_NAME, q, float, f, 32, 4, 66.7); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmlal.c b/ref_vmlal.c new file mode 100644 index 0000000..9c00fb7 --- /dev/null +++ b/ref_vmlal.c @@ -0,0 +1,119 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal +#define TEST_MSG "VMLAL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. 
*/ +#define TEST_VMLXL1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VMLXL1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector3, int, 8, 8); + DECL_VARIABLE(vector4, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector3, uint, 8, 8); + DECL_VARIABLE(vector4, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 8); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + TEST_VDUP(vector3, , int, s, 8, 8, 0x55); + TEST_VDUP(vector4, , int, s, 8, 8, 0xBB); + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector4, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); + 
TEST_VDUP(vector3, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector4, , uint, u, 8, 8, 0xBB); + TEST_VDUP(vector3, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector4, , uint, u, 16, 4, 0xBB); + TEST_VDUP(vector3, , uint, u, 32, 2, 0x55); + TEST_VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL(INSN_NAME, int, s, 16, 8, 8); + TEST_VMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VMLXL(INSN_NAME, int, s, 64, 32, 2); + TEST_VMLXL(INSN_NAME, uint, u, 16, 8, 8); + TEST_VMLXL(INSN_NAME, uint, u, 32, 16, 4); + TEST_VMLXL(INSN_NAME, uint, u, 64, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmlal_lane.c b/ref_vmlal_lane.c new file mode 100644 index 0000000..afa8e6b --- /dev/null +++ b/ref_vmlal_lane.c @@ -0,0 +1,101 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal_lane +#define TEST_MSG "VMLAL_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vmlxl_lane(vector, vector3, vector4, lane), + then store the result. */ +#define TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_LANE(INSN, T1, T2, W, W2, N, V) \ + TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector4, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); + TEST_VDUP(vector3, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector4, , 
uint, u, 16, 4, 0xBB); + TEST_VDUP(vector3, , uint, u, 32, 2, 0x55); + TEST_VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 2); + TEST_VMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 1); + TEST_VMLXL_LANE(INSN_NAME, uint, u, 32, 16, 4, 2); + TEST_VMLXL_LANE(INSN_NAME, uint, u, 64, 32, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmlal_n.c b/ref_vmlal_n.c new file mode 100644 index 0000000..da981f7 --- /dev/null +++ b/ref_vmlal_n.c @@ -0,0 +1,92 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmlal_n +#define TEST_MSG "VMLAL_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vmlxl_n(vector, vector2, val), + then store the result. */ +#define TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_N(INSN, T1, T2, W, W2, N, V) \ + TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x55); + + /* Choose multiplier arbitrarily */ + TEST_VMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x11); + TEST_VMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x22); + TEST_VMLXL_N(INSN_NAME, uint, u, 32, 16, 4, 0x33); + TEST_VMLXL_N(INSN_NAME, uint, u, 64, 32, 2, 0x33); + + dump_results_hex (TEST_MSG); +} 
diff --git a/ref_vmls.c b/ref_vmls.c new file mode 100644 index 0000000..9eacdb2 --- /dev/null +++ b/ref_vmls.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS" + +#include "ref_vmla.c" diff --git a/ref_vmls_lane.c b/ref_vmls_lane.c new file mode 100644 index 0000000..68cce02 --- /dev/null +++ b/ref_vmls_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS_LANE" + +#include "ref_vmla_lane.c" diff --git a/ref_vmls_n.c b/ref_vmls_n.c new file mode 100644 index 0000000..050a4b9 --- /dev/null +++ b/ref_vmls_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmls +#define TEST_MSG "VMLS_N" + +#include "ref_vmla_n.c" diff --git a/ref_vmlsl.c b/ref_vmlsl.c new file mode 100644 index 0000000..22b11aa --- /dev/null +++ b/ref_vmlsl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl +#define TEST_MSG "VMLSL" + +#include "ref_vmlal.c" diff --git a/ref_vmlsl_lane.c b/ref_vmlsl_lane.c new file mode 100644 index 0000000..a972b93 --- /dev/null +++ b/ref_vmlsl_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl_lane +#define TEST_MSG "VMLSL_LANE" + +#include "ref_vmlal_lane.c" diff --git a/ref_vmlsl_n.c b/ref_vmlsl_n.c new file mode 100644 index 0000000..b7f7000 --- /dev/null +++ b/ref_vmlsl_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vmlsl_n +#define TEST_MSG "VMLSL_N" + +#include "ref_vmlal_n.c" diff --git a/ref_vmovl.c b/ref_vmovl.c new file mode 100644 index 0000000..fa9f599 --- /dev/null +++ b/ref_vmovl.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMOVL" +void exec_vmovl (void) +{ + /* Basic test: vec128=vmovl(vec64), then store the result. 
*/ +#define TEST_VMOVL(T1, T2, W, W2, N) \ + VECT_VAR(vector128, T1, W2, N) = \ + vmovl_##T2##W(VECT_VAR(vector64, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector128, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_64BITS_VARIANTS_2_5(TEST_VLOAD, vector64, buffer); + + clean_results (); + + TEST_VMOVL(int, s, 8, 16, 8); + TEST_VMOVL(int, s, 16, 32, 4); + TEST_VMOVL(int, s, 32, 64, 2); + TEST_VMOVL(uint, u, 8, 16, 8); + TEST_VMOVL(uint, u, 16, 32, 4); + TEST_VMOVL(uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmovn.c b/ref_vmovn.c new file mode 100644 index 0000000..ff00e75 --- /dev/null +++ b/ref_vmovn.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMOVN" +void exec_vmovn (void) +{ + /* Basic test: vec64=vmovn(vec128), then store the result. */ +#define TEST_VMOVN(T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = \ + vmovn_##T2##W(VECT_VAR(vector128, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector128); + + TEST_MACRO_128BITS_VARIANTS_2_5(TEST_VLOAD, vector128, buffer); + + clean_results (); + + TEST_VMOVN(int, s, 16, 8, 8); + TEST_VMOVN(int, s, 32, 16, 4); + TEST_VMOVN(int, s, 64, 32, 2); + TEST_VMOVN(uint, u, 16, 8, 8); + TEST_VMOVN(uint, u, 32, 16, 4); + TEST_VMOVN(uint, u, 64, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmul.c b/ref_vmul.c new file mode 100644 index 0000000..7e77338 --- /dev/null +++ b/ref_vmul.c @@ -0,0 +1,127 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vmul +#define TEST_MSG "VMUL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ +#define DECL_VMUL(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + DECL_VARIABLE(vector2, T, W, N); \ + DECL_VARIABLE(vector_res, T, W, N) + + /* vector_res = OP(vector, vector3, vector4), + then store the result. */ +#define TEST_VMUL1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMUL(INSN, Q, T1, T2, W, N) \ + TEST_VMUL1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(int, 8, 8); + DECL_VMUL(int, 16, 4); + DECL_VMUL(int, 32, 2); + DECL_VMUL(uint, 8, 8); + DECL_VMUL(uint, 16, 4); + DECL_VMUL(uint, 32, 2); + DECL_VMUL(float, 32, 2); + DECL_VMUL(int, 8, 16); + DECL_VMUL(int, 16, 8); + DECL_VMUL(int, 32, 4); + DECL_VMUL(uint, 8, 16); + DECL_VMUL(uint, 16, 8); + DECL_VMUL(uint, 32, 4); + DECL_VMUL(float, 32, 4); + + clean_results (); + + TEST_VLOAD(vector1, buffer, , int, s, 8, 8); + TEST_VLOAD(vector1, buffer, , int, s, 16, 4); + TEST_VLOAD(vector1, buffer, , int, s, 32, 2); + TEST_VLOAD(vector1, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector1, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector1, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, int, s, 8, 16); + 
TEST_VLOAD(vector1, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector1, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x44); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x55); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x66); + TEST_VDUP(vector2, , float, f, 32, 2, 33.3); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x77); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x88); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x99); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xAA); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xBB); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xCC); + TEST_VDUP(vector2, q, float, f, 32, 4, 99.6); + + TEST_VMUL(INSN_NAME, , int, s, 8, 8); + TEST_VMUL(INSN_NAME, , int, s, 16, 4); + TEST_VMUL(INSN_NAME, , int, s, 32, 2); + TEST_VMUL(INSN_NAME, , uint, u, 8, 8); + TEST_VMUL(INSN_NAME, , uint, u, 16, 4); + TEST_VMUL(INSN_NAME, , uint, u, 32, 2); + TEST_VMUL(INSN_NAME, , float, f, 32, 2); + TEST_VMUL(INSN_NAME, q, int, s, 8, 16); + TEST_VMUL(INSN_NAME, q, int, s, 16, 8); + TEST_VMUL(INSN_NAME, q, int, s, 32, 4); + TEST_VMUL(INSN_NAME, q, uint, u, 8, 16); + TEST_VMUL(INSN_NAME, q, uint, u, 16, 8); + TEST_VMUL(INSN_NAME, q, uint, u, 32, 4); + TEST_VMUL(INSN_NAME, q, float, f, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmul_lane.c b/ref_vmul_lane.c new file mode 100644 index 0000000..650c9d9 --- /dev/null +++ b/ref_vmul_lane.c @@ -0,0 +1,105 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, 
including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMUL_LANE" +void exec_vmul_lane (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMUL_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vmul##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + + clean_results (); + + /* Initialize vector from pre-initialized values */ + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x4); + TEST_VDUP(vector2, , int, s, 32, 2, 0x22); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x444); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x532); + TEST_VDUP(vector2, , float, f, 32, 2, 22.8); + + /* Choose lane arbitrarily */ + TEST_VMUL_LANE(, int, s, 16, 4, 4, 2); + TEST_VMUL_LANE(, int, s, 32, 2, 2, 1); + TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2); + TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1); + TEST_VMUL_LANE(, float, f, 32, 2, 2, 1); + TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2); + TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0); + TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2); + TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1); + TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); 
+} diff --git a/ref_vmul_n.c b/ref_vmul_n.c new file mode 100644 index 0000000..628862d --- /dev/null +++ b/ref_vmul_n.c @@ -0,0 +1,91 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMUL_N" +void exec_vmul_n (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_n(vector,val), then store the result. 
*/ +#define TEST_VMUL_N(Q, T1, T2, W, N, L) \ + VECT_VAR(vector_res, T1, W, N) = \ + vmul##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + clean_results (); + + /* Initialize vector from pre-initialized values */ + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose multiplier arbitrarily */ + TEST_VMUL_N(, int, s, 16, 4, 0x11); + TEST_VMUL_N(, int, s, 32, 2, 0x22); + TEST_VMUL_N(, uint, u, 16, 4, 0x33); + TEST_VMUL_N(, uint, u, 32, 2, 0x44); + TEST_VMUL_N(, float, f, 32, 2, 22.3); + TEST_VMUL_N(q, int, s, 16, 8, 0x55); + TEST_VMUL_N(q, int, s, 32, 4, 0x66); + TEST_VMUL_N(q, uint, u, 16, 8, 0x77); + TEST_VMUL_N(q, uint, u, 32, 4, 0x88); + TEST_VMUL_N(q, float, f, 32, 4, 88.9); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull.c b/ref_vmull.c new file mode 100644 index 0000000..e61d8e6 --- /dev/null +++ b/ref_vmull.c @@ -0,0 +1,77 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMULL" +void exec_vmull (void) +{ + /* Basic test: y=vmull(x,x), then store the result. */ +#define TEST_VMULL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vmull_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + 
TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + + TEST_VMULL(int, s, 8, 16, 8); + TEST_VMULL(int, s, 16, 32, 4); + TEST_VMULL(int, s, 32, 64, 2); + TEST_VMULL(uint, u, 8, 16, 8); + TEST_VMULL(uint, u, 16, 32, 4); + TEST_VMULL(uint, u, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull_lane.c b/ref_vmull_lane.c new file mode 100644 index 0000000..23434e1 --- /dev/null +++ b/ref_vmull_lane.c @@ -0,0 +1,84 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VMULL_LANE" +void exec_vmull_lane (void) +{ + /* vector_res = vmull_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMULL_LANE(T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vmull##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x1000); + TEST_VDUP(vector, , uint, u, 16, 4, 0x1000); + TEST_VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x4); + TEST_VDUP(vector2, , int, s, 32, 2, 0x2); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x4); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x2); + + /* Choose lane arbitrarily */ + TEST_VMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VMULL_LANE(int, s, 32, 64, 2, 1); + TEST_VMULL_LANE(uint, u, 16, 32, 4, 2); + TEST_VMULL_LANE(uint, u, 32, 64, 2, 1); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vmull_n.c b/ref_vmull_n.c new file mode 100644 index 0000000..8713d35 --- /dev/null +++ b/ref_vmull_n.c @@ -0,0 +1,81 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without 
restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vmull +#define TEST_MSG "VMULL_N" +void exec_vmull_n (void) +{ + int i; + + /* vector_res = vmull_n(vector,val), then store the result. 
*/ +#define TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + +#define TEST_VMULL_N(INSN, T1, T2, W, W2, N, L) \ + TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x1000); + TEST_VDUP(vector, , uint, u, 16, 4, 0x1000); + TEST_VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Choose multiplier arbitrarily */ + TEST_VMULL_N(INSN_NAME, int, s, 16, 32, 4, 0x11); + TEST_VMULL_N(INSN_NAME, int, s, 32, 64, 2, 0x22); + TEST_VMULL_N(INSN_NAME, uint, u, 16, 32, 4, 0x33); + TEST_VMULL_N(INSN_NAME, uint, u, 32, 64, 2, 0x44); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); +} diff --git a/ref_vmvn.c b/ref_vmvn.c new file mode 100644 index 0000000..75b750e --- /dev/null +++ b/ref_vmvn.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons 
to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vmvn +#define TEST_MSG "VMVN/VMVNQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_UNARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + 
TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8); + TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4); + TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vneg.c b/ref_vneg.c new file mode 100644 index 0000000..4e9e68f --- /dev/null +++ b/ref_vneg.c @@ -0,0 +1,54 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vneg +#define TEST_MSG "VNEG/VNEGQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vneg_f32(void); +#define EXTRA_TESTS exec_vneg_f32 + +#include "ref_v_unary_op.c" + +void exec_vneg_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + TEST_VDUP(vector, , float, f, 32, 2, 2.3); + TEST_VDUP(vector, q, float, f, 32, 4, 3.4); + + TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vorn.c b/ref_vorn.c new file mode 100644 index 0000000..df9823e --- /dev/null +++ b/ref_vorn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vorn +#define TEST_MSG "VORN/VORNQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vorr.c b/ref_vorr.c new file mode 100644 index 0000000..0762e10 --- /dev/null +++ b/ref_vorr.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vorr +#define TEST_MSG "VORR/VORRQ" + +#include "ref_v_binary_op.c" diff --git a/ref_vpadal.c b/ref_vpadal.c new file mode 100644 index 0000000..8ca3e07 --- /dev/null +++ b/ref_vpadal.c @@ -0,0 +1,140 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vpadal +#define TEST_MSG "VPADAL/VPADALQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W2, N2), VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)) + +#define TEST_VPADAL(INSN, Q, T1, T2, W, N, W2, N2) \ + TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 64, 1); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, uint, 64, 1); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector2, int, 8, 8); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 8, 8); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, int, 8, 16); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, uint, 8, 16); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + 
TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , int, s, 64, 1); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 64, 1); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Initialize input "vector2" from "buffer" */ + TEST_VLOAD(vector2, buffer, , int, s, 8, 8); + TEST_VLOAD(vector2, buffer, , int, s, 16, 4); + TEST_VLOAD(vector2, buffer, , int, s, 32, 2); + TEST_VLOAD(vector2, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector2, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector2, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector2, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector2, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector2, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector2, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector2, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector2, buffer, q, uint, u, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_VPADAL(INSN_NAME, , int, s, 8, 8, 16, 4); + TEST_VPADAL(INSN_NAME, , int, s, 16, 4, 32, 2); + TEST_VPADAL(INSN_NAME, , int, s, 32, 2, 64 ,1); + TEST_VPADAL(INSN_NAME, , uint, u, 8, 8, 16, 4); + TEST_VPADAL(INSN_NAME, , uint, u, 16, 4, 32, 2); + TEST_VPADAL(INSN_NAME, , uint, u, 32, 2, 64, 1); + TEST_VPADAL(INSN_NAME, q, int, s, 8, 16, 16, 8); + TEST_VPADAL(INSN_NAME, q, int, s, 16, 8, 32, 4); + TEST_VPADAL(INSN_NAME, q, int, s, 32, 4, 64 ,2); + TEST_VPADAL(INSN_NAME, q, uint, u, 8, 16, 16, 8); + TEST_VPADAL(INSN_NAME, q, uint, u, 16, 8, 32, 4); + TEST_VPADAL(INSN_NAME, q, uint, u, 32, 4, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpadd.c b/ref_vpadd.c new file mode 100644 index 0000000..ebd3127 --- 
/dev/null +++ b/ref_vpadd.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vpadd +#define TEST_MSG "VPADD" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_VPADD1(INSN, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VPADD(INSN, T1, T2, W, N) \ + TEST_VPADD1(INSN, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + + /* Apply a unary operator named INSN_NAME */ + TEST_VPADD(INSN_NAME, int, s, 8, 8); + TEST_VPADD(INSN_NAME, int, s, 16, 4); + TEST_VPADD(INSN_NAME, int, s, 32, 2); + TEST_VPADD(INSN_NAME, uint, u, 8, 8); + TEST_VPADD(INSN_NAME, uint, u, 16, 4); + TEST_VPADD(INSN_NAME, uint, u, 32, 2); + TEST_VPADD(INSN_NAME, float, f, 32, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpaddl.c b/ref_vpaddl.c new file mode 100644 index 0000000..43ad68a --- /dev/null +++ b/ref_vpaddl.c @@ -0,0 +1,113 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vpaddl +#define TEST_MSG "VPADDL/VPADDLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)) + +#define TEST_VPADDL(INSN, Q, T1, T2, W, N, W2, N2) \ + TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 8, 16); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 1); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 1); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_VLOAD(vector, buffer, , int, s, 8, 8); + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, , uint, u, 8, 8); + TEST_VLOAD(vector, buffer, , uint, u, 16, 4); + TEST_VLOAD(vector, buffer, , uint, u, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 8, 16); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 8, 16); + TEST_VLOAD(vector, buffer, q, uint, 
u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + + /* Apply a unary operator named INSN_NAME */ + TEST_VPADDL(INSN_NAME, , int, s, 8, 8, 16, 4); + TEST_VPADDL(INSN_NAME, , int, s, 16, 4, 32, 2); + TEST_VPADDL(INSN_NAME, , int, s, 32, 2, 64, 1); + TEST_VPADDL(INSN_NAME, , uint, u, 8, 8, 16, 4); + TEST_VPADDL(INSN_NAME, , uint, u, 16, 4, 32, 2); + TEST_VPADDL(INSN_NAME, , uint, u, 32, 2, 64, 1); + TEST_VPADDL(INSN_NAME, q, int, s, 8, 16, 16, 8); + TEST_VPADDL(INSN_NAME, q, int, s, 16, 8, 32, 4); + TEST_VPADDL(INSN_NAME, q, int, s, 32, 4, 64, 2); + TEST_VPADDL(INSN_NAME, q, uint, u, 8, 16, 16, 8); + TEST_VPADDL(INSN_NAME, q, uint, u, 16, 8, 32, 4); + TEST_VPADDL(INSN_NAME, q, uint, u, 32, 4, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vpmax.c b/ref_vpmax.c new file mode 100644 index 0000000..bac8d20 --- /dev/null +++ b/ref_vpmax.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vpmax +#define TEST_MSG "VPMAX" + +#include "ref_vpadd.c" diff --git a/ref_vpmin.c b/ref_vpmin.c new file mode 100644 index 0000000..cf0a044 --- /dev/null +++ b/ref_vpmin.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vpmin +#define TEST_MSG "VPMIN" + +#include "ref_vpadd.c" diff --git a/ref_vqabs.c b/ref_vqabs.c new file mode 100644 index 0000000..4a4d04e --- /dev/null +++ b/ref_vqabs.c @@ -0,0 +1,73 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqabs +#define TEST_MSG "VQABS/VQABSQ" + +/* Extra tests for functions requiring corner cases tests */ +void vqabs_extra(void); +#define EXTRA_TESTS vqabs_extra + +#include "ref_v_unary_sat_op.c" + +void vqabs_extra() +{ + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" with max negative values to check + saturation */ + TEST_VDUP(vector, , int, s, 8, 8, 0x80); + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqadd.c b/ref_vqadd.c new file mode 100644 index 0000000..6c9b508 --- /dev/null +++ b/ref_vqadd.c @@ -0,0 +1,153 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, 
including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqadd +#define TEST_MSG "VQADD/VQADDQ" + +/* Extra tests for functions requiring types larger than 64 bits to + compute saturation */ +void vqadd_64(void); +#define EXTRA_TESTS vqadd_64 + +#include "ref_v_binary_sat_op.c" + +void vqadd_64(void) +{ + int i; + + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + /* Initialize input "vector1" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + + TEST_VDUP(vector2, , int, s, 64, 1, 0x0); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x0); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x0); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x0); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + fprintf(ref_file, "\n64 bits saturation:\n"); + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + 
DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + TEST_VDUP(vector2, , int, s, 64, 1, 0x8000000000000003LL); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + + TEST_VDUP(vector1, q, int, s, 64, 2, 0x4000000000000000LL); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x4000000000000000LL); + + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x22); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* To improve coverage, check saturation with less than 64 bits too */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x81); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8001); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000001); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x81); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8001); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000001); + + fprintf(ref_file, "\nless than 64 bits saturation:\n"); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , 
int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + DUMP(TEST_MSG, int, 8, 8, PRIx8); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 8, 16, PRIx8); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + TEST_VDUP(vector1, , uint, u, 8, 8, 0xF0); + TEST_VDUP(vector1, , uint, u, 16, 4, 0xFFF0); + TEST_VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFF0); + TEST_VDUP(vector1, q, uint, u, 8, 16, 0xF0); + TEST_VDUP(vector1, q, uint, u, 16, 8, 0xFFF0); + TEST_VDUP(vector1, q, uint, u, 32, 4, 0xFFFFFFF0); + + TEST_VDUP(vector2, , uint, u, 8, 8, 0x20); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x20); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x20); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x20); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x20); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x20); + + fprintf(ref_file, "\n%s less than 64 bits saturation overflow output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_vqdmlal.c b/ref_vqdmlal.c new file mode 100644 index 0000000..06d9bdc --- /dev/null +++ b/ref_vqdmlal.c @@ -0,0 +1,97 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the 
Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal +#define TEST_MSG "VQDMLAL" +#endif + + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. 
*/ +#define TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2)) + +#define TEST_VQDMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector4, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2); + dump_results_hex (TEST_MSG); + + + TEST_VDUP(vector3, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector4, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector4, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmlal_lane.c b/ref_vqdmlal_lane.c new file mode 100644 index 0000000..6c13460 --- /dev/null +++ b/ref_vqdmlal_lane.c @@ -0,0 +1,103 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by 
Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal_lane +#define TEST_MSG "VQDMLAL_LANE" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vqdmlxl_lane(vector, vector3, vector4, lane), + then store the result. 
*/ +#define TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + VECT_VAR(vector4, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2)) + +#define TEST_VQDMLXL_LANE(INSN, T1, T2, W, W2, N, V) \ + TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector4, , int, s, 16, 4, 0xBB); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + TEST_VDUP(vector4, , int, s, 32, 2, 0xBB); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex (TEST_MSG); + + TEST_VDUP(vector3, , int, s, 16, 4, 0); + TEST_VDUP(vector3, , int, s, 32, 2, 0); + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (mul with input=0)"); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex2 (TEST_MSG, " (mul with input=0)"); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector4, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector4, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + 
TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0); + TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmlal_n.c b/ref_vqdmlal_n.c new file mode 100644 index 0000000..98c180e --- /dev/null +++ b/ref_vqdmlal_n.c @@ -0,0 +1,91 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vqdmlal_n +#define TEST_MSG "VQDMLAL_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vqdmlxl_n(vector, vector3, val), + then store the result. 
*/ +#define TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector3, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2)) + +#define TEST_VQDMLXL_N(INSN, T1, T2, W, W2, N, V) \ + TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x55); + TEST_VDUP(vector3, , int, s, 32, 2, 0x55); + + /* Choose val arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x22); + TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x33); + + dump_results_hex (TEST_MSG); + + TEST_VDUP(vector3, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector3, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x8000); + TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmlsl.c b/ref_vqdmlsl.c new file mode 100644 index 0000000..ceb0b6b --- /dev/null +++ b/ref_vqdmlsl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including 
without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqdmlsl +#define TEST_MSG "VQDMLSL" + +#include "ref_vqdmlal.c" diff --git a/ref_vqdmlsl_lane.c b/ref_vqdmlsl_lane.c new file mode 100644 index 0000000..7724d98 --- /dev/null +++ b/ref_vqdmlsl_lane.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vqdmlsl_lane +#define TEST_MSG "VQDMLSL_LANE" + +#include "ref_vqdmlal_lane.c" diff --git a/ref_vqdmlsl_n.c b/ref_vqdmlsl_n.c new file mode 100644 index 0000000..c6f8818 --- /dev/null +++ b/ref_vqdmlsl_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqdmlsl_n +#define TEST_MSG "VQDMLSL_N" + +#include "ref_vqdmlal_n.c" diff --git a/ref_vqdmulh.c b/ref_vqdmulh.c new file mode 100644 index 0000000..84903c5 --- /dev/null +++ b/ref_vqdmulh.c @@ -0,0 +1,114 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqdmulh(vector,vector2,lane), then store the result. 
*/ +#define TEST_VQDMULH2(INSN, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULH1(INSN, Q, T1, T2, W, N) \ + TEST_VQDMULH2(INSN, Q, T1, T2, W, N) + +#define TEST_VQDMULH(Q, T1, T2, W, N) \ + TEST_VQDMULH1(INSN, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x33); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x22); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULH(, int, s, 16, 4); + TEST_VQDMULH(, int, s, 32, 2); + TEST_VQDMULH(q, int, s, 16, 8); + TEST_VQDMULH(q, int, s, 32, 4); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 
0x80000000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULH(, int, s, 16, 4); + TEST_VQDMULH(, int, s, 32, 2); + TEST_VQDMULH(q, int, s, 16, 8); + TEST_VQDMULH(q, int, s, 32, 4); + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqdmulh_lane.c b/ref_vqdmulh_lane.c new file mode 100644 index 0000000..feda86a --- /dev/null +++ b/ref_vqdmulh_lane.c @@ -0,0 +1,115 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH_LANE" +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result. */ +#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) + +#define TEST_VQDMULH_LANE(Q, T1, T2, W, N, N2, L) \ + TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + /* vector2: vqdmulh_lane and vqdmulhq_lane have a 2nd argument with + the same number of elements, so we need only one variable of each + type. 
*/ + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 3); + TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 2); + TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 1); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmulh_n.c b/ref_vqdmulh_n.c new file mode 100644 index 0000000..785d304 --- /dev/null +++ b/ref_vqdmulh_n.c @@ -0,0 +1,108 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to 
permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmulh +#define TEST_MSG "VQDMULH_N" +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmulh_n(vector,val), then store the result. 
*/ +#define TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L) \ + TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L) + +#define TEST_VQDMULH_N(Q, T1, T2, W, N, L) \ + TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x100023); + TEST_VDUP(vector, q, int, s, 16, 8, 0x1000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x100045); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULH_N(, int, s, 16, 4, 0xCF); + TEST_VQDMULH_N(, int, s, 32, 2, 0x2344); + TEST_VQDMULH_N(q, int, s, 16, 8, 0x80); + TEST_VQDMULH_N(q, int, s, 32, 4, 0x5422); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMULH_N(, int, s, 
16, 4, 0x8000); + TEST_VQDMULH_N(, int, s, 32, 2, 0x80000000); + TEST_VQDMULH_N(q, int, s, 16, 8, 0x8000); + TEST_VQDMULH_N(q, int, s, 32, 4, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmull.c b/ref_vqdmull.c new file mode 100644 index 0000000..d19794d --- /dev/null +++ b/ref_vqdmull.c @@ -0,0 +1,92 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL" +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqdmull(x,x), then store the result. 
*/ +#define TEST_VQDMULL2(INSN, T1, T2, W, W2, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULL1(INSN, T1, T2, W, W2, N) \ + TEST_VQDMULL2(INSN, T1, T2, W, W2, N) + +#define TEST_VQDMULL(T1, T2, W, W2, N) \ + TEST_VQDMULL1(INSN, T1, T2, W, W2, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector2, buffer, , int, s, 16, 4); + TEST_VLOAD(vector2, buffer, , int, s, 32, 2); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULL(int, s, 16, 32, 4); + TEST_VQDMULL(int, s, 32, 64, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMULL(int, s, 16, 32, 4); + TEST_VQDMULL(int, s, 32, 64, 2); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); +} diff --git a/ref_vqdmull_lane.c b/ref_vqdmull_lane.c new file mode 100644 index 0000000..0e67f54 --- /dev/null +++ b/ref_vqdmull_lane.c @@ -0,0 +1,105 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL_LANE" +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_lane_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L) \ + TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) + +#define TEST_VQDMULL_LANE(T1, T2, W, W2, N, L) \ + TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x4); + TEST_VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1); + + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (check mul overflow)"); + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " (check mul overflow)"); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + 
DUMP(TEST_MSG, int, 64, 2, PRIx64); +} diff --git a/ref_vqdmull_n.c b/ref_vqdmull_n.c new file mode 100644 index 0000000..13ce1a2 --- /dev/null +++ b/ref_vqdmull_n.c @@ -0,0 +1,101 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqdmull +#define TEST_MSG "VQDMULL_N" +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqdmull_n(vector,val), then store the result. 
*/ +#define TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L) \ + TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) + +#define TEST_VQDMULL_N(T1, T2, W, W2, N, L) \ + TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector */ + TEST_VDUP(vector, , int, s, 16, 4, 0x1000); + TEST_VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x4); + TEST_VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQDMULL_N(int, s, 16, 32, 4, 0x22); + TEST_VQDMULL_N(int, s, 32, 64, 2, 0x55); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQDMULL_N(int, s, 16, 32, 4, 0x8000); + TEST_VQDMULL_N(int, s, 32, 64, 2, 0x80000000); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check mul overflow)"); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + DUMP(TEST_MSG, int, 64, 2, PRIx64); +} diff --git a/ref_vqmovn.c b/ref_vqmovn.c new file mode 100644 index 0000000..c70c034 --- /dev/null +++ 
b/ref_vqmovn.c @@ -0,0 +1,112 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vqmovn +#define TEST_MSG "VQMOVN" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. 
*/ +#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W2)) + +#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N) \ + TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x12); + TEST_VDUP(vector, q, int, s, 32, 4, 0x1278); + TEST_VDUP(vector, q, int, s, 64, 2, 0x12345678); + TEST_VDUP(vector, q, uint, u, 16, 8, 0x82); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x8765); + TEST_VDUP(vector, q, uint, u, 64, 2, 0x87654321); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); + + + /* Fill input vector with arbitrary values which cause an overflow */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x12345678); + TEST_VDUP(vector, q, int, s, 64, 2, 
0x1234567890ABLL); + TEST_VDUP(vector, q, uint, u, 16, 8, 0x8234); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x87654321); + TEST_VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqmovun.c b/ref_vqmovun.c new file mode 100644 index 0000000..729d6c4 --- /dev/null +++ b/ref_vqmovun.c @@ -0,0 +1,93 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN_NAME vqmovun +#define TEST_MSG "VQMOVUN" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x), then store the result. */ +#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ + vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_s##W2)) + +#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N) \ + TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with arbitrary values */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x34); + TEST_VDUP(vector, q, int, s, 32, 4, 0x5678); + TEST_VDUP(vector, q, int, s, 64, 2, 0x12345678); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex (TEST_MSG); + + /* Fill input vector with negative values */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x8234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x87654321); + TEST_VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8); + 
TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4); + TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2); + + dump_results_hex2 (TEST_MSG, " (negative input)"); +} diff --git a/ref_vqneg.c b/ref_vqneg.c new file mode 100644 index 0000000..4ae0af8 --- /dev/null +++ b/ref_vqneg.c @@ -0,0 +1,73 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqneg +#define TEST_MSG "VQNEG/VQNEGQ" + +/* Extra tests for functions requiring corner cases tests */ +void vqneg_extra(void); +#define EXTRA_TESTS vqneg_extra + +#include "ref_v_unary_sat_op.c" + +void vqneg_extra() +{ + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 8, 16); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + /* Initialize input "vector" with max negative values to check + saturation */ + TEST_VDUP(vector, , int, s, 8, 8, 0x80); + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + + /* Apply a unary operator named INSN_NAME */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqrdmulh.c b/ref_vqrdmulh.c new file mode 100644 index 0000000..3c9ea4d --- /dev/null +++ b/ref_vqrdmulh.c @@ -0,0 +1,134 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without 
restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqrdmulh(vector,vector2), then store the result. 
*/ +#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N) \ + TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) + +#define TEST_VQRDMULH(Q, T1, T2, W, N) \ + TEST_VQRDMULH1(INSN, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x5555); + TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x33); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x22); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, 
int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8001); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000001); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8001); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000001); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check rounding overflow)"); + TEST_VQRDMULH(, int, s, 16, 4); + TEST_VQRDMULH(, int, s, 32, 2); + TEST_VQRDMULH(q, int, s, 16, 8); + TEST_VQRDMULH(q, int, s, 32, 4); + dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); +} diff --git a/ref_vqrdmulh_lane.c b/ref_vqrdmulh_lane.c new file mode 100644 index 0000000..d26eda4 --- /dev/null +++ b/ref_vqrdmulh_lane.c @@ -0,0 +1,133 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be 
included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH_LANE" + +#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */ +#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_lane_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) + +#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L) \ + TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + /* vector2: vqrdmulh_lane and 
vqrdmulhq_lane have a 2nd argument with + the same number of elements, so we need only one variable of each + type. */ + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2 */ + TEST_VDUP(vector2, , int, s, 16, 4, 0x55); + TEST_VDUP(vector2, , int, s, 32, 2, 0xBB); + + /* Choose lane arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + + /* FIXME: only a subset of the result buffers are used, but we + output all of them */ + dump_results_hex (TEST_MSG); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8001); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000001); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check rounding overflow)"); + TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2); + 
TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1); + TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3); + TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0); + dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); +} diff --git a/ref_vqrdmulh_n.c b/ref_vqrdmulh_n.c new file mode 100644 index 0000000..c780f21 --- /dev/null +++ b/ref_vqrdmulh_n.c @@ -0,0 +1,122 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrdmulh +#define TEST_MSG "VQRDMULH_N" + +#define FNNAME1(NAME) void exec_ ## NAME ## _n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + int i; + + /* vector_res = vqrdmulh_n(vector,val), then store the result. 
*/ +#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L) \ + TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) + +#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L) \ + TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + + clean_results (); + + TEST_VLOAD(vector, buffer, , int, s, 16, 4); + TEST_VLOAD(vector, buffer, , int, s, 32, 2); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + + /* Choose multiplier arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456); + + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check mul overflow)"); + TEST_VQRDMULH_N(, int, s, 16, 4, 
0x8000); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000); + dump_results_hex2 (TEST_MSG, " (check mul overflow)"); + + + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check rounding overflow)"); + TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001); + TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001); + TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001); + TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001); + dump_results_hex2 (TEST_MSG, " (check rounding overflow)"); +} diff --git a/ref_vqrshl.c b/ref_vqrshl.c new file mode 100644 index 0000000..b266e71 --- /dev/null +++ b/ref_vqrshl.c @@ -0,0 +1,199 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshl +#define TEST_MSG "VQRSHL/VQRSHLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v3=vqrshl(v1,v2), then store the result. */ +#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N) \ + TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N) + +#define TEST_VQRSHL(T3, Q, T1, T2, W, N) \ + TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0); + TEST_VDUP(vector, , int, s, 16, 4, 0); + TEST_VDUP(vector, , int, s, 32, 2, 0); + TEST_VDUP(vector, , int, s, 64, 1, 0); + TEST_VDUP(vector, , uint, u, 8, 8, 0); + TEST_VDUP(vector, , uint, u, 16, 4, 0); + TEST_VDUP(vector, , uint, u, 32, 2, 0); + TEST_VDUP(vector, , uint, u, 64, 1, 0); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 
0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift amount */ + /* Use values equal or one-less-than the type width to check + behaviour on limits */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 7); + TEST_VDUP(vector_shift, , int, s, 16, 4, 15); + TEST_VDUP(vector_shift, , int, s, 32, 2, 31); + TEST_VDUP(vector_shift, , int, s, 64, 1, 63); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (with input = 0)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (input 0 and negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 1); + TEST_VDUP(vector_shift, , int, s, 16, 4, 3); + TEST_VDUP(vector_shift, , int, s, 32, 2, 8); + TEST_VDUP(vector_shift, , int, s, 64, 1, 3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 10); + TEST_VDUP(vector_shift, q, int, 
s, 16, 8, 12); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 31); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex (TEST_MSG); + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -2); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use -1 shift amount to check overflow with round_const */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -1); + TEST_VDUP(vector_shift, , int, s, 32, 2, -1); + 
TEST_VDUP(vector_shift, , int, s, 64, 1, -1); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -1); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -1); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -1); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -1); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (checking overflow: shift by -1)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by -1)"); + + + /* Use -3 shift amount to check overflow with round_const */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -3); + TEST_VDUP(vector_shift, , int, s, 16, 4, -3); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -3); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -3); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -3); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -3); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (checking overflow: shift by -3)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQRSHL, int); + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by -3)"); +} diff --git a/ref_vqrshrn_n.c b/ref_vqrshrn_n.c new file mode 100644 index 0000000..f8eb4ec --- /dev/null +++ b/ref_vqrshrn_n.c @@ -0,0 +1,133 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshrn_n +#define TEST_MSG "VQRSHRN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqrshrn_n(x,v), then store the result. */ +#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V) \ + TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + 
clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 1); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 2); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Another set of tests */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* shift by 3 to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation: shift by 3)"); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 3); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 3); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 3); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3); + TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by 3)"); + + + /* shift by max to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation: shift by max)"); + TEST_VQRSHRN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRN_N(int, s, 64, 32, 2, 32); + TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8); + TEST_VQRSHRN_N(uint, 
u, 32, 16, 4, 16); + TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by max)"); +} diff --git a/ref_vqrshrun_n.c b/ref_vqrshrun_n.c new file mode 100644 index 0000000..48ccbff --- /dev/null +++ b/ref_vqrshrun_n.c @@ -0,0 +1,138 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqrshrun_n +#define TEST_MSG "VQRSHRUN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqrshrun_n(x,v), then store the result. 
*/ +#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V) \ + TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, -2); + TEST_VDUP(vector, q, int, s, 32, 4, -3); + TEST_VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by 1)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1); + 
TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 1)"); + + /* shift by max */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by max, positive input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by max, positive input)"); + + + /* Fill input vector with min value, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); + + /* shift by max */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by max, negative input)"); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by max, negative input)"); + + /* Fill input vector with positive values, to check normal case */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x87654321); + TEST_VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6); + TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7); + TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqshl.c b/ref_vqshl.c new file mode 100644 index 0000000..1b40c92 --- /dev/null +++ b/ref_vqshl.c @@ -0,0 +1,239 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the 
rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshl +#define TEST_MSG "VQSHL/VQSHLQ" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v3=vqshl(v1,v2), then store the result. 
*/ +#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N) \ + TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) + +#define TEST_VQSHL(T3, Q, T1, T2, W, N) \ + TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0); + TEST_VDUP(vector, , int, s, 16, 4, 0); + TEST_VDUP(vector, , int, s, 32, 2, 0); + TEST_VDUP(vector, , int, s, 64, 1, 0); + TEST_VDUP(vector, , uint, u, 8, 8, 0); + TEST_VDUP(vector, , uint, u, 16, 4, 0); + TEST_VDUP(vector, , uint, u, 32, 2, 0); + TEST_VDUP(vector, , uint, u, 64, 1, 0); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift amount */ + /* Use values equal or one-less-than the type width to check + behaviour on limits */ + /* Shift all lanes by 7 ... 
*/ + TEST_VDUP(vector_shift, , int, s, 8, 8, 7); + /* except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9) */ + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6); + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8); + TEST_VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9); + + TEST_VDUP(vector_shift, , int, s, 16, 4, 15); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16); + TEST_VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17); + + TEST_VDUP(vector_shift, , int, s, 32, 2, 31); + TEST_VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30); + + TEST_VDUP(vector_shift, , int, s, 64, 1, 63); + + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33); + + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + TEST_VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (with input = 0)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (input 0 and negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + TEST_VDUP(vector_shift, , 
int, s, 8, 8, 1); + TEST_VDUP(vector_shift, , int, s, 16, 4, 3); + TEST_VDUP(vector_shift, , int, s, 32, 2, 8); + TEST_VDUP(vector_shift, , int, s, 64, 1, -3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 10); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 12); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex (TEST_MSG); + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (negative shift amount)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + /* Use large shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 8); + TEST_VDUP(vector_shift, , int, s, 16, 4, 16); + TEST_VDUP(vector_shift, , int, s, 32, 2, 32); + TEST_VDUP(vector_shift, , int, s, 64, 1, 64); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (large shift amount, negative input)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (large shift amount, negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , 
int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Shift by -1 */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -1); + TEST_VDUP(vector_shift, , int, s, 32, 2, -1); + TEST_VDUP(vector_shift, , int, s, 64, 1, -1); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -1); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -1); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -1); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -1); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (check saturation/overflow)"); + + /* Use large shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 8); + TEST_VDUP(vector_shift, , int, s, 16, 4, 16); + TEST_VDUP(vector_shift, , int, s, 32, 2, 32); + TEST_VDUP(vector_shift, , int, s, 64, 1, 64); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (large shift amount, positive input)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (large shift amount, positive input)"); + + /* Check 64 bits saturation */ + TEST_VDUP(vector, , int, s, 64, 1, -10); + 
TEST_VDUP(vector_shift, , int, s, 64, 1, 64); + TEST_VDUP(vector, q, int, s, 64, 2, 10); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation on 64 bits)"); + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VQSHL, int); + dump_results_hex2 (TEST_MSG, " (check saturation on 64 bits)"); +} diff --git a/ref_vqshl_n.c b/ref_vqshl_n.c new file mode 100644 index 0000000..799a773 --- /dev/null +++ b/ref_vqshl_n.c @@ -0,0 +1,130 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshl +#define TEST_MSG "VQSHL_N/VQSHLQ_N" + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v2=vqshl_n(v1,v), then store the result. 
*/ +#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N) \ + TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N) + +#define TEST_VQSHL_N(T3, Q, T1, T2, W, N) \ + TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQSHL_N(, int, s, 8, 8, 2); + TEST_VQSHL_N(, int, s, 16, 4, 1); + TEST_VQSHL_N(, int, s, 32, 2, 1); + TEST_VQSHL_N(, int, s, 64, 1, 2); + TEST_VQSHL_N(, uint, u, 8, 8, 3); + TEST_VQSHL_N(, uint, u, 16, 4, 2); + TEST_VQSHL_N(, uint, u, 32, 2, 3); + TEST_VQSHL_N(, uint, u, 64, 1, 3); + + TEST_VQSHL_N(q, int, s, 8, 16, 2); + TEST_VQSHL_N(q, int, s, 16, 8, 1); + TEST_VQSHL_N(q, int, s, 32, 4, 1); + TEST_VQSHL_N(q, int, s, 64, 2, 2); + TEST_VQSHL_N(q, uint, u, 8, 16, 3); + TEST_VQSHL_N(q, uint, u, 16, 8, 2); + TEST_VQSHL_N(q, uint, u, 32, 4, 3); + TEST_VQSHL_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + 
TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (check saturation with large positive input)"); + TEST_VQSHL_N(, int, s, 8, 8, 2); + TEST_VQSHL_N(, int, s, 16, 4, 1); + TEST_VQSHL_N(, int, s, 32, 2, 1); + TEST_VQSHL_N(, int, s, 64, 1, 2); + TEST_VQSHL_N(, uint, u, 8, 8, 3); + TEST_VQSHL_N(, uint, u, 16, 4, 2); + TEST_VQSHL_N(, uint, u, 32, 2, 3); + TEST_VQSHL_N(, uint, u, 64, 1, 3); + + TEST_VQSHL_N(q, int, s, 8, 16, 2); + TEST_VQSHL_N(q, int, s, 16, 8, 1); + TEST_VQSHL_N(q, int, s, 32, 4, 1); + TEST_VQSHL_N(q, int, s, 64, 2, 2); + TEST_VQSHL_N(q, uint, u, 8, 16, 3); + TEST_VQSHL_N(q, uint, u, 16, 8, 2); + TEST_VQSHL_N(q, uint, u, 32, 4, 3); + TEST_VQSHL_N(q, uint, u, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (check saturation with large positive input)"); +} diff --git a/ref_vqshlu_n.c b/ref_vqshlu_n.c new file mode 100644 index 0000000..d7a2a4c --- /dev/null +++ b/ref_vqshlu_n.c @@ -0,0 +1,155 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above 
copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshlu +#define TEST_MSG "VQSHLU_N/VQSHLUQ_N" + +#define FNNAME1(NAME) void exec_ ## NAME ## _n(void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: v2=vqshlu_n(v1,v), then store the result. */ +#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ + VECT_VAR(vector_res, T3, W, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##Q##_n_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) \ + TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) + +#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V) \ + TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, -1); + TEST_VDUP(vector, , int, s, 16, 4, -2); + TEST_VDUP(vector, , int, s, 32, 2, -3); + TEST_VDUP(vector, , int, s, 64, 1, -4); + 
TEST_VDUP(vector, q, int, s, 8, 16, -1); + TEST_VDUP(vector, q, int, s, 16, 8, -2); + TEST_VDUP(vector, q, int, s, 32, 4, -3); + TEST_VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by 1)"); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 1)"); + + /* shift by 2 to force saturation */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow: shift by 2)"); + 
TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow: shift by 2)"); + + /* Fill input vector with positive values, to check normal case */ + TEST_VDUP(vector, , int, s, 8, 8, 1); + TEST_VDUP(vector, , int, s, 16, 4, 2); + TEST_VDUP(vector, , int, s, 32, 2, 3); + TEST_VDUP(vector, , int, s, 64, 1, 4); + TEST_VDUP(vector, q, int, s, 8, 16, 5); + TEST_VDUP(vector, q, int, s, 16, 8, 6); + TEST_VDUP(vector, q, int, s, 32, 4, 7); + TEST_VDUP(vector, q, int, s, 64, 2, 8); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1); + TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2); + TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3); + TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4); + + TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5); + TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6); + TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7); + TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqshrn_n.c b/ref_vqshrn_n.c new file mode 100644 index 0000000..4588bc0 --- /dev/null +++ b/ref_vqshrn_n.c @@ -0,0 +1,134 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the 
following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshrn_n +#define TEST_MSG "VQSHRN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqshrn_n(x,v), then store the result. */ +#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, T1, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \ + VECT_VAR(vector_res, T1, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQSHRN_N(T1, T2, W, W2, N, V) \ + TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, 
int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQSHRN_N(int, s, 16, 8, 8, 1); + TEST_VQSHRN_N(int, s, 32, 16, 4, 1); + TEST_VQSHRN_N(int, s, 64, 32, 2, 2); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + + /* Another set of tests */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* shift by 3 to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation: shift by 3)"); + TEST_VQSHRN_N(int, s, 16, 8, 8, 3); + TEST_VQSHRN_N(int, s, 32, 16, 4, 3); + TEST_VQSHRN_N(int, s, 64, 32, 2, 3); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 3); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by 3)"); + + + /* shift by max to exercise saturation code in the lib */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation: shift by max)"); + TEST_VQSHRN_N(int, s, 16, 8, 8, 8); + 
TEST_VQSHRN_N(int, s, 32, 16, 4, 16); + TEST_VQSHRN_N(int, s, 64, 32, 2, 32); + TEST_VQSHRN_N(uint, u, 16, 8, 8, 8); + TEST_VQSHRN_N(uint, u, 32, 16, 4, 16); + TEST_VQSHRN_N(uint, u, 64, 32, 2, 32); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (check saturation: shift by max)"); +} diff --git a/ref_vqshrun_n.c b/ref_vqshrun_n.c new file mode 100644 index 0000000..f4e5a58 --- /dev/null +++ b/ref_vqshrun_n.c @@ -0,0 +1,114 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define INSN vqshrun_n +#define TEST_MSG "VQSHRUN_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqshrun_n(x,v), then store the result. 
*/ +#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Neon_Overflow = 0; \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_overflow(TEST_MSG, xSTR(INSN##_##T2##W)) + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ + TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) + +#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V) \ + TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V) + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, -2); + TEST_VDUP(vector, q, int, s, 32, 4, -3); + TEST_VDUP(vector, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG " (negative input)"); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 3); + TEST_VQSHRUN_N(int, s, 32, 16, 4, 4); + TEST_VQSHRUN_N(int, s, 64, 32, 2, 2); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (negative input)"); + + /* Fill input vector with max value, to check saturation on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + + /* shift by 1 */ + fprintf(ref_file, "\n%s overflow output:\n", + TEST_MSG " (check saturation/overflow)"); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 1); + TEST_VQSHRUN_N(int, s, 32, 16, 4, 1); + TEST_VQSHRUN_N(int, s, 64, 
32, 2, 1); + + dump_results_hex2 (TEST_MSG, " (check saturation/overflow)"); + + /* Fill input vector with positive values, to check normal case */ + TEST_VDUP(vector, q, int, s, 16, 8, 0x1234); + TEST_VDUP(vector, q, int, s, 32, 4, 0x87654321); + TEST_VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF); + + /* shift arbitrary amount */ + fprintf(ref_file, "\n%s overflow output:\n", TEST_MSG); + TEST_VQSHRUN_N(int, s, 16, 8, 8, 6); + TEST_VQSHRUN_N(int, s, 32, 16, 4, 7); + TEST_VQSHRUN_N(int, s, 64, 32, 2, 8); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vqsub.c b/ref_vqsub.c new file mode 100644 index 0000000..3cd28ae --- /dev/null +++ b/ref_vqsub.c @@ -0,0 +1,156 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vqsub +#define TEST_MSG "VQSUB/VQSUBQ" + +/* Extra tests for functions requiring types larger than 64 bits to + compute saturation */ +void vqsub_64(void); +#define EXTRA_TESTS vqsub_64 + +#include "ref_v_binary_sat_op.c" + +void vqsub_64(void) +{ + int i; + + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + /* Initialize input "vector1" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + + TEST_VDUP(vector2, , int, s, 64, 1, 0x0); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x0); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x0); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x0); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + fprintf(ref_file, "\n64 bits saturation:\n"); + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* Another set of tests */ + TEST_VDUP(vector2, , int, s, 64, 1, 0x7fffffffffffffffLL); + TEST_VDUP(vector2, , uint, u, 64, 1, 0xffffffffffffffffULL); + + 
/* To check positive saturation, we need to write a positive value + in vector1 */ + TEST_VDUP(vector1, q, int, s, 64, 2, 0x3fffffffffffffffLL); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000LL); + + TEST_VDUP(vector2, q, uint, u, 64, 2, 0xffffffffffffffffULL); + + fprintf(ref_file, "\n%s 64 bits saturation overflow output:\n", TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2); + + DUMP(TEST_MSG, int, 64, 1, PRIx64); + DUMP(TEST_MSG, uint, 64, 1, PRIx64); + DUMP(TEST_MSG, int, 64, 2, PRIx64); + DUMP(TEST_MSG, uint, 64, 2, PRIx64); + + /* To improve coverage, check saturation with less than 64 bits too */ + fprintf(ref_file, "\nless than 64 bits saturation:\n"); + TEST_VDUP(vector2, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector2, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4); + + DUMP(TEST_MSG, int, 8, 8, PRIx8); + DUMP(TEST_MSG, int, 16, 4, PRIx16); + DUMP(TEST_MSG, int, 32, 2, PRIx32); + DUMP(TEST_MSG, int, 8, 16, PRIx8); + DUMP(TEST_MSG, int, 16, 8, PRIx16); + DUMP(TEST_MSG, int, 32, 4, PRIx32); + + + TEST_VDUP(vector1, , uint, u, 8, 8, 0x10); + TEST_VDUP(vector1, , uint, u, 16, 4, 0x10); + TEST_VDUP(vector1, , uint, u, 32, 2, 0x10); + TEST_VDUP(vector1, q, uint, u, 8, 16, 0x10); + TEST_VDUP(vector1, q, uint, u, 16, 8, 0x10); + TEST_VDUP(vector1, q, uint, u, 32, 4, 0x10); + + TEST_VDUP(vector2, , uint, u, 8, 8, 0x20); + 
TEST_VDUP(vector2, , uint, u, 16, 4, 0x20); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x20); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x20); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x20); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x20); + + fprintf(ref_file, "\n%s less than 64 bits saturation overflow output:\n", + TEST_MSG); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4); + TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8); + TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4); + + DUMP(TEST_MSG, uint, 8, 8, PRIx8); + DUMP(TEST_MSG, uint, 16, 4, PRIx16); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 8, 16, PRIx8); + DUMP(TEST_MSG, uint, 16, 8, PRIx16); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_vraddhn.c b/ref_vraddhn.c new file mode 100644 index 0000000..c6aac33 --- /dev/null +++ b/ref_vraddhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vraddhn +#define TEST_MSG "VRADDHN" + +#include "ref_vaddhn.c" diff --git a/ref_vrecpe.c b/ref_vrecpe.c new file mode 100644 index 0000000..4bb9714 --- /dev/null +++ b/ref_vrecpe.c @@ -0,0 +1,97 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRECPE/VRECPEQ" +void exec_vrecpe(void) +{ + int i; + + /* Basic test: y=vrecpe(x), then store the result. 
*/ +#define TEST_VRECPE(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrecpe##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 2, 0x12345678); + TEST_VDUP(vector, , float, f, 32, 2, 1.9); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); + TEST_VDUP(vector, q, float, f, 32, 4, 125); + + /* Apply the operator */ + TEST_VRECPE(, uint, u, 32, 2); + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, uint, u, 32, 4); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " (positive input)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , float, f, 32, 2, -10.0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x89081234); + TEST_VDUP(vector, q, float, f, 32, 4, -125.0); + + /* Apply the operator */ + TEST_VRECPE(, uint, u, 32, 2); + TEST_VRECPE(, float, f, 32, 2); + TEST_VRECPE(q, uint, u, 32, 4); + TEST_VRECPE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s %s output:\n", TEST_MSG, " (negative input)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git 
a/ref_vrecps.c b/ref_vrecps.c new file mode 100644 index 0000000..333fa07 --- /dev/null +++ b/ref_vrecps.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRECPS/VRECPSQ" +void exec_vrecps(void) +{ + int i; + + /* Basic test: y=vrecps(x), then store the result. 
*/ +#define TEST_VRECPS(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrecps##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for integer variants */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , float, f, 32, 2, 12.9); + TEST_VDUP(vector, q, float, f, 32, 4, 9.2); + + TEST_VDUP(vector2, , float, f, 32, 2, 8.9); + TEST_VDUP(vector2, q, float, f, 32, 4, 3.2); + + /* Apply the operator */ + TEST_VRECPS(, float, f, 32, 2); + TEST_VRECPS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vreinterpret.c b/ref_vreinterpret.c new file mode 100644 index 0000000..abd5aa3 --- /dev/null +++ b/ref_vreinterpret.c @@ -0,0 +1,256 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VREINTERPRET/VREINTERPRETQ" + +void exec_vreinterpret (void) +{ + int i; + + /* Basic test: y=vreinterpret(x), then store the result. */ +#define TEST_VREINTERPRET(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ + VECT_VAR(vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP(TEST_MSG, T1, W, N, PRIx##W); + +#define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ + VECT_VAR(vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* The same result buffers are used multiple times, so output them + before overwriting them */ + fprintf(ref_file, "\n%s output:\n", TEST_MSG); + + + /* vreinterpret_s8_xx */ + TEST_VREINTERPRET(, int, s, 8, 8, int, s, 16, 4); + 
TEST_VREINTERPRET(, int, s, 8, 8, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 8, 8, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 64, 1); + + /* vreinterpret_s16_xx */ + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 64, 1); + + /* vreinterpret_s32_xx */ + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET(, int, s, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 64, 1); + + /* vreinterpret_s64_xx */ + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 8, 8); + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 16, 4); + TEST_VREINTERPRET(, int, s, 64, 1, int, s, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 8, 8); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 16, 4); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 64, 1); + + /* vreinterpret_u8_xx */ + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 64, 1); + + /* vreinterpret_u16_xx */ + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 
8, 8); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 64, 1); + + /* vreinterpret_u32_xx */ + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 64, 1); + + /* vreinterpret_u64_xx */ + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 8, 8); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 16, 4); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 32, 2); + TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 64, 1); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 8, 8); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 16, 4); + TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 32, 2); + + /* vreinterpretq_s8_xx */ + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 64, 2); + + /* vreinterpretq_s16_xx */ + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 64, 2); + + /* 
vreinterpretq_s32_xx */ + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 64, 2); + + /* vreinterpretq_s64_xx */ + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 8, 16); + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 16, 8); + TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 8, 16); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 16, 8); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 64, 2); + + /* vreinterpretq_u16_xx */ + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 64, 2); + + /* vreinterpretq_u32_xx */ + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 64, 2); + + /* vreinterpretq_u64_xx */ + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 8, 16); + TEST_VREINTERPRET(q, uint, u, 64, 
2, uint, u, 16, 8); + TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 32, 4); + + /* vreinterpretq_u8_xx */ + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 8, 16); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 16, 8); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 64, 2); + TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 16, 8); + TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 64, 2); + + /* vreinterpret_f32_xx */ + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 8, 8); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 16, 4); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 32, 2); + TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 64, 1); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 8, 8); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 16, 4); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 32, 2); + TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 64, 1); + + /* vreinterpretq_f32_xx */ + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 8, 16); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 16, 8); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 32, 4); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 64, 2); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 8, 16); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 16, 8); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 32, 4); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 64, 2); + + /* vreinterpret_xx_f32 */ + TEST_VREINTERPRET(, int, s, 8, 8, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 16, 4, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 32, 2, float, f, 32, 2); + TEST_VREINTERPRET(, int, s, 64, 1, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 8, 8, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 32, 2); + TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 32, 2); + + /* vreinterpretq_xx_f32 */ 
+ TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 16, 8, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 32, 4, float, f, 32, 4); + TEST_VREINTERPRET(q, int, s, 64, 2, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 8, 16, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 32, 4); + TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 32, 4); +} diff --git a/ref_vrev.c b/ref_vrev.c new file mode 100644 index 0000000..2f088c0 --- /dev/null +++ b/ref_vrev.c @@ -0,0 +1,96 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* Template file for unary operator validation */ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vrev (void) +{ + /* Basic test: y=vrev(x), then store the result. 
*/ +#define TEST_VREV(Q, T1, T2, W, N, W2) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrev##W2##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Check vrev in each of the existing combinations */ +#define TEST_MSG "VREV16" + TEST_VREV(, int, s, 8, 8, 16); + TEST_VREV(, uint, u, 8, 8, 16); + TEST_VREV(q, int, s, 8, 16, 16); + TEST_VREV(q, uint, u, 8, 16, 16); + dump_results_hex (TEST_MSG); + +#undef TEST_MSG +#define TEST_MSG "VREV32" + TEST_VREV(, int, s, 8, 8, 32); + TEST_VREV(, int, s, 16, 4, 32); + TEST_VREV(, uint, u, 8, 8, 32); + TEST_VREV(, uint, u, 16, 4, 32); + TEST_VREV(q, int, s, 8, 16, 32); + TEST_VREV(q, int, s, 16, 8, 32); + TEST_VREV(q, uint, u, 8, 16, 32); + TEST_VREV(q, uint, u, 16, 8, 32); + dump_results_hex (TEST_MSG); + +#undef TEST_MSG +#define TEST_MSG "VREV64" + TEST_VREV(, int, s, 8, 8, 64); + TEST_VREV(, int, s, 16, 4, 64); + TEST_VREV(, int, s, 32, 2, 64); + TEST_VREV(, uint, u, 8, 8, 64); + TEST_VREV(, uint, u, 16, 4, 64); + TEST_VREV(, uint, u, 32, 2, 64); + TEST_VREV(q, int, s, 8, 16, 64); + TEST_VREV(q, int, s, 16, 8, 64); + TEST_VREV(q, int, s, 32, 4, 64); + TEST_VREV(q, uint, u, 8, 16, 64); + TEST_VREV(q, uint, u, 16, 8, 64); + TEST_VREV(q, uint, u, 32, 4, 64); + + TEST_VREV(, float, f, 32, 2, 64); + TEST_VREV(q, float, f, 32, 4, 64); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vrhadd.c b/ref_vrhadd.c new file mode 100644 index 0000000..20872a1 --- /dev/null +++ b/ref_vrhadd.c @@ -0,0 +1,31 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics 
+Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vrhadd +#define TEST_MSG "VRHADD/VRHADDQ" + +#define NO_FLOAT_VARIANT + +#include "ref_vmax.c" diff --git a/ref_vrshl.c b/ref_vrshl.c new file mode 100644 index 0000000..ec534c0 --- /dev/null +++ b/ref_vrshl.c @@ -0,0 +1,192 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHL/VRSHLQ" +void exec_vrshl (void) +{ + /* Basic test: v3=vrshl(v1,v2), then store the result. */ +#define TEST_VRSHL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Fill input vector with 0, to check behavior on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0); + TEST_VDUP(vector, , int, s, 16, 4, 0); + TEST_VDUP(vector, , int, s, 32, 2, 0); + TEST_VDUP(vector, , int, s, 64, 1, 0); + TEST_VDUP(vector, , uint, u, 8, 8, 0); + TEST_VDUP(vector, , uint, u, 16, 4, 0); + TEST_VDUP(vector, , uint, u, 32, 2, 0); + TEST_VDUP(vector, , uint, u, 64, 1, 0); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose init value arbitrarily, will be used as shift 
amount */ + /* Use values equal to one-less-than the type width to check + behaviour on limits */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 7); + TEST_VDUP(vector_shift, , int, s, 16, 4, 15); + TEST_VDUP(vector_shift, , int, s, 32, 2, 31); + TEST_VDUP(vector_shift, , int, s, 64, 1, 63); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 15); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 31); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (input 0 and negative shift amount)"); + + /* Test again, with predefined input values */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 1); + TEST_VDUP(vector_shift, , int, s, 16, 4, 3); + TEST_VDUP(vector_shift, , int, s, 32, 2, 8); + TEST_VDUP(vector_shift, , int, s, 64, 1, -3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 10); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 12); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex (TEST_MSG); + + + /* Use negative shift amounts */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -2); + TEST_VDUP(vector_shift, , int, s, 32, 2, -3); + TEST_VDUP(vector_shift, , 
int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -7); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -11); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -13); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -20); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); + + /* Fill input vector with max value, to check behavior on limits */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use -1 shift amount to check overflow with round_const */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -1); + TEST_VDUP(vector_shift, , int, s, 32, 2, -1); + TEST_VDUP(vector_shift, , int, s, 64, 1, -1); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -1); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -1); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -1); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -1); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (checking round_const overflow: shift by -1)"); + + /* Use -3 shift amount to check overflow with round_const */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -3); + TEST_VDUP(vector_shift, , int, s, 16, 4, -3); + TEST_VDUP(vector_shift, , int, s, 
32, 2, -3); + TEST_VDUP(vector_shift, , int, s, 64, 1, -3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -3); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -3); + TEST_VDUP(vector_shift, q, int, s, 32, 4, -3); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -3); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (checking round_const overflow: shift by -3)"); + + /* Test large shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 10); + TEST_VDUP(vector_shift, , int, s, 16, 4, 20); + TEST_VDUP(vector_shift, , int, s, 32, 2, 33); + TEST_VDUP(vector_shift, , int, s, 64, 1, 65); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 9); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 16); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 32); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 64); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VRSHL, int); + + dump_results_hex2 (TEST_MSG, " (large shift amount)"); +} diff --git a/ref_vrshr_n.c b/ref_vrshr_n.c new file mode 100644 index 0000000..c68b637 --- /dev/null +++ b/ref_vrshr_n.c @@ -0,0 +1,217 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHR_N" +void exec_vrshr_n (void) +{ + /* Basic test: y=vrshr_n(x,v), then store the result. */ +#define TEST_VRSHR_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 12); + TEST_VRSHR_N(, int, s, 32, 2, 2); + TEST_VRSHR_N(, int, s, 64, 1, 32); + TEST_VRSHR_N(, uint, u, 8, 8, 2); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 5); + TEST_VRSHR_N(, uint, u, 64, 1, 33); + + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 12); + TEST_VRSHR_N(q, int, s, 32, 4, 2); + TEST_VRSHR_N(q, int, s, 64, 2, 32); + TEST_VRSHR_N(q, uint, u, 8, 16, 2); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 5); + TEST_VRSHR_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Another set of tests */ + TEST_VDUP(vector, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector, , int, s, 64, 1, 
0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Use max shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 8); + TEST_VRSHR_N(, int, s, 16, 4, 16); + TEST_VRSHR_N(, int, s, 32, 2, 32); + TEST_VRSHR_N(, int, s, 64, 1, 64); + TEST_VRSHR_N(, uint, u, 8, 8, 8); + TEST_VRSHR_N(, uint, u, 16, 4, 16); + TEST_VRSHR_N(, uint, u, 32, 2, 32); + TEST_VRSHR_N(, uint, u, 64, 1, 64); + TEST_VRSHR_N(q, int, s, 8, 16, 8); + TEST_VRSHR_N(q, int, s, 16, 8, 16); + TEST_VRSHR_N(q, int, s, 32, 4, 32); + TEST_VRSHR_N(q, int, s, 64, 2, 64); + TEST_VRSHR_N(q, uint, u, 8, 16, 8); + TEST_VRSHR_N(q, uint, u, 16, 8, 16); + TEST_VRSHR_N(q, uint, u, 32, 4, 32); + TEST_VRSHR_N(q, uint, u, 64, 2, 64); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: max shift amount, positive input)"); + + /* Use 1 as shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 1); + TEST_VRSHR_N(, int, s, 32, 2, 1); + TEST_VRSHR_N(, int, s, 64, 1, 1); + TEST_VRSHR_N(, uint, u, 8, 8, 1); + TEST_VRSHR_N(, uint, u, 16, 4, 1); + TEST_VRSHR_N(, uint, u, 32, 2, 1); + TEST_VRSHR_N(, uint, u, 64, 1, 1); + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 1); + TEST_VRSHR_N(q, int, s, 32, 4, 1); + TEST_VRSHR_N(q, int, s, 64, 2, 1); + 
TEST_VRSHR_N(q, uint, u, 8, 16, 1); + TEST_VRSHR_N(q, uint, u, 16, 8, 1); + TEST_VRSHR_N(q, uint, u, 32, 4, 1); + TEST_VRSHR_N(q, uint, u, 64, 2, 1); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 1, with negative input)"); + + /* Use 3 as shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 3); + TEST_VRSHR_N(, int, s, 16, 4, 3); + TEST_VRSHR_N(, int, s, 32, 2, 3); + TEST_VRSHR_N(, int, s, 64, 1, 3); + TEST_VRSHR_N(, uint, u, 8, 8, 3); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 3); + TEST_VRSHR_N(, uint, u, 64, 1, 3); + TEST_VRSHR_N(q, int, s, 8, 16, 3); + TEST_VRSHR_N(q, int, s, 16, 8, 3); + TEST_VRSHR_N(q, int, s, 32, 4, 3); + TEST_VRSHR_N(q, int, s, 64, 2, 3); + TEST_VRSHR_N(q, uint, u, 8, 16, 3); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 3); + TEST_VRSHR_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 3, positive input)"); + + TEST_VDUP(vector, , int, s, 8, 8, 0x80); + TEST_VDUP(vector, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL); + TEST_VDUP(vector, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL); + TEST_VDUP(vector, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + + /* Use 1 as shift amount, to exercise 
saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 1); + TEST_VRSHR_N(, int, s, 16, 4, 1); + TEST_VRSHR_N(, int, s, 32, 2, 1); + TEST_VRSHR_N(, int, s, 64, 1, 1); + TEST_VRSHR_N(, uint, u, 8, 8, 1); + TEST_VRSHR_N(, uint, u, 16, 4, 1); + TEST_VRSHR_N(, uint, u, 32, 2, 1); + TEST_VRSHR_N(, uint, u, 64, 1, 1); + TEST_VRSHR_N(q, int, s, 8, 16, 1); + TEST_VRSHR_N(q, int, s, 16, 8, 1); + TEST_VRSHR_N(q, int, s, 32, 4, 1); + TEST_VRSHR_N(q, int, s, 64, 2, 1); + TEST_VRSHR_N(q, uint, u, 8, 16, 1); + TEST_VRSHR_N(q, uint, u, 16, 8, 1); + TEST_VRSHR_N(q, uint, u, 32, 4, 1); + TEST_VRSHR_N(q, uint, u, 64, 2, 1); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 1, with negative input)"); + + /* Use 3 as shift amount, to exercise saturation code in the lib */ + TEST_VRSHR_N(, int, s, 8, 8, 3); + TEST_VRSHR_N(, int, s, 16, 4, 3); + TEST_VRSHR_N(, int, s, 32, 2, 3); + TEST_VRSHR_N(, int, s, 64, 1, 3); + TEST_VRSHR_N(, uint, u, 8, 8, 3); + TEST_VRSHR_N(, uint, u, 16, 4, 3); + TEST_VRSHR_N(, uint, u, 32, 2, 3); + TEST_VRSHR_N(, uint, u, 64, 1, 3); + TEST_VRSHR_N(q, int, s, 8, 16, 3); + TEST_VRSHR_N(q, int, s, 16, 8, 3); + TEST_VRSHR_N(q, int, s, 32, 4, 3); + TEST_VRSHR_N(q, int, s, 64, 2, 3); + TEST_VRSHR_N(q, uint, u, 8, 16, 3); + TEST_VRSHR_N(q, uint, u, 16, 8, 3); + TEST_VRSHR_N(q, uint, u, 32, 4, 3); + TEST_VRSHR_N(q, uint, u, 64, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (overflow test: shift by 3, with negative input)"); +} diff --git a/ref_vrshrn_n.c b/ref_vrshrn_n.c new file mode 100644 index 0000000..1cb67a0 --- /dev/null +++ b/ref_vrshrn_n.c @@ -0,0 +1,119 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal 
+in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSHRN_N" +void exec_vrshrn_n (void) +{ + /* Basic test: v2=vrshrn_n(v1,v), then store the result. 
*/ +#define TEST_VRSHRN_N(T1, T2, W, N, W2, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vrshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + /* Fill input vector with 0, to check behavior on limits */ + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose shift amount arbitrarily */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VRSHRN_N(int, s, 32, 4, 16, 1); + TEST_VRSHRN_N(int, s, 64, 2, 32, 2); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + dump_results_hex2 (TEST_MSG, " (with input = 0)"); + + /* Test again, with predefined input values */ + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 1); + TEST_VRSHRN_N(int, s, 32, 4, 16, 1); + TEST_VRSHRN_N(int, s, 64, 2, 
32, 2); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VRSHRN_N(uint, u, 32, 4, 16, 3); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Fill input arbitrary values */ + TEST_VDUP(vector, q, int, s, 16, 8, 30); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0xFFF0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFF0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Choose shift amount arbitrarily */ + TEST_VRSHRN_N(int, s, 16, 8, 8, 7); + TEST_VRSHRN_N(int, s, 32, 4, 16, 14); + TEST_VRSHRN_N(int, s, 64, 2, 32, 31); + TEST_VRSHRN_N(uint, u, 16, 8, 8, 7); + TEST_VRSHRN_N(uint, u, 32, 4, 16, 16); + TEST_VRSHRN_N(uint, u, 64, 2, 32, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex2 (TEST_MSG, " (with large shift amount)"); +} diff --git a/ref_vrsqrte.c b/ref_vrsqrte.c new file mode 100644 index 0000000..f66bfb8 --- /dev/null +++ b/ref_vrsqrte.c @@ -0,0 +1,105 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSQRTE/VRSQRTEQ" +void exec_vrsqrte(void) +{ + int i; + + /* Basic test: y=vrsqrte(x), then store the result. */ +#define TEST_VRSQRTE(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsqrte##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for 64 bits variants */ + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector_res, uint, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 2, 0x12345678); + TEST_VDUP(vector, , float, f, 32, 2, 12.9); + TEST_VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); + TEST_VDUP(vector, q, float, f, 32, 4, 18.2); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(, float, f, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + TEST_VRSQRTE(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); + + /* Don't test FP variants with negative inputs: the result depends + on the platform */ + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 
2, 0xFFFFFFFF); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x89081234); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " (2)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , uint, u, 32, 2, 0x80000000); + TEST_VDUP(vector, q, uint, u, 32, 4, 0x4ABCDEF0); + + /* Apply the operator */ + TEST_VRSQRTE(, uint, u, 32, 2); + TEST_VRSQRTE(q, uint, u, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG " (3)"); + DUMP(TEST_MSG, uint, 32, 2, PRIx32); + DUMP(TEST_MSG, uint, 32, 4, PRIx32); +} diff --git a/ref_vrsqrts.c b/ref_vrsqrts.c new file mode 100644 index 0000000..a3f3d00 --- /dev/null +++ b/ref_vrsqrts.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSQRTS/VRSQRTSQ" +void exec_vrsqrts(void) +{ + int i; + + /* Basic test: y=vrsqrts(x), then store the result. */ +#define TEST_VRSQRTS(Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsqrts##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* No need for integer variants */ + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + clean_results (); + + /* Choose init value arbitrarily */ + TEST_VDUP(vector, , float, f, 32, 2, 12.9); + TEST_VDUP(vector, q, float, f, 32, 4, 9.1); + + TEST_VDUP(vector2, , float, f, 32, 2, 9.9); + TEST_VDUP(vector2, q, float, f, 32, 4, 1.9); + + /* Apply the operator */ + TEST_VRSQRTS(, float, f, 32, 2); + TEST_VRSQRTS(q, float, f, 32, 4); + + fprintf (ref_file, "\n%s output:\n", TEST_MSG); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vrsra_n.c b/ref_vrsra_n.c new file mode 100644 index 0000000..f9e8df3 --- /dev/null +++ b/ref_vrsra_n.c @@ -0,0 +1,238 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do 
so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VRSRA_N" +void exec_vrsra_n (void) +{ + /* Basic test: y=vrsra_n(x,v), then store the result. */ +#define TEST_VRSRA_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vrsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + 
TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + /* Choose shift amount arbitrarily */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 12); + TEST_VRSRA_N(, int, s, 32, 2, 2); + TEST_VRSRA_N(, int, s, 64, 1, 32); + TEST_VRSRA_N(, uint, u, 8, 8, 2); + TEST_VRSRA_N(, uint, u, 16, 4, 3); + TEST_VRSRA_N(, uint, u, 32, 2, 5); + TEST_VRSRA_N(, uint, u, 64, 1, 33); + + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 12); + TEST_VRSRA_N(q, int, s, 32, 4, 2); + TEST_VRSRA_N(q, int, s, 64, 2, 32); + TEST_VRSRA_N(q, uint, u, 8, 16, 2); + TEST_VRSRA_N(q, uint, u, 16, 8, 3); + TEST_VRSRA_N(q, uint, u, 32, 4, 5); + TEST_VRSRA_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); + + /* Initialize the accumulator with 0 */ + TEST_VDUP(vector, , int, s, 8, 8, 0); + TEST_VDUP(vector, , int, s, 16, 4, 0); + TEST_VDUP(vector, , int, s, 32, 2, 0); + TEST_VDUP(vector, , int, s, 64, 1, 0); + TEST_VDUP(vector, , uint, u, 8, 8, 0); + TEST_VDUP(vector, , uint, u, 16, 4, 0); + TEST_VDUP(vector, , uint, u, 32, 2, 0); + TEST_VDUP(vector, , uint, u, 64, 1, 0); + TEST_VDUP(vector, q, int, s, 8, 16, 0); + TEST_VDUP(vector, q, int, s, 16, 8, 0); + TEST_VDUP(vector, q, int, s, 32, 4, 0); + TEST_VDUP(vector, q, int, s, 64, 2, 0); + TEST_VDUP(vector, q, uint, u, 8, 16, 0); + TEST_VDUP(vector, q, uint, u, 16, 8, 0); + TEST_VDUP(vector, q, uint, u, 32, 4, 0); + TEST_VDUP(vector, q, uint, u, 64, 2, 0); + + /* Initialize with max values to check overflow */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x7F); + TEST_VDUP(vector2, , int, s, 16, 4, 0x7FFF); + TEST_VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF); + TEST_VDUP(vector2, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL); 
+ TEST_VDUP(vector2, , uint, u, 8, 8, 0xFF); + TEST_VDUP(vector2, , uint, u, 16, 4, 0xFFFF); + TEST_VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFFF); + TEST_VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x7F); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0xFF); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0xFFFF); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + + /* Shift by 1 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 1); + TEST_VRSRA_N(, int, s, 32, 2, 1); + TEST_VRSRA_N(, int, s, 64, 1, 1); + TEST_VRSRA_N(, uint, u, 8, 8, 1); + TEST_VRSRA_N(, uint, u, 16, 4, 1); + TEST_VRSRA_N(, uint, u, 32, 2, 1); + TEST_VRSRA_N(, uint, u, 64, 1, 1); + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 1); + TEST_VRSRA_N(q, int, s, 32, 4, 1); + TEST_VRSRA_N(q, int, s, 64, 2, 1); + TEST_VRSRA_N(q, uint, u, 8, 16, 1); + TEST_VRSRA_N(q, uint, u, 16, 8, 1); + TEST_VRSRA_N(q, uint, u, 32, 4, 1); + TEST_VRSRA_N(q, uint, u, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 1, positive input)"); + + /* Shift by 3 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 3); + TEST_VRSRA_N(, int, s, 16, 4, 3); + TEST_VRSRA_N(, int, s, 32, 2, 3); + TEST_VRSRA_N(, int, s, 64, 1, 3); + TEST_VRSRA_N(, uint, u, 8, 8, 3); + TEST_VRSRA_N(, uint, u, 16, 4, 3); + TEST_VRSRA_N(, uint, u, 32, 2, 3); + TEST_VRSRA_N(, uint, u, 64, 1, 3); + TEST_VRSRA_N(q, int, s, 8, 16, 3); + TEST_VRSRA_N(q, int, s, 16, 8, 3); + TEST_VRSRA_N(q, int, s, 32, 4, 3); + TEST_VRSRA_N(q, int, s, 64, 2, 3); + TEST_VRSRA_N(q, uint, u, 8, 16, 3); + TEST_VRSRA_N(q, uint, u, 16, 8, 3); + TEST_VRSRA_N(q, uint, u, 32, 4, 3); + TEST_VRSRA_N(q, 
uint, u, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 3, positive input)"); + + /* Shift by max to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 8); + TEST_VRSRA_N(, int, s, 16, 4, 16); + TEST_VRSRA_N(, int, s, 32, 2, 32); + TEST_VRSRA_N(, int, s, 64, 1, 64); + TEST_VRSRA_N(, uint, u, 8, 8, 8); + TEST_VRSRA_N(, uint, u, 16, 4, 16); + TEST_VRSRA_N(, uint, u, 32, 2, 32); + TEST_VRSRA_N(, uint, u, 64, 1, 64); + TEST_VRSRA_N(q, int, s, 8, 16, 8); + TEST_VRSRA_N(q, int, s, 16, 8, 16); + TEST_VRSRA_N(q, int, s, 32, 4, 32); + TEST_VRSRA_N(q, int, s, 64, 2, 64); + TEST_VRSRA_N(q, uint, u, 8, 16, 8); + TEST_VRSRA_N(q, uint, u, 16, 8, 16); + TEST_VRSRA_N(q, uint, u, 32, 4, 32); + TEST_VRSRA_N(q, uint, u, 64, 2, 64); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by max, positive input)"); + /* Initialize with min values to check overflow */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x80); + TEST_VDUP(vector2, , int, s, 16, 4, 0x8000); + TEST_VDUP(vector2, , int, s, 32, 2, 0x80000000); + TEST_VDUP(vector2, , int, s, 64, 1, 0x8000000000000000LL); + TEST_VDUP(vector2, q, int, s, 8, 16, 0x80); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x8000); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x80000000); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000ULL); + + /* Shift by 1 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 1); + TEST_VRSRA_N(, int, s, 16, 4, 1); + TEST_VRSRA_N(, int, s, 32, 2, 1); + TEST_VRSRA_N(, int, s, 64, 1, 1); + TEST_VRSRA_N(q, int, s, 8, 16, 1); + TEST_VRSRA_N(q, int, s, 16, 8, 1); + TEST_VRSRA_N(q, int, s, 32, 4, 1); + TEST_VRSRA_N(q, int, s, 64, 2, 1); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by 1, negative input)"); + + /* Shift by 3 to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 3); + TEST_VRSRA_N(, int, s, 16, 4, 3); + TEST_VRSRA_N(, int, s, 32, 2, 3); + TEST_VRSRA_N(, int, s, 64, 1, 3); + TEST_VRSRA_N(q, int, s, 8, 
16, 3); + TEST_VRSRA_N(q, int, s, 16, 8, 3); + TEST_VRSRA_N(q, int, s, 32, 4, 3); + TEST_VRSRA_N(q, int, s, 64, 2, 3); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by max, negative input)"); + + /* Shift by max to check overflow with rounding constant */ + TEST_VRSRA_N(, int, s, 8, 8, 8); + TEST_VRSRA_N(, int, s, 16, 4, 16); + TEST_VRSRA_N(, int, s, 32, 2, 32); + TEST_VRSRA_N(, int, s, 64, 1, 64); + TEST_VRSRA_N(q, int, s, 8, 16, 8); + TEST_VRSRA_N(q, int, s, 16, 8, 16); + TEST_VRSRA_N(q, int, s, 32, 4, 32); + TEST_VRSRA_N(q, int, s, 64, 2, 64); + + dump_results_hex2 (TEST_MSG, " (checking overflow: shift by max, negative input)"); +} diff --git a/ref_vrsubhn.c b/ref_vrsubhn.c new file mode 100644 index 0000000..e972d82 --- /dev/null +++ b/ref_vrsubhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vrsubhn +#define TEST_MSG "VRSUBHN" + +#include "ref_vaddhn.c" diff --git a/ref_vset_lane.c b/ref_vset_lane.c new file mode 100644 index 0000000..a938469 --- /dev/null +++ b/ref_vset_lane.c @@ -0,0 +1,78 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSET_LANE/VSET_LANEQ" +void exec_vset_lane (void) +{ + /* vec=vset_lane(val, vec, lane), then store the result. 
*/ +#define TEST_VSET_LANE_HERE(Q, T1, T2, W, N, V, L) \ + VECT_VAR(vector, T1, W, N) = \ + vset##Q##_lane_##T2##W(V, \ + VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + TEST_VLOAD(vector, buffer, , float, f, 32, 2); + TEST_VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose value and lane arbitrarily */ + TEST_VSET_LANE_HERE(, int, s, 8, 8, 0x11, 7); + TEST_VSET_LANE_HERE(, int, s, 16, 4, 0x22, 3); + TEST_VSET_LANE_HERE(, int, s, 32, 2, 0x33, 1); + TEST_VSET_LANE_HERE(, int, s, 64, 1, 0x44, 0); + TEST_VSET_LANE_HERE(, uint, u, 8, 8, 0x55, 6); + TEST_VSET_LANE_HERE(, uint, u, 16, 4, 0x66, 2); + TEST_VSET_LANE_HERE(, uint, u, 32, 2, 0x77, 1); + TEST_VSET_LANE_HERE(, uint, u, 64, 1, 0x88, 0); + TEST_VSET_LANE_HERE(, float, f, 32, 2, 33.2, 1); + + TEST_VSET_LANE_HERE(q, int, s, 8, 16, 0x99, 15); + TEST_VSET_LANE_HERE(q, int, s, 16, 8, 0xAA, 5); + TEST_VSET_LANE_HERE(q, int, s, 32, 4, 0xBB, 3); + TEST_VSET_LANE_HERE(q, int, s, 64, 2, 0xCC, 1); + TEST_VSET_LANE_HERE(q, uint, u, 8, 16, 0xDD, 14); + TEST_VSET_LANE_HERE(q, uint, u, 16, 8, 0xEE, 6); + TEST_VSET_LANE_HERE(q, uint, u, 32, 4, 0xFF, 2); + TEST_VSET_LANE_HERE(q, uint, u, 64, 2, 0x11, 1); + TEST_VSET_LANE_HERE(q, float, f, 32, 4, 11.2, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshl.c b/ref_vshl.c new file mode 100644 index 0000000..2af3f80 --- /dev/null +++ b/ref_vshl.c @@ -0,0 +1,98 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including 
without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHL/VSHLQ" +void exec_vshl (void) +{ + /* Basic test: v3=vshl(v1,v2), then store the result. 
*/ +#define TEST_VSHL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + DECL_VARIABLE_SIGNED_VARIANTS(vector_shift); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 1); + TEST_VDUP(vector_shift, , int, s, 16, 4, 3); + TEST_VDUP(vector_shift, , int, s, 32, 2, 8); + TEST_VDUP(vector_shift, , int, s, 64, 1, 3); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 5); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 12); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 30); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 63); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex (TEST_MSG); + + /* Test large shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, 8); + TEST_VDUP(vector_shift, , int, s, 16, 4, 16); + TEST_VDUP(vector_shift, , int, s, 32, 2, 32); + TEST_VDUP(vector_shift, , int, s, 64, 1, 64); + TEST_VDUP(vector_shift, q, int, s, 8, 16, 8); + TEST_VDUP(vector_shift, q, int, s, 16, 8, 17); + TEST_VDUP(vector_shift, q, int, s, 32, 4, 33); + TEST_VDUP(vector_shift, q, int, s, 64, 2, 65); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex2 (TEST_MSG, " (large shift amount)"); + + + /* Test negative shift amount */ + TEST_VDUP(vector_shift, , int, s, 8, 8, -1); + TEST_VDUP(vector_shift, , int, s, 16, 4, -1); + TEST_VDUP(vector_shift, , int, s, 32, 2, -2); + TEST_VDUP(vector_shift, , int, s, 64, 1, -4); + TEST_VDUP(vector_shift, q, int, s, 8, 16, -2); + TEST_VDUP(vector_shift, q, int, s, 16, 8, -5); + 
TEST_VDUP(vector_shift, q, int, s, 32, 4, -3); + TEST_VDUP(vector_shift, q, int, s, 64, 2, -5); + + TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int); + + dump_results_hex2 (TEST_MSG, " (negative shift amount)"); +} diff --git a/ref_vshl_n.c b/ref_vshl_n.c new file mode 100644 index 0000000..92f0fa1 --- /dev/null +++ b/ref_vshl_n.c @@ -0,0 +1,75 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHL_N" +void exec_vshl_n (void) +{ + /* Basic test: v2=vshl_n(v1,v), then store the result. 
*/ +#define TEST_VSHL_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshl##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHL_N(, int, s, 8, 8, 1); + TEST_VSHL_N(, int, s, 16, 4, 1); + TEST_VSHL_N(, int, s, 32, 2, 3); + TEST_VSHL_N(, int, s, 64, 1, 2); + TEST_VSHL_N(, uint, u, 8, 8, 2); + TEST_VSHL_N(, uint, u, 16, 4, 4); + TEST_VSHL_N(, uint, u, 32, 2, 3); + TEST_VSHL_N(, uint, u, 64, 1, 1); + + TEST_VSHL_N(q, int, s, 8, 16, 5); + TEST_VSHL_N(q, int, s, 16, 8, 1); + TEST_VSHL_N(q, int, s, 32, 4, 2); + TEST_VSHL_N(q, int, s, 64, 2, 2); + TEST_VSHL_N(q, uint, u, 8, 16, 2); + TEST_VSHL_N(q, uint, u, 16, 8, 3); + TEST_VSHL_N(q, uint, u, 32, 4, 2); + TEST_VSHL_N(q, uint, u, 64, 2, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshll_n.c b/ref_vshll_n.c new file mode 100644 index 0000000..e942adc --- /dev/null +++ b/ref_vshll_n.c @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHLL_N" +void exec_vshll_n (void) +{ + /* Basic test: v2=vshll_n(v1,v), then store the result. */ +#define TEST_VSHLL_N(T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vshll##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1q##_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHLL_N(int, s, 8, 16, 8, 1); + TEST_VSHLL_N(int, s, 16, 32, 4, 1); + TEST_VSHLL_N(int, s, 32, 64, 2, 3); + TEST_VSHLL_N(uint, u, 8, 16, 8, 2); + TEST_VSHLL_N(uint, u, 16, 32, 4, 4); + TEST_VSHLL_N(uint, u, 32, 64, 2, 3); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshr_n.c b/ref_vshr_n.c new file mode 100644 index 0000000..d9e905b --- /dev/null +++ b/ref_vshr_n.c @@ -0,0 +1,76 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the 
rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHR_N" +void exec_vshr_n (void) +{ + /* Basic test: y=vshr_n(x,v), then store the result. 
*/ +#define TEST_VSHR_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vshr##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose shift amount arbitrarily */ + TEST_VSHR_N(, int, s, 8, 8, 1); + TEST_VSHR_N(, int, s, 16, 4, 12); + TEST_VSHR_N(, int, s, 32, 2, 2); + TEST_VSHR_N(, int, s, 64, 1, 32); + TEST_VSHR_N(, uint, u, 8, 8, 2); + TEST_VSHR_N(, uint, u, 16, 4, 3); + TEST_VSHR_N(, uint, u, 32, 2, 5); + TEST_VSHR_N(, uint, u, 64, 1, 33); + + TEST_VSHR_N(q, int, s, 8, 16, 1); + TEST_VSHR_N(q, int, s, 16, 8, 12); + TEST_VSHR_N(q, int, s, 32, 4, 2); + TEST_VSHR_N(q, int, s, 64, 2, 32); + TEST_VSHR_N(q, uint, u, 8, 16, 2); + TEST_VSHR_N(q, uint, u, 16, 8, 3); + TEST_VSHR_N(q, uint, u, 32, 4, 5); + TEST_VSHR_N(q, uint, u, 64, 2, 33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vshrn_n.c b/ref_vshrn_n.c new file mode 100644 index 0000000..c520fbf --- /dev/null +++ b/ref_vshrn_n.c @@ -0,0 +1,81 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included 
in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSHRN_N" +void exec_vshrn_n (void) +{ + /* Basic test: y=vshrn_n(x,v), then store the result. */ +#define TEST_VSHRN_N(T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W2, N) = \ + vshrn_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* vector is twice as large as vector_res */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 2); + DECL_VARIABLE(vector_res, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 2); + + clean_results (); + + TEST_VLOAD(vector, buffer, q, int, s, 16, 8); + TEST_VLOAD(vector, buffer, q, int, s, 32, 4); + TEST_VLOAD(vector, buffer, q, int, s, 64, 2); + TEST_VLOAD(vector, buffer, q, uint, u, 16, 8); + TEST_VLOAD(vector, buffer, q, uint, u, 32, 4); + TEST_VLOAD(vector, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily */ + TEST_VSHRN_N(int, s, 16, 8, 8, 1); + 
TEST_VSHRN_N(int, s, 32, 16, 4, 1); + TEST_VSHRN_N(int, s, 64, 32, 2, 2); + TEST_VSHRN_N(uint, u, 16, 8, 8, 2); + TEST_VSHRN_N(uint, u, 32, 16, 4, 3); + TEST_VSHRN_N(uint, u, 64, 32, 2, 3); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vsli_n.c b/ref_vsli_n.c new file mode 100644 index 0000000..2666af6 --- /dev/null +++ b/ref_vsli_n.c @@ -0,0 +1,104 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vsli +#define TEST_MSG "VSLI_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vmlx_n(vector, vector2, val), + then store the result. 
*/ +#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + TEST_VDUP(vector2, , int, s, 8, 8, 2); + TEST_VDUP(vector2, , int, s, 16, 4, -4); + TEST_VDUP(vector2, , int, s, 32, 2, 3); + TEST_VDUP(vector2, , int, s, 64, 1, 100); + TEST_VDUP(vector2, , uint, u, 8, 8, 20); + TEST_VDUP(vector2, , uint, u, 16, 4, 30); + TEST_VDUP(vector2, , uint, u, 32, 2, 40); + TEST_VDUP(vector2, , uint, u, 64, 1, 2); + TEST_VDUP(vector2, q, int, s, 8, 16, -10); + TEST_VDUP(vector2, q, int, s, 16, 8, -20); + TEST_VDUP(vector2, q, int, s, 32, 4, -30); + TEST_VDUP(vector2, q, int, s, 64, 2, 24); + TEST_VDUP(vector2, q, uint, u, 8, 16, 12); + TEST_VDUP(vector2, q, uint, u, 16, 8, 3); + TEST_VDUP(vector2, q, uint, u, 32, 4, 55); + TEST_VDUP(vector2, q, uint, u, 64, 2, 3); + + /* Choose shift amount arbitrarily */ + TEST_VMLX_N(INSN_NAME, , int, s, 8, 8, 4); + TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 3); + TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 1); + TEST_VMLX_N(INSN_NAME, , int, s, 64, 1, 32); + TEST_VMLX_N(INSN_NAME, , uint, u, 8, 8, 2); + TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 10); + TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 30); + TEST_VMLX_N(INSN_NAME, , uint, u, 64, 1, 3); + TEST_VMLX_N(INSN_NAME, q, int, s, 8, 16, 5); + TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 3); + TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 
20); + TEST_VMLX_N(INSN_NAME, q, int, s, 64, 2, 16); + TEST_VMLX_N(INSN_NAME, q, uint, u, 8, 16, 3); + TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 12); + TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 23); + TEST_VMLX_N(INSN_NAME, q, uint, u, 64, 2, 53); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vsra_n.c b/ref_vsra_n.c new file mode 100644 index 0000000..aa9bdcc --- /dev/null +++ b/ref_vsra_n.c @@ -0,0 +1,97 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VSRA_N" +void exec_vsra_n (void) +{ + /* Basic test: y=vsra_n(x,v), then store the result. 
*/ +#define TEST_VSRA_N(Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + vsra##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , int, s, 64, 1, 0x44); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , uint, u, 64, 1, 0x88); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, int, s, 64, 2, 0x44); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, uint, u, 64, 2, 0x88); + + /* Choose shift amount arbitrarily */ + TEST_VSRA_N(, int, s, 8, 8, 1); + TEST_VSRA_N(, int, s, 16, 4, 12); + TEST_VSRA_N(, int, s, 32, 2, 2); + TEST_VSRA_N(, int, s, 64, 1, 32); + TEST_VSRA_N(, uint, u, 8, 8, 2); + TEST_VSRA_N(, uint, u, 16, 4, 3); + TEST_VSRA_N(, uint, u, 32, 2, 5); + TEST_VSRA_N(, uint, u, 64, 1, 33); + + TEST_VSRA_N(q, int, s, 8, 16, 1); + TEST_VSRA_N(q, int, s, 16, 8, 12); + TEST_VSRA_N(q, int, s, 32, 4, 2); + TEST_VSRA_N(q, int, s, 64, 2, 32); + TEST_VSRA_N(q, uint, u, 8, 16, 2); + TEST_VSRA_N(q, uint, u, 16, 8, 3); + TEST_VSRA_N(q, uint, u, 32, 4, 5); + TEST_VSRA_N(q, uint, u, 64, 2, 
33); + + /* FIXME: only a few result buffers are used, but we output all of them */ + dump_results_hex (TEST_MSG); +} diff --git a/ref_vsri_n.c b/ref_vsri_n.c new file mode 100644 index 0000000..a4e916a --- /dev/null +++ b/ref_vsri_n.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsri +#define TEST_MSG "VSRI_N" + +#include "ref_vsli_n.c" diff --git a/ref_vst1_lane.c b/ref_vst1_lane.c new file mode 100644 index 0000000..2c14c1c --- /dev/null +++ b/ref_vst1_lane.c @@ -0,0 +1,71 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#define TEST_MSG "VST1_LANE/VST1_LANEQ" +void exec_vst1_lane (void) +{ +#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \ + VECT_VAR(vector, T1, W, N) = \ + vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ + vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector, T1, W, N), L) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + + clean_results (); + + /* Choose lane arbitrarily */ + TEST_VST1_LANE(, int, s, 8, 8, 7); + TEST_VST1_LANE(, int, s, 16, 4, 3); + TEST_VST1_LANE(, int, s, 32, 2, 1); + TEST_VST1_LANE(, int, s, 64, 1, 0); + TEST_VST1_LANE(, uint, u, 8, 8, 6); + TEST_VST1_LANE(, uint, u, 16, 4, 2); + TEST_VST1_LANE(, uint, u, 32, 2, 0); + TEST_VST1_LANE(, uint, u, 64, 1, 0); + TEST_VST1_LANE(, float, f, 32, 2, 1); + + TEST_VST1_LANE(q, int, s, 8, 16, 15); + TEST_VST1_LANE(q, int, s, 16, 8, 5); + TEST_VST1_LANE(q, int, s, 32, 4, 1); + TEST_VST1_LANE(q, int, s, 64, 2, 1); + TEST_VST1_LANE(q, uint, u, 8, 16, 10); + TEST_VST1_LANE(q, uint, u, 16, 8, 4); + TEST_VST1_LANE(q, uint, u, 32, 4, 3); + TEST_VST1_LANE(q, uint, u, 64, 2, 0); + TEST_VST1_LANE(q, float, f, 32, 4, 1); + + dump_results_hex (TEST_MSG); +} diff --git a/ref_vstX_lane.c b/ref_vstX_lane.c new file mode 100644 index 0000000..b12fa80 --- /dev/null +++ b/ref_vstX_lane.c @@ -0,0 +1,176 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following 
conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vstX_lane (void) +{ + /* In this case, input variables are arrays of vectors */ +#define DECL_VSTX_LANE(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ + VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. 
*/ +#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \ + memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ + sizeof(VECT_VAR(buffer_src, T1, W, N))); \ + memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \ + sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \ + \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ + vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ + \ + VECT_ARRAY_VAR(vector, T1, W, N, X) = \ + vld##X##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ + L); \ + vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vector, T1, W, N, X), \ + L); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[Y] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need all variants in 64 bits, but there is no 64x2 variant */ +#define DECL_ALL_VSTX_LANE(X) \ + DECL_VSTX_LANE(int, 8, 8, X); \ + DECL_VSTX_LANE(int, 16, 4, X); \ + DECL_VSTX_LANE(int, 32, 2, X); \ + DECL_VSTX_LANE(uint, 8, 8, X); \ + DECL_VSTX_LANE(uint, 16, 4, X); \ + DECL_VSTX_LANE(uint, 32, 2, X); \ + DECL_VSTX_LANE(float, 32, 2, X); \ + DECL_VSTX_LANE(int, 16, 8, X); \ + DECL_VSTX_LANE(int, 32, 4, X); \ + DECL_VSTX_LANE(uint, 16, 8, X); \ + DECL_VSTX_LANE(uint, 32, 4, X); \ + DECL_VSTX_LANE(float, 32, 4, X) + +#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] + + /* Use the same lanes regardless of the size of the array (X), for + simplicity */ +#define TEST_ALL_VSTX_LANE(X) \ + TEST_VSTX_LANE(, int, s, 8, 8, X, 7); \ + TEST_VSTX_LANE(, int, s, 16, 4, X, 2); \ + TEST_VSTX_LANE(, int, s, 32, 2, X, 0); \ + TEST_VSTX_LANE(, float, f, 32, 2, X, 0); \ + TEST_VSTX_LANE(, uint, u, 8, 8, X, 4); \ + 
TEST_VSTX_LANE(, uint, u, 16, 4, X, 3); \ + TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \ + TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \ + TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \ + TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \ + TEST_VSTX_LANE(q, uint, u, 32, 4, X, 0); \ + TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) + +#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ + TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ + TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ + TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 32, 4, X, Y) + + /* Declare the temporary buffers / variables */ + DECL_ALL_VSTX_LANE(2); + DECL_ALL_VSTX_LANE(3); + DECL_ALL_VSTX_LANE(4); + + /* Define dummy input arrays, large enough for x4 vectors */ + DUMMY_ARRAY(buffer_src, int, 8, 8, 4); + DUMMY_ARRAY(buffer_src, int, 16, 4, 4); + DUMMY_ARRAY(buffer_src, int, 32, 2, 4); + DUMMY_ARRAY(buffer_src, uint, 8, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); + DUMMY_ARRAY(buffer_src, float, 32, 2, 4); + DUMMY_ARRAY(buffer_src, int, 16, 8, 4); + DUMMY_ARRAY(buffer_src, int, 32, 4, 4); + DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); + DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); + DUMMY_ARRAY(buffer_src, float, 32, 4, 4); + + /* Check vst2_lane/vst2q_lane */ + clean_results (); +#define TEST_MSG "VST2_LANE/VST2Q_LANE" + TEST_ALL_VSTX_LANE(2); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(2, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + + /* Check vst3_lane/vst3q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VST3_LANE/VST3Q_LANE" + TEST_ALL_VSTX_LANE(3); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(3, 1); + 
dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(3, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + + /* Check vst4_lane/vst4q_lane */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VST4_LANE/VST4Q_LANE" + TEST_ALL_VSTX_LANE(4); + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(4, 1); + dump_results_hex2 (TEST_MSG, " chunk 1"); + TEST_ALL_EXTRA_CHUNKS(4, 2); + dump_results_hex2 (TEST_MSG, " chunk 2"); + TEST_ALL_EXTRA_CHUNKS(4, 3); + dump_results_hex2 (TEST_MSG, " chunk 3"); +} diff --git a/ref_vsub.c b/ref_vsub.c new file mode 100644 index 0000000..1f81cd0 --- /dev/null +++ b/ref_vsub.c @@ -0,0 +1,60 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsub +#define TEST_MSG "VSUB/VSUBQ" + +/* Extra tests for functions requiring floating-point types */ +void exec_vsub_f32(void); +#define EXTRA_TESTS exec_vsub_f32 + +#include "ref_v_binary_op.c" + +void exec_vsub_f32(void) +{ + int i; + + DECL_VARIABLE(vector, float, 32, 2); + DECL_VARIABLE(vector, float, 32, 4); + + DECL_VARIABLE(vector2, float, 32, 2); + DECL_VARIABLE(vector2, float, 32, 4); + + DECL_VARIABLE(vector_res, float, 32, 2); + DECL_VARIABLE(vector_res, float, 32, 4); + + TEST_VDUP(vector, , float, f, 32, 2, 2.3); + TEST_VDUP(vector, q, float, f, 32, 4, 3.4); + + TEST_VDUP(vector2, , float, f, 32, 2, 4.5); + TEST_VDUP(vector2, q, float, f, 32, 4, 5.6); + + TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + + fprintf(ref_file, "\nfloat32:\n"); + DUMP_FP(TEST_MSG, float, 32, 2, PRIx32); + DUMP_FP(TEST_MSG, float, 32, 4, PRIx32); +} diff --git a/ref_vsubhn.c b/ref_vsubhn.c new file mode 100644 index 0000000..ef68d60 --- /dev/null +++ b/ref_vsubhn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vsubhn +#define TEST_MSG "VSUBHN" + +#include "ref_vaddhn.c" diff --git a/ref_vsubl.c b/ref_vsubl.c new file mode 100644 index 0000000..093ab53 --- /dev/null +++ b/ref_vsubl.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsubl +#define TEST_MSG "VSUBL" + +#include "ref_vaddl.c" diff --git a/ref_vsubw.c b/ref_vsubw.c new file mode 100644 index 0000000..1df07d1 --- /dev/null +++ b/ref_vsubw.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#define INSN_NAME vsubw +#define TEST_MSG "VSUBW" + +#include "ref_vaddw.c" diff --git a/ref_vtbX.c b/ref_vtbX.c new file mode 100644 index 0000000..26f0b3d --- /dev/null +++ b/ref_vtbX.c @@ -0,0 +1,213 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +void exec_vtbX (void) +{ + int i; + + /* In this case, input variables are arrays of vectors */ +#define DECL_VTBX(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(table_vector, T1, W, N, X) + + /* The vtbl1 variant is different from vtbl{2,3,4} because it takes a + vector as 1st param, instead of an array of vectors */ +#define TEST_VTBL1(T1, T2, W, N) \ + VECT_VAR(table_vector, T1, W, N) = \ + vld1##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbl1_##T2##W(VECT_VAR(table_vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_VTBLX(T1, T2, W, N, X) \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ + vld##X##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbl##X##_##T2##W(VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ + VECT_VAR(vector, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + + /* We need to define a lookup table */ + uint8_t lookup_table[32]; + + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector_res, int, 8, 8); + DECL_VARIABLE(vector_res, uint, 8, 8); + + /* For vtbl1 */ + DECL_VARIABLE(table_vector, int, 8, 8); + DECL_VARIABLE(table_vector, uint, 8, 8); + + /* For vtbx* */ + DECL_VARIABLE(default_vector, int, 8, 8); + DECL_VARIABLE(default_vector, uint, 8, 8); + + /* We need only 8 bits variants */ +#define DECL_ALL_VTBLX(X) \ + DECL_VTBX(int, 8, 8, X); \ + DECL_VTBX(uint, 8, 8, X) + +#define TEST_ALL_VTBL1() \ + TEST_VTBL1(int, s, 8, 8); \ + TEST_VTBL1(uint, u, 8, 8) + +#define TEST_ALL_VTBLX(X) \ + TEST_VTBLX(int, s, 8, 8, X); \ + TEST_VTBLX(uint, u, 8, 8, X) + + /* Declare 
the temporary buffers / variables */ + DECL_ALL_VTBLX(2); + DECL_ALL_VTBLX(3); + DECL_ALL_VTBLX(4); + + /* Fill the lookup table */ + for (i=0; i<32; i++) { + lookup_table[i] = i-15; + } + + /* Choose init value arbitrarily, will be used as table index */ + TEST_VDUP(vector, , int, s, 8, 8, 1); + TEST_VDUP(vector, , uint, u, 8, 8, 2); + + /* To ensure code coverage of lib, add some indexes larger than 8,16 and 32 */ + /* except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9) */ + TEST_VSET_LANE(vector, , int, s, 8, 8, 0, 10); + TEST_VSET_LANE(vector, , int, s, 8, 8, 4, 20); + TEST_VSET_LANE(vector, , int, s, 8, 8, 5, 40); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 0, 10); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 4, 20); + TEST_VSET_LANE(vector, , uint, u, 8, 8, 5, 40); + + + /* Check vtbl1 */ + clean_results (); +#define TEST_MSG "VTBL1" + TEST_ALL_VTBL1(); + dump_results_hex (TEST_MSG); + + /* Check vtbl2 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL2" + TEST_ALL_VTBLX(2); + dump_results_hex (TEST_MSG); + + /* Check vtbl3 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL3" + TEST_ALL_VTBLX(3); + dump_results_hex (TEST_MSG); + + /* Check vtbl4 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBL4" + TEST_ALL_VTBLX(4); + dump_results_hex (TEST_MSG); + + + /* Now test VTBX */ + + /* The vtbx1 variant is different from vtbx{2,3,4} because it takes a + vector as 1st param, instead of an array of vectors */ +#define TEST_VTBX1(T1, T2, W, N) \ + VECT_VAR(table_vector, T1, W, N) = \ + vld1##_##T2##W((T1##W##_t *)lookup_table); \ + \ + VECT_VAR(vector_res, T1, W, N) = \ + vtbx1_##T2##W(VECT_VAR(default_vector, T1, W, N), \ + VECT_VAR(table_vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_VTBXX(T1, T2, W, N, X) \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X) = \ + vld##X##_##T2##W((T1##W##_t *)lookup_table); \ + \ + 
VECT_VAR(vector_res, T1, W, N) = \ + vtbx##X##_##T2##W(VECT_VAR(default_vector, T1, W, N), \ + VECT_ARRAY_VAR(table_vector, T1, W, N, X), \ + VECT_VAR(vector, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); + +#define TEST_ALL_VTBX1() \ + TEST_VTBX1(int, s, 8, 8); \ + TEST_VTBX1(uint, u, 8, 8) + +#define TEST_ALL_VTBXX(X) \ + TEST_VTBXX(int, s, 8, 8, X); \ + TEST_VTBXX(uint, u, 8, 8, X) + + /* Choose init value arbitrarily, will be used as default value */ + TEST_VDUP(default_vector, , int, s, 8, 8, 0x33); + TEST_VDUP(default_vector, , uint, u, 8, 8, 0xCC); + + /* Check vtbx1 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX1" + TEST_ALL_VTBX1(); + dump_results_hex (TEST_MSG); + + /* Check vtbx2 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX2" + TEST_ALL_VTBXX(2); + dump_results_hex (TEST_MSG); + + /* Check vtbx3 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX3" + TEST_ALL_VTBXX(3); + dump_results_hex (TEST_MSG); + + /* Check vtbx4 */ + clean_results (); +#undef TEST_MSG +#define TEST_MSG "VTBX4" + TEST_ALL_VTBXX(4); + dump_results_hex (TEST_MSG); +} diff --git a/ref_vtrn.c b/ref_vtrn.c new file mode 100644 index 0000000..4ac3e84 --- /dev/null +++ b/ref_vtrn.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vtrn +#define TEST_MSG "VTRN/VTRNQ" + +#include "ref_vuzp.c" diff --git a/ref_vtst.c b/ref_vtst.c new file mode 100644 index 0000000..2aaeaae --- /dev/null +++ b/ref_vtst.c @@ -0,0 +1,99 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vtst +#define TEST_MSG "VTST/VTSTQ" +#endif + +/* Can't use the standard ref_v_binary_op.c template because vtst has + no 64 bits variant, and outputs are always of uint type */ +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* Basic test: y=OP(x,x), then store the result. */ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, uint, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \ + VECT_VAR(vector_res, uint, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_UNSIGNED_VARIANTS(vector_res); + + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Choose init value arbitrarily, will be used as comparison value */ + TEST_VDUP(vector2, , int, s, 8, 8, 15); + TEST_VDUP(vector2, , int, s, 16, 4, 5); + TEST_VDUP(vector2, , int, s, 32, 2, 1); + TEST_VDUP(vector2, , uint, u, 8, 8, 15); + TEST_VDUP(vector2, , uint, u, 16, 4, 5); + TEST_VDUP(vector2, , uint, u, 32, 2, 1); + TEST_VDUP(vector2, q, int, s, 8, 16, 15); + TEST_VDUP(vector2, q, int, s, 16, 8, 5); + TEST_VDUP(vector2, q, int, s, 32, 4, 1); + TEST_VDUP(vector2, q, uint, u, 8, 16, 15); + TEST_VDUP(vector2, q, uint, u, 16, 8, 5); + TEST_VDUP(vector2, q, uint, u, 32, 4, 1); + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR, T1, T2) \ + MACRO(VAR, , T1, T2, 8, 8); \ + MACRO(VAR, , T1, T2, 16, 4); \ + MACRO(VAR, , T1, T2, 32, 2); \ + MACRO(VAR, q, T1, T2, 8, 16); \ + 
MACRO(VAR, q, T1, T2, 16, 8); \ + MACRO(VAR, q, T1, T2, 32, 4) + + /* Split the test, as both signed and unsigned variants output their + result in an unsigned form (thus the same output variable is used + in these tests) */ + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, int, s); + dump_results_hex2 (TEST_MSG, " (signed input)"); + + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME, uint, u); + dump_results_hex2 (TEST_MSG, " (unsigned input)"); +} diff --git a/ref_vuzp.c b/ref_vuzp.c new file mode 100644 index 0000000..aa5854d --- /dev/null +++ b/ref_vuzp.c @@ -0,0 +1,155 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vuzp +#define TEST_MSG "VUZP/VUZPQ" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* In this case, output variables are arrays of vectors */ +#define DECL_VUZP(T1, W, N) \ + VECT_ARRAY_TYPE(T1, W, N, 2) VECT_ARRAY_VAR(result_vec, T1, W, N, 2); \ + VECT_VAR_DECL(result_bis, T1, W, N)[2 * N] + + /* We need to use a temporary result buffer (result_bis), because + the one used for other tests is not large enough. A subset of the + result data is moved from result_bis to result, and it is this + subset which is used to check the actual behaviour. The next + macro enables to move another chunk of data from result_bis to + result. */ +#define TEST_VUZP(INSN, Q, T1, T2, W, N) \ + VECT_ARRAY_VAR(result_vec, T1, W, N, 2) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst2##Q##_##T2##W(VECT_VAR(result_bis, T1, W, N), \ + VECT_ARRAY_VAR(result_vec, T1, W, N, 2)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[X] */ +#define TEST_EXTRA_CHUNK(T1, W, N, X) \ + memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector1); + DECL_VARIABLE_ALL_VARIANTS(vector2); + + /* We don't need 64 bits variants */ +#define DECL_ALL_VUZP() \ + DECL_VUZP(int, 8, 8); \ + DECL_VUZP(int, 16, 4); \ + DECL_VUZP(int, 32, 2); \ + DECL_VUZP(uint, 8, 8); \ + DECL_VUZP(uint, 16, 4); \ + DECL_VUZP(uint, 32, 2); \ + DECL_VUZP(float, 32, 2); \ + DECL_VUZP(int, 8, 16); \ + DECL_VUZP(int, 16, 8); \ + DECL_VUZP(int, 32, 4); \ + 
DECL_VUZP(uint, 8, 16); \ + DECL_VUZP(uint, 16, 8); \ + DECL_VUZP(uint, 32, 4); \ + DECL_VUZP(float, 32, 4) + + DECL_ALL_VUZP(); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector1, buffer); + TEST_VLOAD(vector1, buffer, , float, f, 32, 2); + TEST_VLOAD(vector1, buffer, q, float, f, 32, 4); + + /* Choose arbitrary initialization values */ + TEST_VDUP(vector2, , int, s, 8, 8, 0x11); + TEST_VDUP(vector2, , int, s, 16, 4, 0x22); + TEST_VDUP(vector2, , int, s, 32, 2, 0x33); + TEST_VDUP(vector2, , uint, u, 8, 8, 0x55); + TEST_VDUP(vector2, , uint, u, 16, 4, 0x66); + TEST_VDUP(vector2, , uint, u, 32, 2, 0x77); + TEST_VDUP(vector2, , float, f, 32, 2, 33.6); + + TEST_VDUP(vector2, q, int, s, 8, 16, 0x11); + TEST_VDUP(vector2, q, int, s, 16, 8, 0x22); + TEST_VDUP(vector2, q, int, s, 32, 4, 0x33); + TEST_VDUP(vector2, q, uint, u, 8, 16, 0x55); + TEST_VDUP(vector2, q, uint, u, 16, 8, 0x66); + TEST_VDUP(vector2, q, uint, u, 32, 4, 0x77); + TEST_VDUP(vector2, q, float, f, 32, 4, 33.8); + +#define TEST_ALL_VUZP(INSN) \ + TEST_VUZP(INSN, , int, s, 8, 8); \ + TEST_VUZP(INSN, , int, s, 16, 4); \ + TEST_VUZP(INSN, , int, s, 32, 2); \ + TEST_VUZP(INSN, , uint, u, 8, 8); \ + TEST_VUZP(INSN, , uint, u, 16, 4); \ + TEST_VUZP(INSN, , uint, u, 32, 2); \ + TEST_VUZP(INSN, , float, f, 32, 2); \ + TEST_VUZP(INSN, q, int, s, 8, 16); \ + TEST_VUZP(INSN, q, int, s, 16, 8); \ + TEST_VUZP(INSN, q, int, s, 32, 4); \ + TEST_VUZP(INSN, q, uint, u, 8, 16); \ + TEST_VUZP(INSN, q, uint, u, 16, 8); \ + TEST_VUZP(INSN, q, uint, u, 32, 4); \ + TEST_VUZP(INSN, q, float, f, 32, 4) + +#define TEST_ALL_EXTRA_CHUNKS() \ + TEST_EXTRA_CHUNK(int, 8, 8, 1); \ + TEST_EXTRA_CHUNK(int, 16, 4, 1); \ + TEST_EXTRA_CHUNK(int, 32, 2, 1); \ + TEST_EXTRA_CHUNK(uint, 8, 8, 1); \ + TEST_EXTRA_CHUNK(uint, 16, 4, 1); \ + TEST_EXTRA_CHUNK(uint, 32, 2, 1); \ + TEST_EXTRA_CHUNK(float, 32, 2, 1); \ + TEST_EXTRA_CHUNK(int, 8, 16, 1); \ + TEST_EXTRA_CHUNK(int, 16, 8, 1); \ + 
TEST_EXTRA_CHUNK(int, 32, 4, 1); \ + TEST_EXTRA_CHUNK(uint, 8, 16, 1); \ + TEST_EXTRA_CHUNK(uint, 16, 8, 1); \ + TEST_EXTRA_CHUNK(uint, 32, 4, 1); \ + TEST_EXTRA_CHUNK(float, 32, 4, 1) + + /* Check vuzp/vuzpq */ + clean_results (); + TEST_ALL_VUZP(INSN_NAME); + + dump_results_hex2 (TEST_MSG, " chunk 0"); + + TEST_ALL_EXTRA_CHUNKS(); + dump_results_hex2 (TEST_MSG, " chunk 1"); +} diff --git a/ref_vzip.c b/ref_vzip.c new file mode 100644 index 0000000..8c0b363 --- /dev/null +++ b/ref_vzip.c @@ -0,0 +1,29 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#define INSN_NAME vzip +#define TEST_MSG "VZIP/VZIPQ" + +#include "ref_vuzp.c" diff --git a/retarget.c b/retarget.c new file mode 100644 index 0000000..7fb7fdf --- /dev/null +++ b/retarget.c @@ -0,0 +1,42 @@ +/* +** Copyright (C) ARM Limited, 2005. All rights reserved. 
+*/ + +#include <stdio.h> +#include <rt_misc.h> + +// Heap base from scatter file +extern int Image$$HEAP$$ZI$$Base; +//#pragma import(__use_two_region_memory) + +extern void core_init(void); + +/* +The functions below are patched onto main. +*/ + +extern void $Super$$main(void); + +void $Sub$$main(void) +{ + core_init(); // does some extra setup work + + $Super$$main(); // calls the original function +} + + +/* +This function re-implements the C Library semihosted function. The stack pointer +has aready been set and is passed back to the function, The base of the heap is +set from the scatter file +*/ +__value_in_regs struct __initial_stackheap __user_initial_stackheap( + unsigned R0, unsigned SP, unsigned R2, unsigned SL) +{ + struct __initial_stackheap config; + + config.heap_base = (unsigned int)&Image$$HEAP$$ZI$$Base; // placed by scatterfile + config.stack_base = SP; // inherit SP from the execution environment + + return config; +} diff --git a/scatter.scat b/scatter.scat new file mode 100644 index 0000000..1f03141 --- /dev/null +++ b/scatter.scat @@ -0,0 +1,29 @@ +;; Copyright ARM Ltd 2005. All rights reserved. + +ROM_LOAD 0x2000 +{ + + ROM_EXEC 0x2000 + { + init.o (CortexA8, +First) ; Create Translation Table + * (InRoot$$Sections) ; this section must be in a root region + } + + I-TCM 0x30000 FIXED ; 0x1E000 ; built at 0x100 to avoid vector space + { ; assumes 32K I-TCM + + * (+RO) ; any remaining code inc C lib. + } + + D-TCM 0x200000 0x40000 ; 8 Kb of D-TCM used for RW/ZI + { + * (+RW,+ZI) + } + + HEAP 0x4E0000 EMPTY 0x100000 {} ; 8Kb Heap follows direcly after RW/ZI + + STACK 0x300000 EMPTY -0x8000 {} ; 32KB Stack, starts after DTCM block. 
+ + TTB 0x20000 EMPTY 0x4000 {} ; place translation table at 0x28000, 16Kb required + +} diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h new file mode 100644 index 0000000..ae24127 --- /dev/null +++ b/stm-arm-neon-ref.h @@ -0,0 +1,438 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifndef _STM_ARM_NEON_REF_H_ +#define _STM_ARM_NEON_REF_H_ + +#include <stdio.h> +#include <inttypes.h> +#include <string.h> + +#define xSTR(X) #X +#define STR(X) xSTR(X) + +#define xNAME1(V,T) V ## _ ## T +#define xNAME(V,T) xNAME1(V,T) + +#define VAR(V,T,W) xNAME(V,T##W) +#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W) + +#define VECT_NAME(T, W, N) T##W##x##N +#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L +#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t) +#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t) + +#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N)) +#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N) +#define VECT_VAR_DECL_INIT(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N) INIT_TAB(T##W##_t) +#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N] + +#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L)) + +static int result_idx = 0; +#define DUMP(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \ + } \ + fprintf(ref_file, " }\n"); + +#define DUMP_FP(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + union fp_operand { \ + uint##W##_t i; \ + float##W##_t f; \ + } tmp; \ + tmp.f = VECT_VAR(result, T, W, N)[i]; \ + fprintf(ref_file, "%" FMT " %a %g, ", tmp.i, tmp.f, tmp.f); \ + } \ + fprintf(ref_file, " }\n"); + +#define CLEAN_PATTERN_8 0x33 +#define CLEAN_PATTERN_16 0x3333 +#define CLEAN_PATTERN_32 0x33333333 +#define CLEAN_PATTERN_64 0x3333333333333333 + +#define CLEAN(VAR,T,W,N) \ + memset(VECT_VAR(VAR, T, W, N), \ + CLEAN_PATTERN_8, \ + sizeof(VECT_VAR(VAR, T, W, N))); + +#define CHECK_INIT(VAR,Q,T1,T2,W,N) \ + { \ + ARRAY(check_result, T1, W, N); \ + int i; \ + \ + vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N), \ + VECT_VAR(VAR, T1, W, N)); \ + 
for(i=0; i<N ; i++) \ + { \ + /*if (VECT_VAR(check_result, T1, W, N)[i] == CLEAN_PATTERN_##W)*/ { \ + fprintf(stdout, "%s:%d: %s[%d] unintialized! %#x\n", \ + __FUNCTION__, __LINE__, \ + STR(VECT_VAR(VAR, T1, W, N)), i, \ + VECT_VAR(check_result, T1, W, N)[i]); \ + } \ + } \ + } + +/* Generic declarations: */ +extern FILE* log_file; +extern FILE* ref_file; + +/* Sample initialization vectors */ +#define INIT_TAB(T) [] = { (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, \ + (T)-10, (T)-9, (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, \ + (T)-3, (T)-2, (T)-1, (T)0, (T)1, (T)2, (T)3, (T)4, \ + (T)5, (T)6, (T)7, (T)8, (T)9, (T)10, (T)11, (T)12, \ + (T)13, (T)14, (T)15 } + +/* Input buffers, 1 of each size */ +static VECT_VAR_DECL_INIT(buffer, int, 8, 8); +static VECT_VAR_DECL_INIT(buffer, int, 16, 4); +static VECT_VAR_DECL_INIT(buffer, int, 32, 2); +static VECT_VAR_DECL_INIT(buffer, int, 64, 1); +static VECT_VAR_DECL_INIT(buffer, uint, 8, 8); +static VECT_VAR_DECL_INIT(buffer, uint, 16, 4); +static VECT_VAR_DECL_INIT(buffer, uint, 32, 2); +static VECT_VAR_DECL_INIT(buffer, uint, 64, 1); +static VECT_VAR_DECL_INIT(buffer, float, 32, 2); +static VECT_VAR_DECL_INIT(buffer, int, 8, 16); +static VECT_VAR_DECL_INIT(buffer, int, 16, 8); +static VECT_VAR_DECL_INIT(buffer, int, 32, 4); +static VECT_VAR_DECL_INIT(buffer, int, 64, 2); +static VECT_VAR_DECL_INIT(buffer, uint, 8, 16); +static VECT_VAR_DECL_INIT(buffer, uint, 16, 8); +static VECT_VAR_DECL_INIT(buffer, uint, 32, 4); +static VECT_VAR_DECL_INIT(buffer, uint, 64, 2); +static VECT_VAR_DECL_INIT(buffer, float, 32, 4); + +/* Output buffers, 1 of each size */ +static ARRAY(result, int, 8, 8); +static ARRAY(result, int, 16, 4); +static ARRAY(result, int, 32, 2); +static ARRAY(result, int, 64, 1); +static ARRAY(result, uint, 8, 8); +static ARRAY(result, uint, 16, 4); +static ARRAY(result, uint, 32, 2); +static ARRAY(result, uint, 64, 1); +static ARRAY(result, float, 32, 2); +static ARRAY(result, int, 8, 16); +static ARRAY(result, int, 
16, 8); +static ARRAY(result, int, 32, 4); +static ARRAY(result, int, 64, 2); +static ARRAY(result, uint, 8, 16); +static ARRAY(result, uint, 16, 8); +static ARRAY(result, uint, 32, 4); +static ARRAY(result, uint, 64, 2); +static ARRAY(result, float, 32, 4); + +/* Dump results (generic function) */ +static void dump_results (char *test_name) +{ + int i; + + fprintf(ref_file, "\n%s output:\n", test_name); + + DUMP(test_name, int, 8, 8, PRId8); + DUMP(test_name, int, 16, 4, PRId16); + DUMP(test_name, int, 32, 2, PRId32); + DUMP(test_name, int, 64, 1, PRId64); + DUMP(test_name, uint, 8, 8, PRIu8); + DUMP(test_name, uint, 16, 4, PRIu16); + DUMP(test_name, uint, 32, 2, PRIu32); + DUMP(test_name, uint, 64, 1, PRIu64); + DUMP_FP(test_name, float, 32, 2, PRIx32); + + DUMP(test_name, int, 8, 16, PRId8); + DUMP(test_name, int, 16, 8, PRId16); + DUMP(test_name, int, 32, 4, PRId32); + DUMP(test_name, int, 64, 2, PRId64); + DUMP(test_name, uint, 8, 16, PRIu8); + DUMP(test_name, uint, 16, 8, PRIu16); + DUMP(test_name, uint, 32, 4, PRIu32); + DUMP(test_name, uint, 64, 2, PRIu64); + DUMP_FP(test_name, float, 32, 4, PRIx32); +} + +/* Dump results in hex (generic function) */ +static void dump_results_hex2 (char *test_name, char* comment) +{ + int i; + + fprintf(ref_file, "\n%s%s output:\n", test_name, comment); + + DUMP(test_name, int, 8, 8, PRIx8); + DUMP(test_name, int, 16, 4, PRIx16); + DUMP(test_name, int, 32, 2, PRIx32); + DUMP(test_name, int, 64, 1, PRIx64); + DUMP(test_name, uint, 8, 8, PRIx8); + DUMP(test_name, uint, 16, 4, PRIx16); + DUMP(test_name, uint, 32, 2, PRIx32); + DUMP(test_name, uint, 64, 1, PRIx64); + DUMP_FP(test_name, float, 32, 2, PRIx32); + + DUMP(test_name, int, 8, 16, PRIx8); + DUMP(test_name, int, 16, 8, PRIx16); + DUMP(test_name, int, 32, 4, PRIx32); + DUMP(test_name, int, 64, 2, PRIx64); + DUMP(test_name, uint, 8, 16, PRIx8); + DUMP(test_name, uint, 16, 8, PRIx16); + DUMP(test_name, uint, 32, 4, PRIx32); + DUMP(test_name, uint, 64, 2, PRIx64); + 
DUMP_FP(test_name, float, 32, 4, PRIx32); +} + +static void dump_results_hex (char *test_name) +{ + dump_results_hex2(test_name, ""); +} + +#ifndef STM_ARM_NEON_MODELS + +#ifndef __BIG_ENDIAN + +typedef union { + struct { + int _xxx:27; + int QC:1; + int V:1; + int C:1; + int Z:1; + int N:1; + } b; + unsigned int word; +} _ARM_FPSCR; + +#else /* __BIG_ENDIAN */ + +typedef union { + struct { + int N:1; + int Z:1; + int C:1; + int V:1; + int QC:1; + int _dnm:27; + } b; + unsigned int word; +} _ARM_FPSCR; + +#endif /* __BIG_ENDIAN */ + +#ifdef __ARMCC_VERSION +register _ARM_FPSCR _afpscr_for_qc __asm("fpscr"); +#define Neon_Overflow _afpscr_for_qc.b.QC +#else +/* Fake declaration because GCC/ARM does not know this register */ +extern int errno; +#define Neon_Overflow errno +#endif + +#endif /* STM_ARM_NEON_MODELS */ + +static void dump_neon_overflow(char* msg, char *name) +{ + fprintf(ref_file, "%s:%d:%s Neon overflow %d\n", msg, result_idx++, + name, Neon_Overflow); +} + +/* Clean output buffers before execution */ +static void clean_results (void) +{ + result_idx = 0; + CLEAN(result, int, 8, 8); + CLEAN(result, int, 16, 4); + CLEAN(result, int, 32, 2); + CLEAN(result, int, 64, 1); + CLEAN(result, uint, 8, 8); + CLEAN(result, uint, 16, 4); + CLEAN(result, uint, 32, 2); + CLEAN(result, uint, 64, 1); + CLEAN(result, float, 32, 2); + + CLEAN(result, int, 8, 16); + CLEAN(result, int, 16, 8); + CLEAN(result, int, 32, 4); + CLEAN(result, int, 64, 2); + CLEAN(result, uint, 8, 16); + CLEAN(result, uint, 16, 8); + CLEAN(result, uint, 32, 4); + CLEAN(result, uint, 64, 2); + CLEAN(result, float, 32, 4); +} + + +/* Helpers to declare variables of various types */ +#define DECL_VARIABLE(VAR, T1, W, N) \ + VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N) + +#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, int, 8, 8); \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, int, 64, 1) + +#define 
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, uint, 8, 8); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, uint, 64, 1) + +#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, int, 8, 16); \ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, int, 64, 2) + +#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE(VAR, uint, 8, 16); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, uint, 64, 2) + +#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ + DECL_VARIABLE(VAR, float, 32, 2) + +#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \ + DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \ + DECL_VARIABLE(VAR, float, 32, 4) + +#define DECL_VARIABLE_ALL_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_VARIANTS(VAR) + +#define DECL_VARIABLE_SIGNED_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) + +#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR) \ + DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ + DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) + +/* Helpers to initialize vectors */ +#define TEST_VDUP(VAR, Q, T1, T2, W, N, V) \ + VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V) + +#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \ + VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \ + VECT_VAR(VAR, T1, W, N), \ + L) + +/* We need to load initial values first, so rely on VLD1 */ +#define TEST_VLOAD(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)) + +/* Helpers for macros with 1 constant and 5 variable arguments */ +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, , int, s, 8, 8); \ + MACRO(VAR, 
, int, s, 16, 4); \ + MACRO(VAR, , int, s, 32, 2); \ + MACRO(VAR, , int, s, 64, 1) + +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, , uint, u, 8, 8); \ + MACRO(VAR, , uint, u, 16, 4); \ + MACRO(VAR, , uint, u, 32, 2); \ + MACRO(VAR, , uint, u, 64, 1) + +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + MACRO(VAR, q, int, s, 8, 16); \ + MACRO(VAR, q, int, s, 16, 8); \ + MACRO(VAR, q, int, s, 32, 4); \ + MACRO(VAR, q, int, s, 64, 2) + +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR) \ + MACRO(VAR, q, uint, u, 8, 16); \ + MACRO(VAR, q, uint, u, 16, 8); \ + MACRO(VAR, q, uint, u, 32, 4); \ + MACRO(VAR, q, uint, u, 64, 2) + +#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) + +#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) + +/* Helpers for macros with 2 constant and 5 variable arguments */ +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, , int, s, 8, 8); \ + MACRO(VAR1, VAR2, , int, s, 16, 4); \ + MACRO(VAR1, VAR2, , int, s, 32, 2); \ + MACRO(VAR1, VAR2 , , int, s, 64, 1) + +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, , uint, u, 8, 8); \ + MACRO(VAR1, VAR2, , uint, u, 16, 4); \ + MACRO(VAR1, VAR2, , uint, u, 32, 2); \ + MACRO(VAR1, VAR2, , uint, u, 64, 1) + +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, q, int, s, 8, 16); \ + MACRO(VAR1, VAR2, q, int, s, 16, 8); 
\ + MACRO(VAR1, VAR2, q, int, s, 32, 4); \ + MACRO(VAR1, VAR2, q, int, s, 64, 2) + +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + MACRO(VAR1, VAR2, q, uint, u, 8, 16); \ + MACRO(VAR1, VAR2, q, uint, u, 16, 8); \ + MACRO(VAR1, VAR2, q, uint, u, 32, 4); \ + MACRO(VAR1, VAR2, q, uint, u, 64, 2) + +#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ + TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ + TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) + +#endif /* _STM_ARM_NEON_REF_H_ */ |