summaryrefslogtreecommitdiffstats
path: root/vp8/decoder/arm
diff options
context:
space:
mode:
authorAndreas Huber <andih@google.com>2010-09-15 15:12:42 -0700
committerAndreas Huber <andih@google.com>2010-09-15 15:12:42 -0700
commitf71323e297a928af368937089d3ed71239786f86 (patch)
treedad338caad00af21fd81f0975335c343a91be085 /vp8/decoder/arm
parent8aa17fc40a750935d80b115d89ca9403f42ab211 (diff)
downloadandroid_external_libvpx-f71323e297a928af368937089d3ed71239786f86.tar.gz
android_external_libvpx-f71323e297a928af368937089d3ed71239786f86.tar.bz2
android_external_libvpx-f71323e297a928af368937089d3ed71239786f86.zip
Upgrade to the latest .webm project code.
Change-Id: I33907e0c9ded667e54d31e2f9226c77501731c6c
Diffstat (limited to 'vp8/decoder/arm')
-rw-r--r--vp8/decoder/arm/armv5/dequantize_v5.asm11
-rw-r--r--vp8/decoder/arm/armv6/dboolhuff_v6.asm11
-rw-r--r--vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm218
-rw-r--r--vp8/decoder/arm/armv6/dequant_idct_v6.asm196
-rw-r--r--vp8/decoder/arm/armv6/dequantdcidct_v6.asm202
-rw-r--r--vp8/decoder/arm/armv6/dequantidct_v6.asm183
-rw-r--r--vp8/decoder/arm/armv6/dequantize_v6.asm11
-rw-r--r--vp8/decoder/arm/armv6/idct_blk_v6.c151
-rw-r--r--vp8/decoder/arm/dboolhuff_arm.h6
-rw-r--r--vp8/decoder/arm/dequantize_arm.c11
-rw-r--r--vp8/decoder/arm/dequantize_arm.h59
-rw-r--r--vp8/decoder/arm/detokenize.asm320
-rw-r--r--vp8/decoder/arm/detokenize_arm.h22
-rw-r--r--vp8/decoder/arm/detokenizearm_sjl.c730
-rw-r--r--vp8/decoder/arm/detokenizearm_v6.asm364
-rw-r--r--vp8/decoder/arm/dsystemdependent.c17
-rw-r--r--vp8/decoder/arm/neon/dboolhuff_neon.asm11
-rw-r--r--vp8/decoder/arm/neon/dequant_dc_idct_neon.asm (renamed from vp8/decoder/arm/neon/dequantdcidct_neon.asm)83
-rw-r--r--vp8/decoder/arm/neon/dequant_idct_neon.asm (renamed from vp8/decoder/arm/neon/dequantidct_neon.asm)78
-rw-r--r--vp8/decoder/arm/neon/dequantizeb_neon.asm11
-rw-r--r--vp8/decoder/arm/neon/idct_blk_neon.c151
21 files changed, 1225 insertions, 1621 deletions
diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm
index eb3f030..de3648a 100644
--- a/vp8/decoder/arm/armv5/dequantize_v5.asm
+++ b/vp8/decoder/arm/armv5/dequantize_v5.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
index 143e33e..6515804 100644
--- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm
+++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
new file mode 100644
index 0000000..6bebda2
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
@@ -0,0 +1,218 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequant_dc_idct_add_v6|
+
+ AREA |.text|, CODE, READONLY
+
+;void vp8_dequant_dc_idct_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride, int Dc)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch ; +4 = 40
+; sp + 40 = stride ; +4 = 44
+; sp + 44 = Dc ; +4 = 48
+
+
+|vp8_dequant_dc_idct_add_v6| PROC
+ stmdb sp!, {r4-r11, lr}
+
+ ldr r6, [sp, #44]
+
+ ldr r4, [r0] ;input
+ ldr r5, [r1], #4 ;dq
+
+ sub sp, sp, #4
+ str r3, [sp]
+
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ mov r12, #3
+
+vp8_dequant_dc_add_loop
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ subs r12, r12, #1
+
+ ldrne r4, [r0, #4]
+ ldrne r5, [r1], #4
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ bne vp8_dequant_dc_add_loop
+
+ sub r0, r0, #32
+ mov r1, r0
+
+; short_idct4x4llm_v6_dual
+ ldr r3, cospi8sqrt2minus1
+ ldr r4, sinpi8sqrt2
+ ldr r6, [r0, #8]
+ mov r5, #2
+vp8_dequant_dc_idct_loop1_v6
+ ldr r12, [r0, #24]
+ ldr r14, [r0, #16]
+ smulwt r9, r3, r6
+ smulwb r7, r3, r6
+ smulwt r10, r4, r6
+ smulwb r8, r4, r6
+ pkhbt r7, r7, r9, lsl #16
+ smulwt r11, r3, r12
+ pkhbt r8, r8, r10, lsl #16
+ uadd16 r6, r6, r7
+ smulwt r7, r4, r12
+ smulwb r9, r3, r12
+ smulwb r10, r4, r12
+ subs r5, r5, #1
+ pkhbt r9, r9, r11, lsl #16
+ ldr r11, [r0], #4
+ pkhbt r10, r10, r7, lsl #16
+ uadd16 r7, r12, r9
+ usub16 r7, r8, r7
+ uadd16 r6, r6, r10
+ uadd16 r10, r11, r14
+ usub16 r8, r11, r14
+ uadd16 r9, r10, r6
+ usub16 r10, r10, r6
+ uadd16 r6, r8, r7
+ usub16 r7, r8, r7
+ str r6, [r1, #8]
+ ldrne r6, [r0, #8]
+ str r7, [r1, #16]
+ str r10, [r1, #24]
+ str r9, [r1], #4
+ bne vp8_dequant_dc_idct_loop1_v6
+
+ mov r5, #2
+ sub r0, r1, #8
+vp8_dequant_dc_idct_loop2_v6
+ ldr r6, [r0], #4
+ ldr r7, [r0], #4
+ ldr r8, [r0], #4
+ ldr r9, [r0], #4
+ smulwt r1, r3, r6
+ smulwt r12, r4, r6
+ smulwt lr, r3, r8
+ smulwt r10, r4, r8
+ pkhbt r11, r8, r6, lsl #16
+ pkhbt r1, lr, r1, lsl #16
+ pkhbt r12, r10, r12, lsl #16
+ pkhtb r6, r6, r8, asr #16
+ uadd16 r6, r1, r6
+ pkhbt lr, r9, r7, lsl #16
+ uadd16 r10, r11, lr
+ usub16 lr, r11, lr
+ pkhtb r8, r7, r9, asr #16
+ subs r5, r5, #1
+ smulwt r1, r3, r8
+ smulwb r7, r3, r8
+ smulwt r11, r4, r8
+ smulwb r9, r4, r8
+ pkhbt r1, r7, r1, lsl #16
+ uadd16 r8, r1, r8
+ pkhbt r11, r9, r11, lsl #16
+ usub16 r1, r12, r8
+ uadd16 r8, r11, r6
+ ldr r9, c0x00040004
+ ldr r12, [sp, #40]
+ uadd16 r6, r10, r8
+ usub16 r7, r10, r8
+ uadd16 r7, r7, r9
+ uadd16 r6, r6, r9
+ uadd16 r10, r14, r1
+ usub16 r1, r14, r1
+ uadd16 r10, r10, r9
+ uadd16 r1, r1, r9
+ ldr r11, [r2], r12
+ mov r8, r7, asr #3
+ pkhtb r9, r8, r10, asr #19
+ mov r8, r1, asr #3
+ pkhtb r8, r8, r6, asr #19
+ uxtb16 lr, r11, ror #8
+ qadd16 r9, r9, lr
+ uxtb16 lr, r11
+ qadd16 r8, r8, lr
+ usat16 r9, #8, r9
+ usat16 r8, #8, r8
+ orr r9, r8, r9, lsl #8
+ ldr r11, [r2], r12
+ ldr lr, [sp]
+ ldr r12, [sp, #44]
+ mov r7, r7, lsl #16
+ mov r1, r1, lsl #16
+ mov r10, r10, lsl #16
+ mov r6, r6, lsl #16
+ mov r7, r7, asr #3
+ pkhtb r7, r7, r10, asr #19
+ mov r1, r1, asr #3
+ pkhtb r1, r1, r6, asr #19
+ uxtb16 r8, r11, ror #8
+ qadd16 r7, r7, r8
+ uxtb16 r8, r11
+ qadd16 r1, r1, r8
+ usat16 r7, #8, r7
+ usat16 r1, #8, r1
+ orr r1, r1, r7, lsl #8
+ str r9, [lr], r12
+ str r1, [lr], r12
+ str lr, [sp]
+ bne vp8_dequant_dc_idct_loop2_v6
+
+; vpx_memset
+ sub r0, r0, #32
+ add sp, sp, #4
+
+ mov r12, #0
+ str r12, [r0]
+ str r12, [r0, #4]
+ str r12, [r0, #8]
+ str r12, [r0, #12]
+ str r12, [r0, #16]
+ str r12, [r0, #20]
+ str r12, [r0, #24]
+ str r12, [r0, #28]
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ; |vp8_dequant_dc_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2 DCD 0x00008A8C
+c0x00040004 DCD 0x00040004
+
+ END
diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
new file mode 100644
index 0000000..47b671c
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
@@ -0,0 +1,196 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+ EXPORT |vp8_dequant_idct_add_v6|
+
+ AREA |.text|, CODE, READONLY
+;void vp8_dequant_idct_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch ; +4 = 40
+; sp + 40 = stride ; +4 = 44
+
+
+|vp8_dequant_idct_add_v6| PROC
+ stmdb sp!, {r4-r11, lr}
+
+ ldr r4, [r0] ;input
+ ldr r5, [r1], #4 ;dq
+
+ sub sp, sp, #4
+ str r3, [sp]
+
+ mov r12, #4
+
+vp8_dequant_add_loop
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ subs r12, r12, #1
+
+ ldrne r4, [r0, #4]
+ ldrne r5, [r1], #4
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ bne vp8_dequant_add_loop
+
+ sub r0, r0, #32
+ mov r1, r0
+
+; short_idct4x4llm_v6_dual
+ ldr r3, cospi8sqrt2minus1
+ ldr r4, sinpi8sqrt2
+ ldr r6, [r0, #8]
+ mov r5, #2
+vp8_dequant_idct_loop1_v6
+ ldr r12, [r0, #24]
+ ldr r14, [r0, #16]
+ smulwt r9, r3, r6
+ smulwb r7, r3, r6
+ smulwt r10, r4, r6
+ smulwb r8, r4, r6
+ pkhbt r7, r7, r9, lsl #16
+ smulwt r11, r3, r12
+ pkhbt r8, r8, r10, lsl #16
+ uadd16 r6, r6, r7
+ smulwt r7, r4, r12
+ smulwb r9, r3, r12
+ smulwb r10, r4, r12
+ subs r5, r5, #1
+ pkhbt r9, r9, r11, lsl #16
+ ldr r11, [r0], #4
+ pkhbt r10, r10, r7, lsl #16
+ uadd16 r7, r12, r9
+ usub16 r7, r8, r7
+ uadd16 r6, r6, r10
+ uadd16 r10, r11, r14
+ usub16 r8, r11, r14
+ uadd16 r9, r10, r6
+ usub16 r10, r10, r6
+ uadd16 r6, r8, r7
+ usub16 r7, r8, r7
+ str r6, [r1, #8]
+ ldrne r6, [r0, #8]
+ str r7, [r1, #16]
+ str r10, [r1, #24]
+ str r9, [r1], #4
+ bne vp8_dequant_idct_loop1_v6
+
+ mov r5, #2
+ sub r0, r1, #8
+vp8_dequant_idct_loop2_v6
+ ldr r6, [r0], #4
+ ldr r7, [r0], #4
+ ldr r8, [r0], #4
+ ldr r9, [r0], #4
+ smulwt r1, r3, r6
+ smulwt r12, r4, r6
+ smulwt lr, r3, r8
+ smulwt r10, r4, r8
+ pkhbt r11, r8, r6, lsl #16
+ pkhbt r1, lr, r1, lsl #16
+ pkhbt r12, r10, r12, lsl #16
+ pkhtb r6, r6, r8, asr #16
+ uadd16 r6, r1, r6
+ pkhbt lr, r9, r7, lsl #16
+ uadd16 r10, r11, lr
+ usub16 lr, r11, lr
+ pkhtb r8, r7, r9, asr #16
+ subs r5, r5, #1
+ smulwt r1, r3, r8
+ smulwb r7, r3, r8
+ smulwt r11, r4, r8
+ smulwb r9, r4, r8
+ pkhbt r1, r7, r1, lsl #16
+ uadd16 r8, r1, r8
+ pkhbt r11, r9, r11, lsl #16
+ usub16 r1, r12, r8
+ uadd16 r8, r11, r6
+ ldr r9, c0x00040004
+ ldr r12, [sp, #40]
+ uadd16 r6, r10, r8
+ usub16 r7, r10, r8
+ uadd16 r7, r7, r9
+ uadd16 r6, r6, r9
+ uadd16 r10, r14, r1
+ usub16 r1, r14, r1
+ uadd16 r10, r10, r9
+ uadd16 r1, r1, r9
+ ldr r11, [r2], r12
+ mov r8, r7, asr #3
+ pkhtb r9, r8, r10, asr #19
+ mov r8, r1, asr #3
+ pkhtb r8, r8, r6, asr #19
+ uxtb16 lr, r11, ror #8
+ qadd16 r9, r9, lr
+ uxtb16 lr, r11
+ qadd16 r8, r8, lr
+ usat16 r9, #8, r9
+ usat16 r8, #8, r8
+ orr r9, r8, r9, lsl #8
+ ldr r11, [r2], r12
+ ldr lr, [sp]
+ ldr r12, [sp, #44]
+ mov r7, r7, lsl #16
+ mov r1, r1, lsl #16
+ mov r10, r10, lsl #16
+ mov r6, r6, lsl #16
+ mov r7, r7, asr #3
+ pkhtb r7, r7, r10, asr #19
+ mov r1, r1, asr #3
+ pkhtb r1, r1, r6, asr #19
+ uxtb16 r8, r11, ror #8
+ qadd16 r7, r7, r8
+ uxtb16 r8, r11
+ qadd16 r1, r1, r8
+ usat16 r7, #8, r7
+ usat16 r1, #8, r1
+ orr r1, r1, r7, lsl #8
+ str r9, [lr], r12
+ str r1, [lr], r12
+ str lr, [sp]
+ bne vp8_dequant_idct_loop2_v6
+
+; vpx_memset
+ sub r0, r0, #32
+ add sp, sp, #4
+
+ mov r12, #0
+ str r12, [r0]
+ str r12, [r0, #4]
+ str r12, [r0, #8]
+ str r12, [r0, #12]
+ str r12, [r0, #16]
+ str r12, [r0, #20]
+ str r12, [r0, #24]
+ str r12, [r0, #28]
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ; |vp8_dequant_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2 DCD 0x00008A8C
+c0x00040004 DCD 0x00040004
+
+ END
diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
deleted file mode 100644
index 3daa9b3..0000000
--- a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
+++ /dev/null
@@ -1,202 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_dc_idct_v6|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA |.text|, CODE, READONLY ; name this block of code
-;void vp8_dequant_dc_idct_v6(short *input, short *dq, short *output, int pitch,int Dc)
-|vp8_dequant_dc_idct_v6| PROC
- stmdb sp!, {r4-r11, lr}
-
- ldr r6, [sp, #36] ;load Dc
-
- ldr r4, [r0] ;input
- ldr r5, [r1], #4 ;dq
-
- sub sp, sp, #4
- str r0, [sp]
-
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- mov r12, #3
-
-dequant_dc_idct_loop
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- subs r12, r12, #1
-
- ldrne r4, [r0, #4]
- ldrne r5, [r1], #4
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- bne dequant_dc_idct_loop
-
- sub r0, r0, #32
- mov r1, r2
- mov r2, r3
-
-; short_idct4x4llm_v6_dual
-
- mov r3, #0x00004E00 ; cos
- orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
- mov r4, #0x00008A00 ; sin
- orr r4, r4, #0x0000008C ; sinpi8sqrt2
- mov r5, #0x2 ; i=2 i
-loop1_dual_11
- ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
- ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
- ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
-
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
- pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
- smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
- pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
- uadd16 r6, r6, r7 ; 5c+5 | 4c+4
- smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
- smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
- smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
- subs r5, r5, #0x1 ; i-- --
- pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
- ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
- pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
- uadd16 r7, r12, r9 ; 13c+13 | 12c+12
- usub16 r7, r8, r7 ; c c
- uadd16 r6, r6, r10 ; d d
- uadd16 r10, r11, r14 ; a a
- usub16 r8, r11, r14 ; b b
- uadd16 r9, r10, r6 ; a+d a+d
- usub16 r10, r10, r6 ; a-d a-d
- uadd16 r6, r8, r7 ; b+c b+c
- usub16 r7, r8, r7 ; b-c b-c
- str r6, [r1, r2] ; o5 | o4
- add r6, r2, r2 ; pitch * 2 p2
- str r7, [r1, r6] ; o9 | o8
- add r6, r6, r2 ; pitch * 3 p3
- str r10, [r1, r6] ; o13 | o12
- str r9, [r1], #0x4 ; o1 | o0 ++
- bne loop1_dual_11 ;
- mov r5, #0x2 ; i=2 i
- sub r0, r1, #8 ; reset input/output i/o
-loop2_dual_22
- ldr r6, [r0, r2] ; i5 | i4 5|4
- ldr r1, [r0] ; i1 | i0 1|0
- ldr r12, [r0, #0x4] ; i3 | i2 3|2
- add r14, r2, #0x4 ; pitch + 2 p+2
- ldr r14, [r0, r14] ; i7 | i6 7|6
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
- pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
- pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
- pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 © tc1
- pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
- uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
- pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
- uadd16 r10, r11, r9 ; a a
- usub16 r9, r11, r9 ; b b
- pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
- subs r5, r5, #0x1 ; i-- --
- smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
- smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
- smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
- smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
-
- pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
- pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
- uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
- usub16 r12, r8, r6 ; c (o1 | o5) c
- uadd16 r6, r11, r1 ; d (o3 | o7) d
- uadd16 r7, r10, r6 ; a+d a+d
- mov r8, #0x4 ; set up 4's 4
- orr r8, r8, #0x40000 ; 4|4
- usub16 r6, r10, r6 ; a-d a-d
- uadd16 r6, r6, r8 ; a-d+4 3|7
- uadd16 r7, r7, r8 ; a+d+4 0|4
- uadd16 r10, r9, r12 ; b+c b+c
- usub16 r1, r9, r12 ; b-c b-c
- uadd16 r10, r10, r8 ; b+c+4 1|5
- uadd16 r1, r1, r8 ; b-c+4 2|6
- mov r8, r10, asr #19 ; o1 >> 3
- strh r8, [r0, #2] ; o1
- mov r8, r1, asr #19 ; o2 >> 3
- strh r8, [r0, #4] ; o2
- mov r8, r6, asr #19 ; o3 >> 3
- strh r8, [r0, #6] ; o3
- mov r8, r7, asr #19 ; o0 >> 3
- strh r8, [r0], r2 ; o0 +p
- sxth r10, r10 ;
- mov r8, r10, asr #3 ; o5 >> 3
- strh r8, [r0, #2] ; o5
- sxth r1, r1 ;
- mov r8, r1, asr #3 ; o6 >> 3
- strh r8, [r0, #4] ; o6
- sxth r6, r6 ;
- mov r8, r6, asr #3 ; o7 >> 3
- strh r8, [r0, #6] ; o7
- sxth r7, r7 ;
- mov r8, r7, asr #3 ; o4 >> 3
- strh r8, [r0], r2 ; o4 +p
-;;;;; subs r5, r5, #0x1 ; i-- --
- bne loop2_dual_22 ;
-
-
-;vpx_memset
- ldr r0, [sp]
- add sp, sp, #4
-
- mov r12, #0
- str r12, [r0]
- str r12, [r0, #4]
- str r12, [r0, #8]
- str r12, [r0, #12]
- str r12, [r0, #16]
- str r12, [r0, #20]
- str r12, [r0, #24]
- str r12, [r0, #28]
-
- ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
-
- ENDP ;|vp8_dequant_dc_idct_v68|
-
- END
diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm
deleted file mode 100644
index 61bb48d..0000000
--- a/vp8/decoder/arm/armv6/dequantidct_v6.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_idct_v6|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA |.text|, CODE, READONLY ; name this block of code
-;void vp8_dequant_idct_v6(short *input, short *dq, short *output, int pitch)
-|vp8_dequant_idct_v6| PROC
- stmdb sp!, {r4-r11, lr}
-
- ldr r4, [r0] ;input
- ldr r5, [r1], #4 ;dq
-
- sub sp, sp, #4
- str r0, [sp]
-
- mov r12, #4
-
-dequant_idct_loop
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- subs r12, r12, #1
-
- ldrne r4, [r0, #4]
- ldrne r5, [r1], #4
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- bne dequant_idct_loop
-
- sub r0, r0, #32
- mov r1, r2
- mov r2, r3
-
-; short_idct4x4llm_v6_dual
-
- mov r3, #0x00004E00 ; cos
- orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
- mov r4, #0x00008A00 ; sin
- orr r4, r4, #0x0000008C ; sinpi8sqrt2
- mov r5, #0x2 ; i=2 i
-loop1_dual_1
- ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
- ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
- ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
-
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
- pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
- smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
- pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
- uadd16 r6, r6, r7 ; 5c+5 | 4c+4
- smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
- smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
- smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
- subs r5, r5, #0x1 ; i-- --
- pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
- ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
- pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
- uadd16 r7, r12, r9 ; 13c+13 | 12c+12
- usub16 r7, r8, r7 ; c c
- uadd16 r6, r6, r10 ; d d
- uadd16 r10, r11, r14 ; a a
- usub16 r8, r11, r14 ; b b
- uadd16 r9, r10, r6 ; a+d a+d
- usub16 r10, r10, r6 ; a-d a-d
- uadd16 r6, r8, r7 ; b+c b+c
- usub16 r7, r8, r7 ; b-c b-c
- str r6, [r1, r2] ; o5 | o4
- add r6, r2, r2 ; pitch * 2 p2
- str r7, [r1, r6] ; o9 | o8
- add r6, r6, r2 ; pitch * 3 p3
- str r10, [r1, r6] ; o13 | o12
- str r9, [r1], #0x4 ; o1 | o0 ++
- bne loop1_dual_1 ;
- mov r5, #0x2 ; i=2 i
- sub r0, r1, #8 ; reset input/output i/o
-loop2_dual_2
- ldr r6, [r0, r2] ; i5 | i4 5|4
- ldr r1, [r0] ; i1 | i0 1|0
- ldr r12, [r0, #0x4] ; i3 | i2 3|2
- add r14, r2, #0x4 ; pitch + 2 p+2
- ldr r14, [r0, r14] ; i7 | i6 7|6
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
- pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
- pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
- pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 © tc1
- pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
- uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
- pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
- uadd16 r10, r11, r9 ; a a
- usub16 r9, r11, r9 ; b b
- pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
- subs r5, r5, #0x1 ; i-- --
- smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
- smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
- smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
- smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
-
- pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
- pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
- uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
- usub16 r12, r8, r6 ; c (o1 | o5) c
- uadd16 r6, r11, r1 ; d (o3 | o7) d
- uadd16 r7, r10, r6 ; a+d a+d
- mov r8, #0x4 ; set up 4's 4
- orr r8, r8, #0x40000 ; 4|4
- usub16 r6, r10, r6 ; a-d a-d
- uadd16 r6, r6, r8 ; a-d+4 3|7
- uadd16 r7, r7, r8 ; a+d+4 0|4
- uadd16 r10, r9, r12 ; b+c b+c
- usub16 r1, r9, r12 ; b-c b-c
- uadd16 r10, r10, r8 ; b+c+4 1|5
- uadd16 r1, r1, r8 ; b-c+4 2|6
- mov r8, r10, asr #19 ; o1 >> 3
- strh r8, [r0, #2] ; o1
- mov r8, r1, asr #19 ; o2 >> 3
- strh r8, [r0, #4] ; o2
- mov r8, r6, asr #19 ; o3 >> 3
- strh r8, [r0, #6] ; o3
- mov r8, r7, asr #19 ; o0 >> 3
- strh r8, [r0], r2 ; o0 +p
- sxth r10, r10 ;
- mov r8, r10, asr #3 ; o5 >> 3
- strh r8, [r0, #2] ; o5
- sxth r1, r1 ;
- mov r8, r1, asr #3 ; o6 >> 3
- strh r8, [r0, #4] ; o6
- sxth r6, r6 ;
- mov r8, r6, asr #3 ; o7 >> 3
- strh r8, [r0, #6] ; o7
- sxth r7, r7 ;
- mov r8, r7, asr #3 ; o4 >> 3
- strh r8, [r0], r2 ; o4 +p
-;;;;; subs r5, r5, #0x1 ; i-- --
- bne loop2_dual_2 ;
- ;
-
-;vpx_memset
- ldr r0, [sp]
- add sp, sp, #4
-
- mov r12, #0
- str r12, [r0]
- str r12, [r0, #4]
- str r12, [r0, #8]
- str r12, [r0, #12]
- str r12, [r0, #16]
- str r12, [r0, #20]
- str r12, [r0, #24]
- str r12, [r0, #28]
-
- ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
-
- ENDP ;|vp8_dequant_idct_v6|
-
- END
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
index 95e3859..72f7e0e 100644
--- a/vp8/decoder/arm/armv6/dequantize_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantize_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
new file mode 100644
index 0000000..3c7bc50
--- /dev/null
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_dc_idct_add_v6 (q, dq, pre, dst, 16, stride, dc[0]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[0], pre, dst, 16, stride);
+
+ if (eobs[1] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[1], pre+4, dst+4, 16, stride);
+
+ if (eobs[2] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[2], pre+8, dst+8, 16, stride);
+
+ if (eobs[3] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[3], pre+12, dst+12, 16, stride);
+
+ q += 64;
+ dc += 4;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_y_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dst, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dst+4, 16, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ if (eobs[2] > 1)
+ vp8_dequant_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[32]*dq[0], pre+8, dst+8, 16, stride);
+ ((int *)(q+32))[0] = 0;
+ }
+
+ if (eobs[3] > 1)
+ vp8_dequant_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[48]*dq[0], pre+12, dst+12, 16, stride);
+ ((int *)(q+48))[0] = 0;
+ }
+
+ q += 64;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dstu, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstu+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstu += 4*stride;
+ eobs += 2;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dstv, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstv+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstv += 4*stride;
+ eobs += 2;
+ }
+}
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
index 495004f..d2ebc71 100644
--- a/vp8/decoder/arm/dboolhuff_arm.h
+++ b/vp8/decoder/arm/dboolhuff_arm.h
@@ -16,9 +16,6 @@
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_v6
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_v6
-
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_v6
@@ -33,9 +30,6 @@
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_neon
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_neon
-
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_neon
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index 54006a9..3926587 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index c8a61a4..40151e0 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,32 +14,56 @@
#if HAVE_ARMV6
extern prototype_dequant_block(vp8_dequantize_b_v6);
-extern prototype_dequant_idct(vp8_dequant_idct_v6);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_v6);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_v6
-#undef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_v6
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
-#undef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_v6
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
#endif
#if HAVE_ARMV7
extern prototype_dequant_block(vp8_dequantize_b_neon);
-extern prototype_dequant_idct(vp8_dequant_idct_neon);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_neon);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_neon
-#undef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_neon
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
+
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_neon
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon
-#undef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_neon
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
#endif
#endif
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
new file mode 100644
index 0000000..45e068a
--- /dev/null
+++ b/vp8/decoder/arm/detokenize.asm
@@ -0,0 +1,320 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_decode_mb_tokens_v6|
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+ INCLUDE vpx_asm_offsets.asm
+
+l_qcoeff EQU 0
+l_i EQU 4
+l_type EQU 8
+l_stop EQU 12
+l_c EQU 16
+l_l_ptr EQU 20
+l_a_ptr EQU 24
+l_bc EQU 28
+l_coef_ptr EQU 32
+l_stacksize EQU 64
+
+
+;; constant offsets -- these should be created at build time
+c_block2above_offset EQU 25
+c_entropy_nodes EQU 11
+c_dct_eob_token EQU 11
+
+|vp8_decode_mb_tokens_v6| PROC
+ stmdb sp!, {r4 - r11, lr}
+ sub sp, sp, #l_stacksize
+ mov r7, r1 ; type
+ mov r9, r0 ; detoken
+
+ ldr r1, [r9, #detok_current_bc]
+ ldr r0, [r9, #detok_qcoeff_start_ptr]
+ mov r11, #0 ; i
+ mov r3, #16 ; stop
+
+ cmp r7, #1 ; type ?= 1
+ addeq r11, r11, #24 ; i = 24
+ addeq r3, r3, #8 ; stop = 24
+ addeq r0, r0, #3, 24 ; qcoefptr += 24*16
+
+ str r0, [sp, #l_qcoeff]
+ str r11, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+ str r1, [sp, #l_bc]
+
+ add lr, r9, r7, lsl #2 ; detoken + type*4
+
+ ldr r8, [r1, #bool_decoder_user_buffer]
+
+ ldr r10, [lr, #detok_coef_probs]
+ ldr r5, [r1, #bool_decoder_count]
+ ldr r6, [r1, #bool_decoder_range]
+ ldr r4, [r1, #bool_decoder_value]
+
+ str r10, [sp, #l_coef_ptr]
+
+BLOCK_LOOP
+ ldr r3, [r9, #detok_ptr_block2leftabove]
+ ldr r1, [r9, #detok_L]
+ ldr r2, [r9, #detok_A]
+ ldrb r12, [r3, r11]! ; block2left[i]
+ ldrb r3, [r3, #c_block2above_offset]; block2above[i]
+
+ cmp r7, #0 ; c = !type
+ moveq r7, #1
+ movne r7, #0
+
+ ldrb r0, [r1, r12]! ; *(L += block2left[i])
+ ldrb r3, [r2, r3]! ; *(A += block2above[i])
+ mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
+
+; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
+ cmp r0, #0 ; *l ?= 0
+ movne r0, #1
+ cmp r3, #0 ; *a ?= 0
+ addne r0, r0, #1 ; t
+
+ str r1, [sp, #l_l_ptr] ; save &l
+ str r2, [sp, #l_a_ptr] ; save &a
+ smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
+ mov r1, #0 ; t = 0
+ str r7, [sp, #l_c]
+
+ ;align 4
+COEFF_LOOP
+ ldr r3, [r9, #detok_ptr_coef_bands_x]
+ ldr lr, [r9, #detok_coef_tree_ptr]
+ ;STALL
+ ldrb r3, [r3, r7] ; coef_bands_x[c]
+ ;STALL
+ ;STALL
+ add r0, r0, r3 ; Prob += coef_bands_x[c]
+
+get_token_loop
+ ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
+ mov r3, r6, lsl #8 ; range << 8
+ sub r3, r3, #256 ; (range << 8) - (1 << 8)
+ mov r10, #1 ; 1
+
+ smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8)
+
+ ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr
+ ;++
+
+ subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE
+ addhs r1, r1, #1 ; t += 1
+ movhs r4, r3 ; value -= bigsplit (split << 24)
+ subhs r2, r6, r2 ; range -= split
+ ; movlo r6, r2 ; range = split
+
+ ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t]
+
+; NORMALIZE
+ clz r3, r2 ; vp8dx_bitreader_norm[range] + 24
+ sub r3, r3, #24 ; vp8dx_bitreader_norm[range]
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range <<= shift
+ mov r4, r4, lsl r3 ; value <<= shift
+
+; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
+ addle r5, r5, #8 ; count += 8
+ rsble r3, r5, #24 ; 24 - count
+ addle r8, r8, #1 ; bufptr++
+ orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16
+
+ cmp r1, #0 ; t ?= 0
+ bgt get_token_loop ; while (t > 0)
+
+ cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN)
+ beq END_OF_BLOCK ; break
+
+ rsb lr, r1, #0 ; v = -t;
+
+ cmp lr, #4 ; if(v > FOUR_TOKEN)
+ ble SKIP_EXTRABITS
+
+ ldr r3, [r9, #detok_teb_base_ptr]
+ mov r11, #1 ; 1 in split = 1 + ... nope, v+= 1 << bits_count
+ add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4)
+
+ ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
+ ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
+
+extrabits_loop
+ add r3, r0, r7 ; &teb_ptr->Probs[bits_count]
+
+ ldrb r2, [r3, #4] ; probability. why +4?
+ mov r3, r6, lsl #8 ; range << 8
+ sub r3, r3, #256 ; range << 8 + 1 << 8
+
+ smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8)
+
+ ldrb r12, [r8] ; *bufptr
+ ;++
+
+ subs r10, r4, r2, lsl #24 ; value - (split<<24)
+ movhs r4, r10 ; value = value - (split << 24)
+ subhs r2, r6, r2 ; range = range - split
+ addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<<bits_count)
+
+; NORMALIZE
+ clz r3, r2 ; shift - leading zeros in split
+ sub r3, r3, #24 ; don't count first 3 bytes
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range = range << shift
+ mov r4, r4, lsl r3 ; value <<= shift
+
+ addle r5, r5, #8 ; count += BR_COUNT
+ addle r8, r8, #1 ; bufptr++
+ rsble r3, r5, #24 ; BR_COUNT - count
+ orrle r4, r4, r12, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
+
+ subs r0, r0, #1 ; bits_count --
+ bpl extrabits_loop
+
+
+SKIP_EXTRABITS
+ ldr r11, [sp, #l_qcoeff]
+ ldr r0, [sp, #l_coef_ptr] ; Prob = coef_probs
+
+ cmp r1, #0 ; check for nonzero token - if (t)
+ beq SKIP_EOB_CHECK ; if t is zero, we will skip the eob table chec
+
+ add r3, r6, #1 ; range + 1
+ mov r2, r3, lsr #1 ; split = (range + 1) >> 1
+
+ subs r3, r4, r2, lsl #24 ; value - (split<<24)
+ movhs r4, r3 ; value -= (split << 24)
+ subhs r2, r6, r2 ; range -= split
+ mvnhs r3, lr ; -v
+ addhs lr, r3, #1 ; v = (v ^ -1) + 1
+
+; NORMALIZE
+ clz r3, r2 ; leading 0s in split
+ sub r3, r3, #24 ; shift
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range <<= shift
+ mov r4, r4, lsl r3 ; value <<= shift
+ ldrleb r2, [r8], #1 ; *(bufptr++)
+ addle r5, r5, #8 ; count += 8
+ rsble r3, r5, #24 ; BR_COUNT - count
+ orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
+
+ add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
+
+ cmn r1, #1 ; t < -ONE_TOKEN
+
+ addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
+
+ mvn r1, #1 ; t = -1 ???? C is -2
+
+SKIP_EOB_CHECK
+ ldr r7, [sp, #l_c] ; c
+ ldr r3, [r9, #detok_scan]
+ add r1, r1, #2 ; t+= 2
+ cmp r7, #15 ; c should will be one higher
+
+ ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value
+ add r7, r7, #1 ; c++
+ add r3, r11, r3, lsl #1 ; qcoeff + scan[c]
+
+ str r7, [sp, #l_c] ; store c
+ strh lr, [r3] ; qcoef_ptr[scan[c]] = v
+
+ blt COEFF_LOOP
+
+ sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c
+
+END_OF_BLOCK
+ ldr r3, [sp, #l_type] ; type
+ ldr r10, [sp, #l_coef_ptr] ; coef_ptr
+ ldr r0, [sp, #l_qcoeff] ; qcoeff
+ ldr r11, [sp, #l_i] ; i
+ ldr r12, [sp, #l_stop] ; stop
+
+ cmp r3, #0 ; type ?= 0
+ moveq r1, #1
+ movne r1, #0
+ add r3, r11, r9 ; detok + i
+
+ cmp r7, r1 ; c ?= !type
+ strb r7, [r3, #detok_eob] ; eob[i] = c
+
+ ldr r7, [sp, #l_l_ptr] ; l
+ ldr r2, [sp, #l_a_ptr] ; a
+ movne r3, #1 ; t
+ moveq r3, #0
+
+ add r0, r0, #32 ; qcoeff += 32 (16 * 2?)
+ add r11, r11, #1 ; i++
+ strb r3, [r7] ; *l = t
+ strb r3, [r2] ; *a = t
+ str r0, [sp, #l_qcoeff] ; qcoeff
+ str r11, [sp, #l_i] ; i
+
+ cmp r11, r12 ; i < stop
+ ldr r7, [sp, #l_type] ; type
+
+ blt BLOCK_LOOP
+
+ cmp r11, #25 ; i ?= 25
+ bne ln2_decode_mb_to
+
+ ldr r12, [r9, #detok_qcoeff_start_ptr]
+ ldr r10, [r9, #detok_coef_probs]
+ mov r7, #0 ; type/i = 0
+ mov r3, #16 ; stop = 16
+ str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
+ str r7, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+
+ str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
+
+ b BLOCK_LOOP
+
+ln2_decode_mb_to
+ cmp r11, #16 ; i ?= 16
+ bne ln1_decode_mb_to
+
+ mov r10, #detok_coef_probs
+ add r10, r10, #2*4 ; coef_probs[type]
+ ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
+
+ mov r7, #2 ; type = 2
+ mov r3, #24 ; stop = 24
+
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+
+ str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
+ b BLOCK_LOOP
+
+ln1_decode_mb_to
+ ldr r2, [sp, #l_bc]
+ mov r0, #0
+ nop
+
+ str r8, [r2, #bool_decoder_user_buffer]
+ str r5, [r2, #bool_decoder_count]
+ str r4, [r2, #bool_decoder_value]
+ str r6, [r2, #bool_decoder_range]
+
+ add sp, sp, #l_stacksize
+ ldmia sp!, {r4 - r11, pc}
+
+ ENDP ; |vp8_decode_mb_tokens_v6|
+
+ END
diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h
new file mode 100644
index 0000000..9bb19b6
--- /dev/null
+++ b/vp8/decoder/arm/detokenize_arm.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef DETOKENIZE_ARM_H
+#define DETOKENIZE_ARM_H
+
+#if HAVE_ARMV6
+#if CONFIG_ARM_ASM_DETOK
+void vp8_init_detokenizer(VP8D_COMP *dx);
+void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
+#endif
+#endif
+
+#endif
diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c
deleted file mode 100644
index c714452..0000000
--- a/vp8/decoder/arm/detokenizearm_sjl.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "type_aliases.h"
-#include "blockd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/mem.h"
-
-#define BR_COUNT 8
-#define BOOL_DATA UINT8
-
-#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-//ALIGN16 UINT16 onyx_coef_bands_x[16] = { 0, 1*OCB_X, 2*OCB_X, 3*OCB_X, 6*OCB_X, 4*OCB_X, 5*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 7*OCB_X};
-DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
-
-#define EOB_CONTEXT_NODE 0
-#define ZERO_CONTEXT_NODE 1
-#define ONE_CONTEXT_NODE 2
-#define LOW_VAL_CONTEXT_NODE 3
-#define TWO_CONTEXT_NODE 4
-#define THREE_CONTEXT_NODE 5
-#define HIGH_LOW_CONTEXT_NODE 6
-#define CAT_ONE_CONTEXT_NODE 7
-#define CAT_THREEFOUR_CONTEXT_NODE 8
-#define CAT_THREE_CONTEXT_NODE 9
-#define CAT_FIVE_CONTEXT_NODE 10
-
-
-
-
-DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
-{
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
- { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
- { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
- { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
- { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
- { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
- { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
- { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
- { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
- { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
- { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
-{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
- 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0 //end of vp8_block2above
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
-{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- for (i = 0; i < 24; i++)
- {
-
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-
- *a = *l = 0;
- }
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- a = A[Y2CONTEXT] + vp8_block2above[24];
- l = L[Y2CONTEXT] + vp8_block2left[24];
- *a = *l = 0;
- }
-
-
-}
-
-#define ONYXBLOCK2CONTEXT_OFFSET 0
-#define ONYXBLOCK2LEFT_OFFSET 25
-#define ONYXBLOCK2ABOVE_OFFSET 50
-
-DECLARE_ALIGNED(16, const static unsigned char, norm[128]) =
-{
- 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-void init_detokenizer(VP8D_COMP *dx)
-{
- const VP8_COMMON *const oc = & dx->common;
- MACROBLOCKD *x = & dx->mb;
-
- dx->detoken.norm_ptr = (unsigned char *)norm;
- dx->detoken.vp8_coef_tree_ptr = (vp8_tree_index *)vp8_coef_tree;
- dx->detoken.ptr_onyxblock2context_leftabove = (UINT8 *)vp8_block2context_leftabove;
- dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
- dx->detoken.scan = (int *)vp8_default_zig_zag1d;
- dx->detoken.teb_base_ptr = (TOKENEXTRABITS *)vp8d_token_extra_bits2;
-
- dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
-
- dx->detoken.coef_probs[0] = (unsigned char *)(oc->fc.coef_probs [0] [ 0 ] [0]);
- dx->detoken.coef_probs[1] = (unsigned char *)(oc->fc.coef_probs [1] [ 0 ] [0]);
- dx->detoken.coef_probs[2] = (unsigned char *)(oc->fc.coef_probs [2] [ 0 ] [0]);
- dx->detoken.coef_probs[3] = (unsigned char *)(oc->fc.coef_probs [3] [ 0 ] [0]);
-
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-
-//shift = norm[range]; \
-// shift = norm_ptr[range]; \
-
-#define NORMALIZE \
- /*if(range < 0x80)*/ \
- { \
- shift = detoken->norm_ptr[range]; \
- range <<= shift; \
- value <<= shift; \
- count -= shift; \
- if(count <= 0) \
- { \
- count += BR_COUNT ; \
- value |= (*bufptr) << (BR_COUNT-count); \
- bufptr++; \
- } \
- }
-#if 1
-#define DECODE_AND_APPLYSIGN(value_to_sign) \
- split = (range + 1) >> 1; \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- v= value_to_sign; \
- } \
- else \
- { \
- range = range-split; \
- value = value-(split<<24); \
- v = -value_to_sign; \
- } \
- range +=range; \
- value +=value; \
- if (!--count) \
- { \
- count = BR_COUNT; \
- value |= *bufptr; \
- bufptr++; \
- }
-
-#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) )>> 8); \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- NORMALIZE \
- goto branch; \
- } \
- value -= (split<<24); \
- range = range - split; \
- NORMALIZE \
- }
-
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) ) >> 8); \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- NORMALIZE \
- Prob = coef_probs; \
- ++c; \
- Prob += vp8_coef_bands_x[c]; \
- goto branch; \
- } \
- value -= (split<<24); \
- range = range - split; \
- NORMALIZE \
- }
-
-#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
- DECODE_AND_APPLYSIGN(val) \
- Prob = coef_probs + (ENTROPY_NODES*2); \
- if(c < 15){\
- qcoeff_ptr [ scan[c] ] = (INT16) v; \
- ++c; \
- goto DO_WHILE; }\
- qcoeff_ptr [ scan[15] ] = (INT16) v; \
- goto BLOCK_FINISHED;
-
-
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
- split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
- if(value >= (split<<24))\
- {\
- range = range-split;\
- value = value-(split<<24);\
- val += ((UINT16)1<<bits_count);\
- }\
- else\
- {\
- range = split;\
- }\
- NORMALIZE
-#endif
-
-#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
- const VP8_COMMON *const oc = & dx->common;
-
- BOOL_DECODER *bc = x->current_bc;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- int eobtotal = 0;
-
- register int count;
-
- BOOL_DATA *bufptr;
- register unsigned int range;
- register unsigned int value;
- const int *scan;
- register unsigned int shift;
- UINT32 split;
- INT16 *qcoeff_ptr;
-
- UINT8 *coef_probs;
- int type;
- int stop;
- INT16 val, bits_count;
- INT16 c;
- INT16 t;
- INT16 v;
- vp8_prob *Prob;
-
- //int *scan;
- type = 3;
- i = 0;
- stop = 16;
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- i = 24;
- stop = 24;
- type = 1;
- qcoeff_ptr = &x->qcoeff[24*16];
- scan = vp8_default_zig_zag1d;
- eobtotal -= 16;
- }
- else
- {
- scan = vp8_default_zig_zag1d;
- qcoeff_ptr = &x->qcoeff[0];
- }
-
- count = bc->count;
- range = bc->range;
- value = bc->value;
- bufptr = &bc->buffer[bc->pos];
-
-
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
- c = (INT16)(!type);
-
- VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
- Prob = coef_probs;
- Prob += t * ENTROPY_NODES;
-
-DO_WHILE:
- Prob += vp8_coef_bands_x[c];
- DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
-
-CHECK_0_:
- DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_);
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
- bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
-
- do
- {
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
- bits_count -- ;
- }
- while (bits_count >= 0);
-
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_FIVE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREEFOUR_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_);
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-HIGH_LOW_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_);
-
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_ONE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-LOW_VAL_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
-
-THREE_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
-
-TWO_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
-
-ONE_CONTEXT_NODE_0_:
- DECODE_AND_APPLYSIGN(1);
- Prob = coef_probs + ENTROPY_NODES;
-
- if (c < 15)
- {
- qcoeff_ptr [ scan[c] ] = (INT16) v;
- ++c;
- goto DO_WHILE;
- }
-
- qcoeff_ptr [ scan[15] ] = (INT16) v;
-BLOCK_FINISHED:
- t = ((x->Block[i].eob = c) != !type); // any nonzero data?
- eobtotal += x->Block[i].eob;
- *a = *l = t;
- qcoeff_ptr += 16;
-
- i++;
-
- if (i < stop)
- goto BLOCK_LOOP;
-
- if (i == 25)
- {
- scan = vp8_default_zig_zag1d;//x->scan_order1d;
- type = 0;
- i = 0;
- stop = 16;
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- qcoeff_ptr = &x->qcoeff[0];
- goto BLOCK_LOOP;
- }
-
- if (i == 16)
- {
- type = 2;
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- stop = 24;
- goto BLOCK_LOOP;
- }
-
- bc->count = count;
- bc->value = value;
- bc->range = range;
- bc->pos = bufptr - bc->buffer;
- return eobtotal;
-
-}
-//#endif
-#else
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-#if 0
-//uses relative offsets
-
-const vp8_tree_index vp8_coef_tree_x[ 22] = /* corresponding _CONTEXT_NODEs */
-{
- -DCT_EOB_TOKEN, 1, /* 0 = EOB */
- -ZERO_TOKEN, 1, /* 1 = ZERO */
- -ONE_TOKEN, 1, /* 2 = ONE */
- 2, 5, /* 3 = LOW_VAL */
- -TWO_TOKEN, 1, /* 4 = TWO */
- -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
- 2, 3, /* 6 = HIGH_LOW */
- -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
- 2, 3, /* 8 = CAT_THREEFOUR */
- -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
- -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
-};
-#endif
-
-#define _SCALEDOWN 8 //16 //8
-
-int vp8_decode_mb_tokens_v5(DETOK *detoken, int type);
-
-int vp8_decode_mb_tokens_v5_c(DETOK *detoken, int type)
-{
- BOOL_DECODER *bc = detoken->current_bc;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- register int count;
-
- BOOL_DATA *bufptr;
- register unsigned int range;
- register unsigned int value;
- register unsigned int shift;
- UINT32 split;
- INT16 *qcoeff_ptr;
-
- UINT8 *coef_probs;
-// int type;
- int stop;
- INT16 c;
- INT16 t;
- INT16 v;
- vp8_prob *Prob;
-
-
-
-// type = 3;
- i = 0;
- stop = 16;
- qcoeff_ptr = detoken->qcoeff_start_ptr;
-
-// if( detoken->mode != B_PRED && detoken->mode != SPLITMV)
- if (type == 1)
- {
- i += 24;
- stop += 8; //24;
-// type = 1;
- qcoeff_ptr += 24 * 16;
-// eobtotal-=16;
- }
-
- count = bc->count;
- range = bc->range;
- value = bc->value;
- bufptr = &bc->buffer[bc->pos];
-
-
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
- a = detoken->A[ detoken->ptr_onyxblock2context_leftabove[i] ];
- l = detoken->L[ detoken->ptr_onyxblock2context_leftabove[i] ];
- c = !type;
- a += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2ABOVE_OFFSET];
- l += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2LEFT_OFFSET];
-
- //#define ONYX_COMBINEENTROPYCONTEXTS( Dest, A, B) \
- //Dest = ((A)!=0) + ((B)!=0);
-
- VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
-
- Prob = coef_probs;
- Prob += t * ENTROPY_NODES;
- t = 0;
-
- do
- {
-
- {
-// onyx_tree_index * onyx_coef_tree_ptr = onyx_coef_tree_x;
-
- Prob += detoken->ptr_onyx_coef_bands_x[c];
-
- GET_TOKEN_START:
-
- do
- {
- split = 1 + (((range - 1) * (Prob[t>>1])) >> 8);
-
- if (value >> 24 >= split)
- {
- range = range - split;
- value = value - (split << 24);
- t += 1;
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- t = detoken->vp8_coef_tree_ptr[ t ];
-
- NORMALIZE
-
- }
- while (t > 0) ;
- }
- GET_TOKEN_STOP:
-
- if (t == -DCT_EOB_TOKEN)
- {
- break;
- }
-
- v = -t;
-
- if (v > FOUR_TOKEN)
- {
- INT16 bits_count;
- TOKENEXTRABITS *teb_ptr;
-
-// teb_ptr = &onyxd_token_extra_bits2[t];
-// teb_ptr = &onyxd_token_extra_bits2[v];
- teb_ptr = &detoken->teb_base_ptr[v];
-
-
- v = teb_ptr->min_val;
- bits_count = teb_ptr->Length;
-
- do
- {
- split = 1 + (((range - 1) * teb_ptr->Probs[bits_count]) >> _SCALEDOWN);
-
- if ((value >> 24) >= split)
- {
- range = range - split;
- value = value - (split << 24);
- v += ((UINT16)1 << bits_count);
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- NORMALIZE
-
- bits_count -- ;
- }
- while (bits_count >= 0);
- }
-
- Prob = coef_probs;
-
- if (t)
- {
- split = 1 + (((range - 1) * vp8_prob_half) >> 8);
-
- if ((value >> 24) >= split)
- {
- range = range - split;
- value = value - (split << 24);
- v = (v ^ -1) + 1; /* negate w/out conditionals */
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- NORMALIZE
- Prob += ENTROPY_NODES;
-
- if (t < -ONE_TOKEN)
- Prob += ENTROPY_NODES;
-
- t = -2;
- }
-
- //if t is zero, we will skip the eob table check
- t += 2;
- qcoeff_ptr [detoken->scan [c] ] = (INT16) v;
-
- }
- while (++c < 16);
-
- if (t != -DCT_EOB_TOKEN)
- {
- --c;
- }
-
- t = ((detoken->eob[i] = c) != !type); // any nonzero data?
-// eobtotal += detoken->eob[i];
- *a = *l = t;
- qcoeff_ptr += 16;
-
- i++;
-
- if (i < stop)
- goto BLOCK_LOOP;
-
- if (i == 25)
- {
- type = 0;
- i = 0;
- stop = 16;
-// coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- qcoeff_ptr = detoken->qcoeff_start_ptr;
- goto BLOCK_LOOP;
- }
-
- if (i == 16)
- {
- type = 2;
-// coef_probs =(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- stop = 24;
- goto BLOCK_LOOP;
- }
-
- bc->count = count;
- bc->value = value;
- bc->range = range;
- bc->pos = bufptr - bc->buffer;
- return 0;
-}
-//#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
-// const ONYX_COMMON * const oc = & dx->common;
- int eobtotal = 0;
- int i, type;
- /*
- dx->detoken.norm_ptr = norm;
- dx->detoken.onyx_coef_tree_ptr = onyx_coef_tree;
- dx->detoken.ptr_onyxblock2context_leftabove = ONYXBLOCK2CONTEXT_LEFTABOVE;
- dx->detoken.ptr_onyx_coef_bands_x = onyx_coef_bands_x;
- dx->detoken.scan = default_zig_zag1d;
- dx->detoken.teb_base_ptr = onyxd_token_extra_bits2;
-
- dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
- dx->detoken.A = x->above_context;
- dx->detoken.L = x->left_context;
-
- dx->detoken.coef_probs[0] = (unsigned char *)( oc->fc.coef_probs [0] [ 0 ] [0]);
- dx->detoken.coef_probs[1] = (unsigned char *)( oc->fc.coef_probs [1] [ 0 ] [0]);
- dx->detoken.coef_probs[2] = (unsigned char *)( oc->fc.coef_probs [2] [ 0 ] [0]);
- dx->detoken.coef_probs[3] = (unsigned char *)( oc->fc.coef_probs [3] [ 0 ] [0]);
- */
-
- dx->detoken.current_bc = x->current_bc;
- dx->detoken.A = x->above_context;
- dx->detoken.L = x->left_context;
-
- type = 3;
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- type = 1;
- eobtotal -= 16;
- }
-
- vp8_decode_mb_tokens_v5(&dx->detoken, type);
-
- for (i = 0; i < 25; i++)
- {
- x->Block[i].eob = dx->detoken.eob[i];
- eobtotal += dx->detoken.eob[i];
- }
-
- return eobtotal;
-}
-#endif
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
deleted file mode 100644
index 4d87ee5..0000000
--- a/vp8/decoder/arm/detokenizearm_v6.asm
+++ /dev/null
@@ -1,364 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_decode_mb_tokens_v5|
-
- AREA |.text|, CODE, READONLY ; name this block of code
-
- INCLUDE vpx_asm_offsets.asm
-
-l_qcoeff EQU 0
-l_i EQU 4
-l_type EQU 8
-l_stop EQU 12
-l_c EQU 16
-l_l_ptr EQU 20
-l_a_ptr EQU 24
-l_bc EQU 28
-l_coef_ptr EQU 32
-l_stacksize EQU 64
-
-
-;; constant offsets -- these should be created at build time
-c_onyxblock2left_offset EQU 25
-c_onyxblock2above_offset EQU 50
-c_entropy_nodes EQU 11
-c_dct_eob_token EQU 11
-
-|vp8_decode_mb_tokens_v5| PROC
- stmdb sp!, {r4 - r11, lr}
- sub sp, sp, #l_stacksize
- mov r7, r1
- mov r9, r0 ;DETOK *detoken
-
- ldr r1, [r9, #detok_current_bc]
- ldr r0, [r9, #detok_qcoeff_start_ptr]
- mov r11, #0
- mov r3, #0x10
-
- cmp r7, #1
- addeq r11, r11, #24
- addeq r3, r3, #8
- addeq r0, r0, #3, 24
-
- str r0, [sp, #l_qcoeff]
- str r11, [sp, #l_i]
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
- str r1, [sp, #l_bc]
-
- add lr, r9, r7, lsl #2
-
- ldr r2, [r1, #bool_decoder_buffer]
- ldr r3, [r1, #bool_decoder_pos]
-
- ldr r10, [lr, #detok_coef_probs]
- ldr r5, [r1, #bool_decoder_count]
- ldr r6, [r1, #bool_decoder_range]
- ldr r4, [r1, #bool_decoder_value]
- add r8, r2, r3
-
- str r10, [sp, #l_coef_ptr]
-
-
- ;align 4
-BLOCK_LOOP
- ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove]
- ldr r2, [r9, #DETOK_A]
- ldr r1, [r9, #DETOK_L]
- ldrb r12, [r3, +r11] ; detoken->ptr_onyxblock2context_leftabove[i]
-
- cmp r7, #0 ; check type
- moveq r7, #1
- movne r7, #0
-
- ldr r0, [r2, +r12, lsl #2] ; a
- add r1, r1, r12, lsl #4
- add r3, r3, r11
-
- ldrb r2, [r3, #c_onyxblock2above_offset]
- ldrb r3, [r3, #c_onyxblock2left_offset]
- mov lr, #c_entropy_nodes
-;; ;++
-
- ldr r2, [r0, +r2, lsl #2]!
- add r3, r1, r3, lsl #2
- str r3, [sp, #l_l_ptr]
- ldr r3, [r3]
-
- cmp r2, #0
- movne r2, #1
- cmp r3, #0
- addne r2, r2, #1
-
- str r0, [sp, #l_a_ptr]
- smlabb r0, r2, lr, r10
- mov r1, #0 ; t = 0
- str r7, [sp, #l_c]
-
- ;align 4
-COEFF_LOOP
- ldr r3, [r9, #detok_ptr_onyx_coef_bands_x]
- ldr lr, [r9, #detok_onyx_coef_tree_ptr]
-
-;;the following two lines are used if onyx_coef_bands_x is UINT16
-;; add r3, r3, r7, lsl #1
-;; ldrh r3, [r3]
-
-;;the following line is used if onyx_coef_bands_x is UINT8
- ldrb r3, [r7, +r3]
-
-
-;; ;++
-;; pld [r8]
- ;++
- add r0, r0, r3
-
- ;align 4
-get_token_loop
- ldrb r2, [r0, +r1, asr #1]
- mov r3, r6, lsl #8
- sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
- mov r10, #1
-
- smlawb r2, r3, r2, r10
- ldrb r12, [r8] ;load cx data byte in stall slot
- ;++
-
- subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
- addhs r1, r1, #1 ;t += 1
- movhs r4, r3 ;update value
- subhs r2, r6, r2 ;range = range - split
- movlo r6, r2
-
-;;; ldrsbhs r1, [r1, +lr]
- ldrsb r1, [r1, +lr]
-
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN22@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3
- mov r4, r4, lsl r3
-
-;; use branch for short pipelines ???
-;; bgt |$LN22@decode_mb_to|
-
- addle r5, r5, #8
- rsble r3, r5, #8
- addle r8, r8, #1
- orrle r4, r4, r12, lsl r3
-
-;;|$LN22@decode_mb_to|
-
- cmp r1, #0
- bgt get_token_loop
-
- cmn r1, #c_dct_eob_token ;if(t == -DCT_EOB_TOKEN)
- beq END_OF_BLOCK
-
- rsb lr, r1, #0 ;v = -t;
-
- cmp lr, #4 ;if(v > FOUR_TOKEN)
- ble SKIP_EXTRABITS
-
- ldr r3, [r9, #detok_teb_base_ptr]
- mov r11, #1
- add r7, r3, lr, lsl #4
-
- ldrsh lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val
- ldrsh r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length
-
-extrabits_loop
- add r3, r0, r7
-
- ldrb r2, [r3, #4]
- mov r3, r6, lsl #8
- sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
- mov r10, #1
-
- smlawb r2, r3, r2, r10
- ldrb r12, [r8]
- ;++
-
- subs r10, r4, r2, lsl #24 ;x = value-(split<<24)
- movhs r4, r10 ;update value
- subhs r2, r6, r2 ;range = range - split
- addhs lr, lr, r11, lsl r0 ;v += ((UINT16)1<<bits_count)
- movlo r6, r2 ;range = split
-
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN10@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3 ;range
- mov r4, r4, lsl r3 ;value
-
- addle r5, r5, #8
- addle r8, r8, #1
- rsble r3, r5, #8
- orrle r4, r4, r12, lsl r3
-
-;;|$LN10@decode_mb_to|
- subs r0, r0, #1
- bpl extrabits_loop
-
-
-SKIP_EXTRABITS
- ldr r11, [sp, #l_qcoeff]
- ldr r0, [sp, #l_coef_ptr]
-
- cmp r1, #0 ;check for nonzero token
- beq SKIP_EOB_CHECK ;if t is zero, we will skip the eob table chec
-
- sub r3, r6, #1 ;range - 1
- ;++
- mov r3, r3, lsl #7 ; *= onyx_prob_half (128)
- ;++
- mov r3, r3, lsr #8
- add r2, r3, #1 ;split
-
- subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
- movhs r4, r3 ;update value
- subhs r2, r6, r2 ;range = range - split
- mvnhs r3, lr
- addhs lr, r3, #1 ;v = (v ^ -1) + 1
- movlo r6, r2 ;range = split
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN6@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3
- mov r4, r4, lsl r3
- ldrleb r2, [r8], #1
- addle r5, r5, #8
- rsble r3, r5, #8
- orrle r4, r4, r2, lsl r3
-
-;;|$LN6@decode_mb_to|
- add r0, r0, #0xB
-
- cmn r1, #1
-
- addlt r0, r0, #0xB
-
- mvn r1, #1
-
-SKIP_EOB_CHECK
- ldr r7, [sp, #l_c]
- ldr r3, [r9, #detok_scan]
- add r1, r1, #2
- cmp r7, #(0x10 - 1) ;assume one less for now.... increment below
-
- ldr r3, [r3, +r7, lsl #2]
- add r7, r7, #1
- add r3, r11, r3, lsl #1
-
- str r7, [sp, #l_c]
- strh lr, [r3]
-
- blt COEFF_LOOP
-
- sub r7, r7, #1 ;if(t != -DCT_EOB_TOKEN) --c
-
-END_OF_BLOCK
- ldr r3, [sp, #l_type]
- ldr r10, [sp, #l_coef_ptr]
- ldr r0, [sp, #l_qcoeff]
- ldr r11, [sp, #l_i]
- ldr r12, [sp, #l_stop]
-
- cmp r3, #0
- moveq r1, #1
- movne r1, #0
- add r3, r11, r9
-
- cmp r7, r1
- strb r7, [r3, #detok_eob]
-
- ldr r7, [sp, #l_l_ptr]
- ldr r2, [sp, #l_a_ptr]
- movne r3, #1
- moveq r3, #0
-
- add r0, r0, #0x20
- add r11, r11, #1
- str r3, [r7]
- str r3, [r2]
- str r0, [sp, #l_qcoeff]
- str r11, [sp, #l_i]
-
- cmp r11, r12 ;i >= stop ?
- ldr r7, [sp, #l_type]
- mov lr, #0xB
-
- blt BLOCK_LOOP
-
- cmp r11, #0x19
- bne ln2_decode_mb_to
-
- ldr r12, [r9, #detok_qcoeff_start_ptr]
- ldr r10, [r9, #detok_coef_probs]
- mov r7, #0
- mov r3, #0x10
- str r12, [sp, #l_qcoeff]
- str r7, [sp, #l_i]
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
-
- str r10, [sp, #l_coef_ptr]
-
- b BLOCK_LOOP
-
-ln2_decode_mb_to
- cmp r11, #0x10
- bne ln1_decode_mb_to
-
- ldr r10, [r9, #0x30]
-
- mov r7, #2
- mov r3, #0x18
-
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
-
- str r10, [sp, #l_coef_ptr]
- b BLOCK_LOOP
-
-ln1_decode_mb_to
- ldr r2, [sp, #l_bc]
- mov r0, #0
- nop
-
- ldr r3, [r2, #bool_decoder_buffer]
- str r5, [r2, #bool_decoder_count]
- str r4, [r2, #bool_decoder_value]
- sub r3, r8, r3
- str r3, [r2, #bool_decoder_pos]
- str r6, [r2, #bool_decoder_range]
-
- add sp, sp, #l_stacksize
- ldmia sp!, {r4 - r11, pc}
-
- ENDP ; |vp8_decode_mb_tokens_v5|
-
- END
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
index 455c83a..9dcf7b6 100644
--- a/vp8/decoder/arm/dsystemdependent.c
+++ b/vp8/decoder/arm/dsystemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -22,20 +23,14 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
pbi->mb.rtcd = &pbi->common.rtcd;
#if HAVE_ARMV7
pbi->dequant.block = vp8_dequantize_b_neon;
- pbi->dequant.idct = vp8_dequant_idct_neon;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.stop = vp8dx_stop_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
#elif HAVE_ARMV6
pbi->dequant.block = vp8_dequantize_b_v6;
- pbi->dequant.idct = vp8_dequant_idct_v6;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.stop = vp8dx_stop_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
index 7ec62a3..ff3ffda 100644
--- a/vp8/decoder/arm/neon/dboolhuff_neon.asm
+++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
index 3392f2c..f68a780 100644
--- a/vp8/decoder/arm/neon/dequantdcidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
@@ -1,38 +1,51 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8_dequant_dc_idct_neon|
+ EXPORT |vp8_dequant_dc_idct_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc);
+;void vp8_dequant_dc_idct_add_neon(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride,
+; int Dc);
; r0 short *input,
; r1 short *dq,
-; r2 short *output,
-; r3 int pitch,
-; (stack) int Dc
-|vp8_dequant_dc_idct_neon| PROC
+; r2 unsigned char *pred
+; r3 unsigned char *dest
+; sp int pitch
+; sp+4 int stride
+; sp+8 int Dc
+|vp8_dequant_dc_idct_add_neon| PROC
vld1.16 {q3, q4}, [r0]
vld1.16 {q5, q6}, [r1]
- ldr r1, [sp] ;load Dc from stack
+ ldr r1, [sp, #8] ;load Dc from stack
- ldr r12, _dcidct_coeff_
+ ldr r12, _CONSTANTS_
vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
vmul.i16 q2, q4, q6
vmov.16 d2[0], r1
+ ldr r1, [sp] ; pitch
+ vld1.32 {d14[0]}, [r2], r1
+ vld1.32 {d14[1]}, [r2], r1
+ vld1.32 {d15[0]}, [r2], r1
+ vld1.32 {d15[1]}, [r2]
+
+ ldr r1, [sp, #4] ; stride
+
;|short_idct4x4llm_neon| PROC
vld1.16 {d0}, [r12]
vswp d3, d4 ;q2(vp[4] vp[12])
@@ -46,14 +59,9 @@
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+ vqadd.s16 q3, q3, q2
vqadd.s16 q4, q4, q2
- ;d6 - c1:temp1
- ;d7 - d1:temp2
- ;d8 - d1:temp1
- ;d9 - c1:temp2
-
vqsub.s16 d10, d6, d9 ;c1
vqadd.s16 d11, d7, d8 ;d1
@@ -82,7 +90,7 @@
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+ vqadd.s16 q3, q3, q2
vqadd.s16 q4, q4, q2
vqsub.s16 d10, d6, d9 ;c1
@@ -100,34 +108,29 @@
vrshr.s16 d4, d4, #3
vrshr.s16 d5, d5, #3
- add r1, r2, r3
- add r12, r1, r3
- add r0, r12, r3
-
vtrn.32 d2, d4
vtrn.32 d3, d5
vtrn.16 d2, d3
vtrn.16 d4, d5
- vst1.16 {d2}, [r2]
- vst1.16 {d3}, [r1]
- vst1.16 {d4}, [r12]
- vst1.16 {d5}, [r0]
+ vaddw.u8 q1, q1, d14
+ vaddw.u8 q2, q2, d15
- bx lr
+ vqmovun.s16 d0, q1
+ vqmovun.s16 d1, q2
+
+ vst1.32 {d0[0]}, [r3], r1
+ vst1.32 {d0[1]}, [r3], r1
+ vst1.32 {d1[0]}, [r3], r1
+ vst1.32 {d1[1]}, [r3]
- ENDP
+ bx lr
-;-----------------
- AREA dcidct4x4_dat, DATA, READWRITE ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_dcidct_coeff_
- DCD dcidct_coeff
-dcidct_coeff
- DCD 0x4e7b4e7b, 0x8a8c8a8c
+ ENDP ; |vp8_dequant_dc_idct_add_neon|
-;20091, 20091, 35468, 35468
+; Constant Pool
+_CONSTANTS_ DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2 DCD 0x8a8c8a8c
END
diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm
index bba4d5d..1923be4 100644
--- a/vp8/decoder/arm/neon/dequantidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequant_idct_neon.asm
@@ -1,29 +1,41 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8_dequant_idct_neon|
+ EXPORT |vp8_dequant_idct_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch);
+;void vp8_dequant_idct_neon(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride)
; r0 short *input,
; r1 short *dq,
-; r2 short *output,
-; r3 int pitch,
-|vp8_dequant_idct_neon| PROC
+; r2 unsigned char *pred
+; r3 unsigned char *dest
+; sp int pitch
+; sp+4 int stride
+
+|vp8_dequant_idct_add_neon| PROC
vld1.16 {q3, q4}, [r0]
vld1.16 {q5, q6}, [r1]
+ ldr r1, [sp] ; pitch
+ vld1.32 {d14[0]}, [r2], r1
+ vld1.32 {d14[1]}, [r2], r1
+ vld1.32 {d15[0]}, [r2], r1
+ vld1.32 {d15[1]}, [r2]
+
+ ldr r1, [sp, #4] ; stride
- ldr r12, _didct_coeff_
+ ldr r12, _CONSTANTS_
vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
vmul.i16 q2, q4, q6
@@ -41,14 +53,9 @@
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+ vqadd.s16 q3, q3, q2
vqadd.s16 q4, q4, q2
- ;d6 - c1:temp1
- ;d7 - d1:temp2
- ;d8 - d1:temp1
- ;d9 - c1:temp2
-
vqsub.s16 d10, d6, d9 ;c1
vqadd.s16 d11, d7, d8 ;d1
@@ -77,7 +84,7 @@
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+ vqadd.s16 q3, q3, q2
vqadd.s16 q4, q4, q2
vqsub.s16 d10, d6, d9 ;c1
@@ -95,34 +102,29 @@
vrshr.s16 d4, d4, #3
vrshr.s16 d5, d5, #3
- add r1, r2, r3
- add r12, r1, r3
- add r0, r12, r3
-
vtrn.32 d2, d4
vtrn.32 d3, d5
vtrn.16 d2, d3
vtrn.16 d4, d5
- vst1.16 {d2}, [r2]
- vst1.16 {d3}, [r1]
- vst1.16 {d4}, [r12]
- vst1.16 {d5}, [r0]
+ vaddw.u8 q1, q1, d14
+ vaddw.u8 q2, q2, d15
- bx lr
+ vqmovun.s16 d0, q1
+ vqmovun.s16 d1, q2
+
+ vst1.32 {d0[0]}, [r3], r1
+ vst1.32 {d0[1]}, [r3], r1
+ vst1.32 {d1[0]}, [r3], r1
+ vst1.32 {d1[1]}, [r3]
- ENDP
+ bx lr
-;-----------------
- AREA didct4x4_dat, DATA, READWRITE ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_didct_coeff_
- DCD didct_coeff
-didct_coeff
- DCD 0x4e7b4e7b, 0x8a8c8a8c
+ ENDP ; |vp8_dequant_idct_add_neon|
-;20091, 20091, 35468, 35468
+; Constant Pool
+_CONSTANTS_ DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2 DCD 0x8a8c8a8c
END
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
index 1bde946..c8e0c31 100644
--- a/vp8/decoder/arm/neon/dequantizeb_neon.asm
+++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
new file mode 100644
index 0000000..4725e62
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_neon
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_dc_idct_add_neon (q, dq, pre, dst, 16, stride, dc[0]);
+ else
+ vp8_dc_only_idct_add_neon (dc[0], pre, dst, 16, stride);
+
+ if (eobs[1] > 1)
+ vp8_dequant_dc_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+ else
+ vp8_dc_only_idct_add_neon (dc[1], pre+4, dst+4, 16, stride);
+
+ if (eobs[2] > 1)
+ vp8_dequant_dc_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+ else
+ vp8_dc_only_idct_add_neon (dc[2], pre+8, dst+8, 16, stride);
+
+ if (eobs[3] > 1)
+ vp8_dequant_dc_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+ else
+ vp8_dc_only_idct_add_neon (dc[3], pre+12, dst+12, 16, stride);
+
+ q += 64;
+ dc += 4;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_y_block_neon
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_neon (q, dq, pre, dst, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dst+4, 16, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ if (eobs[2] > 1)
+ vp8_dequant_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_neon (q[32]*dq[0], pre+8, dst+8, 16, stride);
+ ((int *)(q+32))[0] = 0;
+ }
+
+ if (eobs[3] > 1)
+ vp8_dequant_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_neon (q[48]*dq[0], pre+12, dst+12, 16, stride);
+ ((int *)(q+48))[0] = 0;
+ }
+
+ q += 64;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_neon
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_neon (q, dq, pre, dstu, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstu+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstu += 4*stride;
+ eobs += 2;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_neon (q, dq, pre, dstv, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstv+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstv += 4*stride;
+ eobs += 2;
+ }
+}