summaryrefslogtreecommitdiffstats
path: root/libvpx/vp8
diff options
context:
space:
mode:
Diffstat (limited to 'libvpx/vp8')
-rw-r--r--libvpx/vp8/common/alloccommon.h12
-rw-r--r--libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm8
-rw-r--r--libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm4
-rw-r--r--libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm8
-rw-r--r--libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm8
-rw-r--r--libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm8
-rw-r--r--libvpx/vp8/common/arm/bilinearfilter_arm.h14
-rw-r--r--libvpx/vp8/common/arm/neon/bilinearpredict16x16_neon.asm357
-rw-r--r--libvpx/vp8/common/arm/neon/bilinearpredict4x4_neon.asm130
-rw-r--r--libvpx/vp8/common/arm/neon/bilinearpredict8x4_neon.asm135
-rw-r--r--libvpx/vp8/common/arm/neon/bilinearpredict8x8_neon.asm183
-rw-r--r--libvpx/vp8/common/arm/neon/bilinearpredict_neon.c696
-rw-r--r--libvpx/vp8/common/arm/neon/copymem16x16_neon.asm59
-rw-r--r--libvpx/vp8/common/arm/neon/copymem8x4_neon.asm34
-rw-r--r--libvpx/vp8/common/arm/neon/copymem8x8_neon.asm43
-rw-r--r--libvpx/vp8/common/arm/neon/copymem_neon.c59
-rw-r--r--libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.asm54
-rw-r--r--libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c42
-rw-r--r--libvpx/vp8/common/arm/neon/dequant_idct_neon.asm131
-rw-r--r--libvpx/vp8/common/arm/neon/dequant_idct_neon.c142
-rw-r--r--libvpx/vp8/common/arm/neon/dequantizeb_neon.asm34
-rw-r--r--libvpx/vp8/common/arm/neon/dequantizeb_neon.c27
-rw-r--r--libvpx/vp8/common/blockd.h14
-rw-r--r--libvpx/vp8/common/coefupdateprobs.h12
-rw-r--r--libvpx/vp8/common/common.h14
-rw-r--r--libvpx/vp8/common/default_coef_probs.h12
-rw-r--r--libvpx/vp8/common/entropy.h12
-rw-r--r--libvpx/vp8/common/entropymode.h12
-rw-r--r--libvpx/vp8/common/entropymv.h12
-rw-r--r--libvpx/vp8/common/extend.h12
-rw-r--r--libvpx/vp8/common/filter.h12
-rw-r--r--libvpx/vp8/common/findnearmv.h12
-rw-r--r--libvpx/vp8/common/header.h12
-rw-r--r--libvpx/vp8/common/invtrans.h12
-rw-r--r--libvpx/vp8/common/loopfilter.h12
-rw-r--r--libvpx/vp8/common/modecont.h12
-rw-r--r--libvpx/vp8/common/mv.h12
-rw-r--r--libvpx/vp8/common/onyx.h10
-rw-r--r--libvpx/vp8/common/onyxc_int.h12
-rw-r--r--libvpx/vp8/common/onyxd.h6
-rw-r--r--libvpx/vp8/common/postproc.c5
-rw-r--r--libvpx/vp8/common/postproc.h12
-rw-r--r--libvpx/vp8/common/ppflags.h12
-rw-r--r--libvpx/vp8/common/pragmas.h12
-rw-r--r--libvpx/vp8/common/quant_common.h13
-rw-r--r--libvpx/vp8/common/reconinter.h12
-rw-r--r--libvpx/vp8/common/reconintra4x4.h12
-rw-r--r--libvpx/vp8/common/rtcd_defs.pl541
-rw-r--r--libvpx/vp8/common/rtcd_defs.sh542
-rw-r--r--libvpx/vp8/common/setupintrarecon.h12
-rw-r--r--libvpx/vp8/common/swapyv12buffer.h12
-rw-r--r--libvpx/vp8/common/systemdependent.h12
-rw-r--r--libvpx/vp8/common/threading.h12
-rw-r--r--libvpx/vp8/common/treecoder.h12
-rw-r--r--libvpx/vp8/common/variance.h12
-rw-r--r--libvpx/vp8/common/vp8_entropymodedata.h12
-rw-r--r--libvpx/vp8/common/x86/filter_x86.h14
-rw-r--r--libvpx/vp8/common/x86/loopfilter_mmx.asm4
-rw-r--r--libvpx/vp8/common/x86/loopfilter_sse2.asm4
-rw-r--r--libvpx/vp8/decoder/dboolhuff.h14
-rw-r--r--libvpx/vp8/decoder/decodeframe.c (renamed from libvpx/vp8/decoder/decodframe.c)11
-rw-r--r--libvpx/vp8/decoder/decodemv.h14
-rw-r--r--libvpx/vp8/decoder/decoderthreading.h14
-rw-r--r--libvpx/vp8/decoder/detokenize.h14
-rw-r--r--libvpx/vp8/decoder/ec_types.h14
-rw-r--r--libvpx/vp8/decoder/error_concealment.h14
-rw-r--r--libvpx/vp8/decoder/onyxd_int.h14
-rw-r--r--libvpx/vp8/decoder/treereader.h14
-rw-r--r--libvpx/vp8/encoder/arm/neon/denoising_neon.c167
-rw-r--r--libvpx/vp8/encoder/bitstream.h12
-rw-r--r--libvpx/vp8/encoder/block.h12
-rw-r--r--libvpx/vp8/encoder/boolhuff.h12
-rw-r--r--libvpx/vp8/encoder/dct_value_cost.h13
-rw-r--r--libvpx/vp8/encoder/dct_value_tokens.h13
-rw-r--r--libvpx/vp8/encoder/defaultcoefcounts.h13
-rw-r--r--libvpx/vp8/encoder/denoising.h10
-rw-r--r--libvpx/vp8/encoder/encodeframe.h12
-rw-r--r--libvpx/vp8/encoder/encodeintra.h12
-rw-r--r--libvpx/vp8/encoder/encodemb.h12
-rw-r--r--libvpx/vp8/encoder/encodemv.h12
-rw-r--r--libvpx/vp8/encoder/firstpass.c16
-rw-r--r--libvpx/vp8/encoder/firstpass.h12
-rw-r--r--libvpx/vp8/encoder/lookahead.h12
-rw-r--r--libvpx/vp8/encoder/mcomp.h12
-rw-r--r--libvpx/vp8/encoder/modecosts.h12
-rw-r--r--libvpx/vp8/encoder/mr_dissim.h12
-rw-r--r--libvpx/vp8/encoder/onyx_if.c91
-rw-r--r--libvpx/vp8/encoder/onyx_int.h12
-rw-r--r--libvpx/vp8/encoder/pickinter.h12
-rw-r--r--libvpx/vp8/encoder/psnr.c31
-rw-r--r--libvpx/vp8/encoder/psnr.h17
-rw-r--r--libvpx/vp8/encoder/quantize.h12
-rw-r--r--libvpx/vp8/encoder/ratectrl.c9
-rw-r--r--libvpx/vp8/encoder/ratectrl.h11
-rw-r--r--libvpx/vp8/encoder/rdopt.c9
-rw-r--r--libvpx/vp8/encoder/rdopt.h12
-rw-r--r--libvpx/vp8/encoder/segmentation.h12
-rw-r--r--libvpx/vp8/encoder/temporal_filter.c1
-rw-r--r--libvpx/vp8/encoder/tokenize.c12
-rw-r--r--libvpx/vp8/encoder/tokenize.h14
-rw-r--r--libvpx/vp8/encoder/treewriter.h12
-rw-r--r--libvpx/vp8/vp8_common.mk22
-rw-r--r--libvpx/vp8/vp8_cx_iface.c6
-rw-r--r--libvpx/vp8/vp8_dx_iface.c1
-rw-r--r--libvpx/vp8/vp8cx.mk2
-rw-r--r--libvpx/vp8/vp8cx_arm.mk1
-rw-r--r--libvpx/vp8/vp8dx.mk2
107 files changed, 2464 insertions, 2003 deletions
diff --git a/libvpx/vp8/common/alloccommon.h b/libvpx/vp8/common/alloccommon.h
index ea93c25..93e99d7 100644
--- a/libvpx/vp8/common/alloccommon.h
+++ b/libvpx/vp8/common/alloccommon.h
@@ -9,15 +9,23 @@
*/
-#ifndef __INC_ALLOCCOMMON_H
-#define __INC_ALLOCCOMMON_H
+#ifndef VP8_COMMON_ALLOCCOMMON_H_
+#define VP8_COMMON_ALLOCCOMMON_H_
#include "onyxc_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp8_create_common(VP8_COMMON *oci);
void vp8_remove_common(VP8_COMMON *oci);
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
void vp8_setup_version(VP8_COMMON *oci);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_ALLOCCOMMON_H_
diff --git a/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm b/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
index dc84c30..3991957 100644
--- a/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
+++ b/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
@@ -53,7 +53,7 @@ loop
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
adds r8, r8, r4 ; add positive differences to sum
- subs r8, r8, r5 ; substract negative differences from sum
+ subs r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -77,7 +77,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -101,7 +101,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -127,7 +127,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
diff --git a/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm b/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
index adc353d..915ee49 100644
--- a/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
+++ b/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
@@ -51,7 +51,7 @@ loop
orr r8, r8, r10 ; differences of all 4 pixels
; calculate total sum
add r4, r4, r6 ; add positive differences to sum
- sub r4, r4, r7 ; substract negative differences from sum
+ sub r4, r4, r7 ; subtract negative differences from sum
; calculate sse
uxtb16 r7, r8 ; byte (two pixels) to halfwords
@@ -77,7 +77,7 @@ loop
; calculate total sum
add r4, r4, r6 ; add positive differences to sum
- sub r4, r4, r7 ; substract negative differences from sum
+ sub r4, r4, r7 ; subtract negative differences from sum
; calculate sse
uxtb16 r7, r8 ; byte (two pixels) to halfwords
diff --git a/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
index dd2ce68..3668dc5 100644
--- a/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+++ b/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
@@ -58,7 +58,7 @@ loop
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
adds r8, r8, r4 ; add positive differences to sum
- subs r8, r8, r5 ; substract negative differences from sum
+ subs r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -89,7 +89,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -120,7 +120,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -153,7 +153,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
diff --git a/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
index f972d9b..b4e0959 100644
--- a/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+++ b/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
@@ -69,7 +69,7 @@ loop
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
adds r8, r8, r4 ; add positive differences to sum
- subs r8, r8, r5 ; substract negative differences from sum
+ subs r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -111,7 +111,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -153,7 +153,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -195,7 +195,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
diff --git a/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
index f5da9c0..10863e2 100644
--- a/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+++ b/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
@@ -59,7 +59,7 @@ loop
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
adds r8, r8, r4 ; add positive differences to sum
- subs r8, r8, r5 ; substract negative differences from sum
+ subs r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -90,7 +90,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -121,7 +121,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
@@ -154,7 +154,7 @@ loop
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; substract negative differences from sum
+ sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
diff --git a/libvpx/vp8/common/arm/bilinearfilter_arm.h b/libvpx/vp8/common/arm/bilinearfilter_arm.h
index b7155d3..6b84e6f 100644
--- a/libvpx/vp8/common/arm/bilinearfilter_arm.h
+++ b/libvpx/vp8/common/arm/bilinearfilter_arm.h
@@ -9,8 +9,12 @@
*/
-#ifndef BILINEARFILTER_ARM_H
-#define BILINEARFILTER_ARM_H
+#ifndef VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
+#define VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
extern void vp8_filter_block2d_bil_first_pass_armv6
(
@@ -32,4 +36,8 @@ extern void vp8_filter_block2d_bil_second_pass_armv6
const short *vp8_filter
);
-#endif /* BILINEARFILTER_ARM_H */
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
diff --git a/libvpx/vp8/common/arm/neon/bilinearpredict16x16_neon.asm b/libvpx/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
deleted file mode 100644
index e392786..0000000
--- a/libvpx/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
+++ /dev/null
@@ -1,357 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_bilinear_predict16x16_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0 unsigned char *src_ptr,
-; r1 int src_pixels_per_line,
-; r2 int xoffset,
-; r3 int yoffset,
-; r4 unsigned char *dst_ptr,
-; stack(r5) int dst_pitch
-
-|vp8_bilinear_predict16x16_neon| PROC
- push {r4-r5, lr}
-
- adr r12, bifilter16_coeff
- ldr r4, [sp, #12] ;load parameters from stack
- ldr r5, [sp, #16] ;load parameters from stack
-
- cmp r2, #0 ;skip first_pass filter if xoffset=0
- beq secondpass_bfilter16x16_only
-
- add r2, r12, r2, lsl #3 ;calculate filter location
-
- cmp r3, #0 ;skip second_pass filter if yoffset=0
-
- vld1.s32 {d31}, [r2] ;load first_pass filter
-
- beq firstpass_bfilter16x16_only
-
- sub sp, sp, #272 ;reserve space on stack for temporary storage
- vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data
- mov lr, sp
- vld1.u8 {d5, d6, d7}, [r0], r1
-
- mov r2, #3 ;loop counter
- vld1.u8 {d8, d9, d10}, [r0], r1
-
- vdup.8 d0, d31[0] ;first_pass filter (d0 d1)
- vld1.u8 {d11, d12, d13}, [r0], r1
-
- vdup.8 d1, d31[4]
-
-;First Pass: output_height lines x output_width columns (17x16)
-filt_blk2d_fp16x16_loop_neon
- pld [r0]
- pld [r0, r1]
- pld [r0, r1, lsl #1]
-
- vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp8_filter[0])
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d5, d0
- vmull.u8 q10, d6, d0
- vmull.u8 q11, d8, d0
- vmull.u8 q12, d9, d0
- vmull.u8 q13, d11, d0
- vmull.u8 q14, d12, d0
-
- vext.8 d2, d2, d3, #1 ;construct src_ptr[1]
- vext.8 d5, d5, d6, #1
- vext.8 d8, d8, d9, #1
- vext.8 d11, d11, d12, #1
-
- vmlal.u8 q7, d2, d1 ;(src_ptr[0] * vp8_filter[1])
- vmlal.u8 q9, d5, d1
- vmlal.u8 q11, d8, d1
- vmlal.u8 q13, d11, d1
-
- vext.8 d3, d3, d4, #1
- vext.8 d6, d6, d7, #1
- vext.8 d9, d9, d10, #1
- vext.8 d12, d12, d13, #1
-
- vmlal.u8 q8, d3, d1 ;(src_ptr[0] * vp8_filter[1])
- vmlal.u8 q10, d6, d1
- vmlal.u8 q12, d9, d1
- vmlal.u8 q14, d12, d1
-
- subs r2, r2, #1
-
- vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d15, q8, #7
- vqrshrn.u16 d16, q9, #7
- vqrshrn.u16 d17, q10, #7
- vqrshrn.u16 d18, q11, #7
- vqrshrn.u16 d19, q12, #7
- vqrshrn.u16 d20, q13, #7
-
- vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data
- vqrshrn.u16 d21, q14, #7
- vld1.u8 {d5, d6, d7}, [r0], r1
-
- vst1.u8 {d14, d15, d16, d17}, [lr]! ;store result
- vld1.u8 {d8, d9, d10}, [r0], r1
- vst1.u8 {d18, d19, d20, d21}, [lr]!
- vld1.u8 {d11, d12, d13}, [r0], r1
-
- bne filt_blk2d_fp16x16_loop_neon
-
-;First-pass filtering for rest 5 lines
- vld1.u8 {d14, d15, d16}, [r0], r1
-
- vmull.u8 q9, d2, d0 ;(src_ptr[0] * vp8_filter[0])
- vmull.u8 q10, d3, d0
- vmull.u8 q11, d5, d0
- vmull.u8 q12, d6, d0
- vmull.u8 q13, d8, d0
- vmull.u8 q14, d9, d0
-
- vext.8 d2, d2, d3, #1 ;construct src_ptr[1]
- vext.8 d5, d5, d6, #1
- vext.8 d8, d8, d9, #1
-
- vmlal.u8 q9, d2, d1 ;(src_ptr[0] * vp8_filter[1])
- vmlal.u8 q11, d5, d1
- vmlal.u8 q13, d8, d1
-
- vext.8 d3, d3, d4, #1
- vext.8 d6, d6, d7, #1
- vext.8 d9, d9, d10, #1
-
- vmlal.u8 q10, d3, d1 ;(src_ptr[0] * vp8_filter[1])
- vmlal.u8 q12, d6, d1
- vmlal.u8 q14, d9, d1
-
- vmull.u8 q1, d11, d0
- vmull.u8 q2, d12, d0
- vmull.u8 q3, d14, d0
- vmull.u8 q4, d15, d0
-
- vext.8 d11, d11, d12, #1 ;construct src_ptr[1]
- vext.8 d14, d14, d15, #1
-
- vmlal.u8 q1, d11, d1 ;(src_ptr[0] * vp8_filter[1])
- vmlal.u8 q3, d14, d1
-
- vext.8 d12, d12, d13, #1
- vext.8 d15, d15, d16, #1
-
- vmlal.u8 q2, d12, d1 ;(src_ptr[0] * vp8_filter[1])
- vmlal.u8 q4, d15, d1
-
- vqrshrn.u16 d10, q9, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d11, q10, #7
- vqrshrn.u16 d12, q11, #7
- vqrshrn.u16 d13, q12, #7
- vqrshrn.u16 d14, q13, #7
- vqrshrn.u16 d15, q14, #7
- vqrshrn.u16 d16, q1, #7
- vqrshrn.u16 d17, q2, #7
- vqrshrn.u16 d18, q3, #7
- vqrshrn.u16 d19, q4, #7
-
- vst1.u8 {d10, d11, d12, d13}, [lr]! ;store result
- vst1.u8 {d14, d15, d16, d17}, [lr]!
- vst1.u8 {d18, d19}, [lr]!
-
-;Second pass: 16x16
-;secondpass_filter
- add r3, r12, r3, lsl #3
- sub lr, lr, #272
-
- vld1.u32 {d31}, [r3] ;load second_pass filter
-
- vld1.u8 {d22, d23}, [lr]! ;load src data
-
- vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1)
- vdup.8 d1, d31[4]
- mov r12, #4 ;loop counter
-
-filt_blk2d_sp16x16_loop_neon
- vld1.u8 {d24, d25}, [lr]!
- vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp8_filter[0])
- vld1.u8 {d26, d27}, [lr]!
- vmull.u8 q2, d23, d0
- vld1.u8 {d28, d29}, [lr]!
- vmull.u8 q3, d24, d0
- vld1.u8 {d30, d31}, [lr]!
-
- vmull.u8 q4, d25, d0
- vmull.u8 q5, d26, d0
- vmull.u8 q6, d27, d0
- vmull.u8 q7, d28, d0
- vmull.u8 q8, d29, d0
-
- vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * vp8_filter[1])
- vmlal.u8 q2, d25, d1
- vmlal.u8 q3, d26, d1
- vmlal.u8 q4, d27, d1
- vmlal.u8 q5, d28, d1
- vmlal.u8 q6, d29, d1
- vmlal.u8 q7, d30, d1
- vmlal.u8 q8, d31, d1
-
- subs r12, r12, #1
-
- vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d3, q2, #7
- vqrshrn.u16 d4, q3, #7
- vqrshrn.u16 d5, q4, #7
- vqrshrn.u16 d6, q5, #7
- vqrshrn.u16 d7, q6, #7
- vqrshrn.u16 d8, q7, #7
- vqrshrn.u16 d9, q8, #7
-
- vst1.u8 {d2, d3}, [r4], r5 ;store result
- vst1.u8 {d4, d5}, [r4], r5
- vst1.u8 {d6, d7}, [r4], r5
- vmov q11, q15
- vst1.u8 {d8, d9}, [r4], r5
-
- bne filt_blk2d_sp16x16_loop_neon
-
- add sp, sp, #272
-
- pop {r4-r5,pc}
-
-;--------------------
-firstpass_bfilter16x16_only
- mov r2, #4 ;loop counter
- vdup.8 d0, d31[0] ;first_pass filter (d0 d1)
- vdup.8 d1, d31[4]
-
-;First Pass: output_height lines x output_width columns (16x16)
-filt_blk2d_fpo16x16_loop_neon
- vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data
- vld1.u8 {d5, d6, d7}, [r0], r1
- vld1.u8 {d8, d9, d10}, [r0], r1
- vld1.u8 {d11, d12, d13}, [r0], r1
-
- pld [r0]
- pld [r0, r1]
- pld [r0, r1, lsl #1]
-
- vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp8_filter[0])
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d5, d0
- vmull.u8 q10, d6, d0
- vmull.u8 q11, d8, d0
- vmull.u8 q12, d9, d0
- vmull.u8 q13, d11, d0
- vmull.u8 q14, d12, d0
-
- vext.8 d2, d2, d3, #1 ;construct src_ptr[1]
- vext.8 d5, d5, d6, #1
- vext.8 d8, d8, d9, #1
- vext.8 d11, d11, d12, #1
-
- vmlal.u8 q7, d2, d1 ;(src_ptr[0] * vp8_filter[1])
- vmlal.u8 q9, d5, d1
- vmlal.u8 q11, d8, d1
- vmlal.u8 q13, d11, d1
-
- vext.8 d3, d3, d4, #1
- vext.8 d6, d6, d7, #1
- vext.8 d9, d9, d10, #1
- vext.8 d12, d12, d13, #1
-
- vmlal.u8 q8, d3, d1 ;(src_ptr[0] * vp8_filter[1])
- vmlal.u8 q10, d6, d1
- vmlal.u8 q12, d9, d1
- vmlal.u8 q14, d12, d1
-
- subs r2, r2, #1
-
- vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d15, q8, #7
- vqrshrn.u16 d16, q9, #7
- vqrshrn.u16 d17, q10, #7
- vqrshrn.u16 d18, q11, #7
- vqrshrn.u16 d19, q12, #7
- vqrshrn.u16 d20, q13, #7
- vst1.u8 {d14, d15}, [r4], r5 ;store result
- vqrshrn.u16 d21, q14, #7
-
- vst1.u8 {d16, d17}, [r4], r5
- vst1.u8 {d18, d19}, [r4], r5
- vst1.u8 {d20, d21}, [r4], r5
-
- bne filt_blk2d_fpo16x16_loop_neon
- pop {r4-r5,pc}
-
-;---------------------
-secondpass_bfilter16x16_only
-;Second pass: 16x16
-;secondpass_filter
- add r3, r12, r3, lsl #3
- mov r12, #4 ;loop counter
- vld1.u32 {d31}, [r3] ;load second_pass filter
- vld1.u8 {d22, d23}, [r0], r1 ;load src data
-
- vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1)
- vdup.8 d1, d31[4]
-
-filt_blk2d_spo16x16_loop_neon
- vld1.u8 {d24, d25}, [r0], r1
- vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp8_filter[0])
- vld1.u8 {d26, d27}, [r0], r1
- vmull.u8 q2, d23, d0
- vld1.u8 {d28, d29}, [r0], r1
- vmull.u8 q3, d24, d0
- vld1.u8 {d30, d31}, [r0], r1
-
- vmull.u8 q4, d25, d0
- vmull.u8 q5, d26, d0
- vmull.u8 q6, d27, d0
- vmull.u8 q7, d28, d0
- vmull.u8 q8, d29, d0
-
- vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * vp8_filter[1])
- vmlal.u8 q2, d25, d1
- vmlal.u8 q3, d26, d1
- vmlal.u8 q4, d27, d1
- vmlal.u8 q5, d28, d1
- vmlal.u8 q6, d29, d1
- vmlal.u8 q7, d30, d1
- vmlal.u8 q8, d31, d1
-
- vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d3, q2, #7
- vqrshrn.u16 d4, q3, #7
- vqrshrn.u16 d5, q4, #7
- vqrshrn.u16 d6, q5, #7
- vqrshrn.u16 d7, q6, #7
- vqrshrn.u16 d8, q7, #7
- vqrshrn.u16 d9, q8, #7
-
- vst1.u8 {d2, d3}, [r4], r5 ;store result
- subs r12, r12, #1
- vst1.u8 {d4, d5}, [r4], r5
- vmov q11, q15
- vst1.u8 {d6, d7}, [r4], r5
- vst1.u8 {d8, d9}, [r4], r5
-
- bne filt_blk2d_spo16x16_loop_neon
- pop {r4-r5,pc}
-
- ENDP
-
-;-----------------
-
-bifilter16_coeff
- DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
- END
diff --git a/libvpx/vp8/common/arm/neon/bilinearpredict4x4_neon.asm b/libvpx/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
deleted file mode 100644
index 0ac6243..0000000
--- a/libvpx/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
+++ /dev/null
@@ -1,130 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_bilinear_predict4x4_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0 unsigned char *src_ptr,
-; r1 int src_pixels_per_line,
-; r2 int xoffset,
-; r3 int yoffset,
-; r4 unsigned char *dst_ptr,
-; stack(lr) int dst_pitch
-
-|vp8_bilinear_predict4x4_neon| PROC
- push {r4, lr}
-
- adr r12, bifilter4_coeff
- ldr r4, [sp, #8] ;load parameters from stack
- ldr lr, [sp, #12] ;load parameters from stack
-
- cmp r2, #0 ;skip first_pass filter if xoffset=0
- beq skip_firstpass_filter
-
-;First pass: output_height lines x output_width columns (5x4)
- vld1.u8 {d2}, [r0], r1 ;load src data
- add r2, r12, r2, lsl #3 ;calculate Hfilter location (2coeffsx4bytes=8bytes)
-
- vld1.u8 {d3}, [r0], r1
- vld1.u32 {d31}, [r2] ;first_pass filter
-
- vld1.u8 {d4}, [r0], r1
- vdup.8 d0, d31[0] ;first_pass filter (d0-d1)
- vld1.u8 {d5}, [r0], r1
- vdup.8 d1, d31[4]
- vld1.u8 {d6}, [r0], r1
-
- vshr.u64 q4, q1, #8 ;construct src_ptr[1]
- vshr.u64 q5, q2, #8
- vshr.u64 d12, d6, #8
-
- vzip.32 d2, d3 ;put 2-line data in 1 register (src_ptr[0])
- vzip.32 d4, d5
- vzip.32 d8, d9 ;put 2-line data in 1 register (src_ptr[1])
- vzip.32 d10, d11
-
- vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp8_filter[0])
- vmull.u8 q8, d4, d0
- vmull.u8 q9, d6, d0
-
- vmlal.u8 q7, d8, d1 ;(src_ptr[1] * vp8_filter[1])
- vmlal.u8 q8, d10, d1
- vmlal.u8 q9, d12, d1
-
- vqrshrn.u16 d28, q7, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d29, q8, #7
- vqrshrn.u16 d30, q9, #7
-
-;Second pass: 4x4
-secondpass_filter
- cmp r3, #0 ;skip second_pass filter if yoffset=0
- beq skip_secondpass_filter
-
- add r3, r12, r3, lsl #3 ;calculate Vfilter location
- vld1.u32 {d31}, [r3] ;load second_pass filter
-
- vdup.8 d0, d31[0] ;second_pass filter parameters (d0-d5)
- vdup.8 d1, d31[4]
-
- vmull.u8 q1, d28, d0
- vmull.u8 q2, d29, d0
-
- vext.8 d26, d28, d29, #4 ;construct src_ptr[pixel_step]
- vext.8 d27, d29, d30, #4
-
- vmlal.u8 q1, d26, d1
- vmlal.u8 q2, d27, d1
-
- add r0, r4, lr
- add r1, r0, lr
- add r2, r1, lr
-
- vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d3, q2, #7
-
- vst1.32 {d2[0]}, [r4] ;store result
- vst1.32 {d2[1]}, [r0]
- vst1.32 {d3[0]}, [r1]
- vst1.32 {d3[1]}, [r2]
-
- pop {r4, pc}
-
-;--------------------
-skip_firstpass_filter
-
- vld1.32 {d28[0]}, [r0], r1 ;load src data
- vld1.32 {d28[1]}, [r0], r1
- vld1.32 {d29[0]}, [r0], r1
- vld1.32 {d29[1]}, [r0], r1
- vld1.32 {d30[0]}, [r0], r1
-
- b secondpass_filter
-
-;---------------------
-skip_secondpass_filter
- vst1.32 {d28[0]}, [r4], lr ;store result
- vst1.32 {d28[1]}, [r4], lr
- vst1.32 {d29[0]}, [r4], lr
- vst1.32 {d29[1]}, [r4], lr
-
- pop {r4, pc}
-
- ENDP
-
-;-----------------
-
-bifilter4_coeff
- DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
- END
diff --git a/libvpx/vp8/common/arm/neon/bilinearpredict8x4_neon.asm b/libvpx/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
deleted file mode 100644
index 41f5c45..0000000
--- a/libvpx/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
+++ /dev/null
@@ -1,135 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_bilinear_predict8x4_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0 unsigned char *src_ptr,
-; r1 int src_pixels_per_line,
-; r2 int xoffset,
-; r3 int yoffset,
-; r4 unsigned char *dst_ptr,
-; stack(lr) int dst_pitch
-
-|vp8_bilinear_predict8x4_neon| PROC
- push {r4, lr}
-
- adr r12, bifilter8x4_coeff
- ldr r4, [sp, #8] ;load parameters from stack
- ldr lr, [sp, #12] ;load parameters from stack
-
- cmp r2, #0 ;skip first_pass filter if xoffset=0
- beq skip_firstpass_filter
-
-;First pass: output_height lines x output_width columns (5x8)
- add r2, r12, r2, lsl #3 ;calculate filter location
-
- vld1.u8 {q1}, [r0], r1 ;load src data
- vld1.u32 {d31}, [r2] ;load first_pass filter
- vld1.u8 {q2}, [r0], r1
- vdup.8 d0, d31[0] ;first_pass filter (d0 d1)
- vld1.u8 {q3}, [r0], r1
- vdup.8 d1, d31[4]
- vld1.u8 {q4}, [r0], r1
-
- vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp8_filter[0])
- vld1.u8 {q5}, [r0], r1
- vmull.u8 q7, d4, d0
- vmull.u8 q8, d6, d0
- vmull.u8 q9, d8, d0
- vmull.u8 q10, d10, d0
-
- vext.8 d3, d2, d3, #1 ;construct src_ptr[-1]
- vext.8 d5, d4, d5, #1
- vext.8 d7, d6, d7, #1
- vext.8 d9, d8, d9, #1
- vext.8 d11, d10, d11, #1
-
- vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp8_filter[1])
- vmlal.u8 q7, d5, d1
- vmlal.u8 q8, d7, d1
- vmlal.u8 q9, d9, d1
- vmlal.u8 q10, d11, d1
-
- vqrshrn.u16 d22, q6, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d23, q7, #7
- vqrshrn.u16 d24, q8, #7
- vqrshrn.u16 d25, q9, #7
- vqrshrn.u16 d26, q10, #7
-
-;Second pass: 4x8
-secondpass_filter
- cmp r3, #0 ;skip second_pass filter if yoffset=0
- beq skip_secondpass_filter
-
- add r3, r12, r3, lsl #3
- add r0, r4, lr
-
- vld1.u32 {d31}, [r3] ;load second_pass filter
- add r1, r0, lr
-
- vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1)
- vdup.8 d1, d31[4]
-
- vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp8_filter[0])
- vmull.u8 q2, d23, d0
- vmull.u8 q3, d24, d0
- vmull.u8 q4, d25, d0
-
- vmlal.u8 q1, d23, d1 ;(src_ptr[pixel_step] * vp8_filter[1])
- vmlal.u8 q2, d24, d1
- vmlal.u8 q3, d25, d1
- vmlal.u8 q4, d26, d1
-
- add r2, r1, lr
-
- vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d3, q2, #7
- vqrshrn.u16 d4, q3, #7
- vqrshrn.u16 d5, q4, #7
-
- vst1.u8 {d2}, [r4] ;store result
- vst1.u8 {d3}, [r0]
- vst1.u8 {d4}, [r1]
- vst1.u8 {d5}, [r2]
-
- pop {r4, pc}
-
-;--------------------
-skip_firstpass_filter
- vld1.u8 {d22}, [r0], r1 ;load src data
- vld1.u8 {d23}, [r0], r1
- vld1.u8 {d24}, [r0], r1
- vld1.u8 {d25}, [r0], r1
- vld1.u8 {d26}, [r0], r1
-
- b secondpass_filter
-
-;---------------------
-skip_secondpass_filter
- vst1.u8 {d22}, [r4], lr ;store result
- vst1.u8 {d23}, [r4], lr
- vst1.u8 {d24}, [r4], lr
- vst1.u8 {d25}, [r4], lr
-
- pop {r4, pc}
-
- ENDP
-
-;-----------------
-
-bifilter8x4_coeff
- DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
- END
diff --git a/libvpx/vp8/common/arm/neon/bilinearpredict8x8_neon.asm b/libvpx/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
deleted file mode 100644
index c4711bc..0000000
--- a/libvpx/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_bilinear_predict8x8_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0 unsigned char *src_ptr,
-; r1 int src_pixels_per_line,
-; r2 int xoffset,
-; r3 int yoffset,
-; r4 unsigned char *dst_ptr,
-; stack(lr) int dst_pitch
-
-|vp8_bilinear_predict8x8_neon| PROC
- push {r4, lr}
-
- adr r12, bifilter8_coeff
- ldr r4, [sp, #8] ;load parameters from stack
- ldr lr, [sp, #12] ;load parameters from stack
-
- cmp r2, #0 ;skip first_pass filter if xoffset=0
- beq skip_firstpass_filter
-
-;First pass: output_height lines x output_width columns (9x8)
- add r2, r12, r2, lsl #3 ;calculate filter location
-
- vld1.u8 {q1}, [r0], r1 ;load src data
- vld1.u32 {d31}, [r2] ;load first_pass filter
- vld1.u8 {q2}, [r0], r1
- vdup.8 d0, d31[0] ;first_pass filter (d0 d1)
- vld1.u8 {q3}, [r0], r1
- vdup.8 d1, d31[4]
- vld1.u8 {q4}, [r0], r1
-
- vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp8_filter[0])
- vmull.u8 q7, d4, d0
- vmull.u8 q8, d6, d0
- vmull.u8 q9, d8, d0
-
- vext.8 d3, d2, d3, #1 ;construct src_ptr[-1]
- vext.8 d5, d4, d5, #1
- vext.8 d7, d6, d7, #1
- vext.8 d9, d8, d9, #1
-
- vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp8_filter[1])
- vmlal.u8 q7, d5, d1
- vmlal.u8 q8, d7, d1
- vmlal.u8 q9, d9, d1
-
- vld1.u8 {q1}, [r0], r1 ;load src data
- vqrshrn.u16 d22, q6, #7 ;shift/round/saturate to u8
- vld1.u8 {q2}, [r0], r1
- vqrshrn.u16 d23, q7, #7
- vld1.u8 {q3}, [r0], r1
- vqrshrn.u16 d24, q8, #7
- vld1.u8 {q4}, [r0], r1
- vqrshrn.u16 d25, q9, #7
-
- ;first_pass filtering on the rest 5-line data
- vld1.u8 {q5}, [r0], r1
-
- vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp8_filter[0])
- vmull.u8 q7, d4, d0
- vmull.u8 q8, d6, d0
- vmull.u8 q9, d8, d0
- vmull.u8 q10, d10, d0
-
- vext.8 d3, d2, d3, #1 ;construct src_ptr[-1]
- vext.8 d5, d4, d5, #1
- vext.8 d7, d6, d7, #1
- vext.8 d9, d8, d9, #1
- vext.8 d11, d10, d11, #1
-
- vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp8_filter[1])
- vmlal.u8 q7, d5, d1
- vmlal.u8 q8, d7, d1
- vmlal.u8 q9, d9, d1
- vmlal.u8 q10, d11, d1
-
- vqrshrn.u16 d26, q6, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d27, q7, #7
- vqrshrn.u16 d28, q8, #7
- vqrshrn.u16 d29, q9, #7
- vqrshrn.u16 d30, q10, #7
-
-;Second pass: 8x8
-secondpass_filter
- cmp r3, #0 ;skip second_pass filter if yoffset=0
- beq skip_secondpass_filter
-
- add r3, r12, r3, lsl #3
- add r0, r4, lr
-
- vld1.u32 {d31}, [r3] ;load second_pass filter
- add r1, r0, lr
-
- vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1)
- vdup.8 d1, d31[4]
-
- vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp8_filter[0])
- vmull.u8 q2, d23, d0
- vmull.u8 q3, d24, d0
- vmull.u8 q4, d25, d0
- vmull.u8 q5, d26, d0
- vmull.u8 q6, d27, d0
- vmull.u8 q7, d28, d0
- vmull.u8 q8, d29, d0
-
- vmlal.u8 q1, d23, d1 ;(src_ptr[pixel_step] * vp8_filter[1])
- vmlal.u8 q2, d24, d1
- vmlal.u8 q3, d25, d1
- vmlal.u8 q4, d26, d1
- vmlal.u8 q5, d27, d1
- vmlal.u8 q6, d28, d1
- vmlal.u8 q7, d29, d1
- vmlal.u8 q8, d30, d1
-
- vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8
- vqrshrn.u16 d3, q2, #7
- vqrshrn.u16 d4, q3, #7
- vqrshrn.u16 d5, q4, #7
- vqrshrn.u16 d6, q5, #7
- vqrshrn.u16 d7, q6, #7
- vqrshrn.u16 d8, q7, #7
- vqrshrn.u16 d9, q8, #7
-
- vst1.u8 {d2}, [r4] ;store result
- vst1.u8 {d3}, [r0]
- vst1.u8 {d4}, [r1], lr
- vst1.u8 {d5}, [r1], lr
- vst1.u8 {d6}, [r1], lr
- vst1.u8 {d7}, [r1], lr
- vst1.u8 {d8}, [r1], lr
- vst1.u8 {d9}, [r1], lr
-
- pop {r4, pc}
-
-;--------------------
-skip_firstpass_filter
- vld1.u8 {d22}, [r0], r1 ;load src data
- vld1.u8 {d23}, [r0], r1
- vld1.u8 {d24}, [r0], r1
- vld1.u8 {d25}, [r0], r1
- vld1.u8 {d26}, [r0], r1
- vld1.u8 {d27}, [r0], r1
- vld1.u8 {d28}, [r0], r1
- vld1.u8 {d29}, [r0], r1
- vld1.u8 {d30}, [r0], r1
-
- b secondpass_filter
-
-;---------------------
-skip_secondpass_filter
- vst1.u8 {d22}, [r4], lr ;store result
- vst1.u8 {d23}, [r4], lr
- vst1.u8 {d24}, [r4], lr
- vst1.u8 {d25}, [r4], lr
- vst1.u8 {d26}, [r4], lr
- vst1.u8 {d27}, [r4], lr
- vst1.u8 {d28}, [r4], lr
- vst1.u8 {d29}, [r4], lr
-
- pop {r4, pc}
-
- ENDP
-
-;-----------------
-
-bifilter8_coeff
- DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
-
- END
diff --git a/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c b/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c
new file mode 100644
index 0000000..e1c3c2b
--- /dev/null
+++ b/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const uint16_t bifilter4_coeff[8][2] = {
+ {128, 0},
+ {112, 16},
+ { 96, 32},
+ { 80, 48},
+ { 64, 64},
+ { 48, 80},
+ { 32, 96},
+ { 16, 112}
+};
+
+void vp8_bilinear_predict4x4_neon( // two-pass bilinear predictor, 4x4 output
+ unsigned char *src_ptr, // top-left source pixel; 5 rows are read
+ int src_pixels_per_line, // byte step between source rows
+ int xoffset, // bifilter4_coeff row for the horizontal taps; 0 skips pass 1
+ int yoffset, // bifilter4_coeff row for the vertical taps; 0 skips pass 2
+ unsigned char *dst_ptr, // destination; 4 rows of 4 bytes are written
+ int dst_pitch) { // byte step between destination rows
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8;
+ uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8;
+ uint32x2_t d28u32 = vdup_n_u32(0); // zeroed: vld1_lane_u32 reads the vector it updates
+ uint32x2_t d29u32 = vdup_n_u32(0);
+ uint32x2_t d30u32 = vdup_n_u32(0); // lane 1 is never loaded below
+ uint8x16_t q1u8, q2u8;
+ uint16x8_t q1u16, q2u16, q7u16, q8u16, q9u16;
+ uint64x2_t q4u64, q5u64;
+ uint64x1_t d12u64;
+ uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2;
+ if (xoffset == 0) { // pass 1 skipped: pack raw 4-byte rows, two per vector
+ d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0);
+ src_ptr += src_pixels_per_line;
+ d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1);
+ src_ptr += src_pixels_per_line;
+ d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0);
+ src_ptr += src_pixels_per_line;
+ d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1);
+ src_ptr += src_pixels_per_line;
+ d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0);
+ d28u8 = vreinterpret_u8_u32(d28u32);
+ d29u8 = vreinterpret_u8_u32(d29u32);
+ d30u8 = vreinterpret_u8_u32(d30u32);
+ } else {
+ d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d6u8 = vld1_u8(src_ptr);
+ // q1 = rows 0,1 and q2 = rows 2,3 (8 bytes each); d6 = row 4
+ q1u8 = vcombine_u8(d2u8, d3u8);
+ q2u8 = vcombine_u8(d4u8, d5u8);
+ // horizontal taps, broadcast to every byte lane
+ d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
+ d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+ // shifting each 64-bit lane right by 8 bits moves pixel i+1 into byte i
+ q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
+ q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
+ d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8);
+ // val[0] of each zip pairs the first 4 bytes of two consecutive rows
+ d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)),
+ vreinterpret_u32_u8(vget_high_u8(q1u8)));
+ d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)),
+ vreinterpret_u32_u8(vget_high_u8(q2u8)));
+ d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)),
+ vreinterpret_u32_u64(vget_high_u64(q4u64)));
+ d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),
+ vreinterpret_u32_u64(vget_high_u64(q5u64)));
+ // pixel[x] * tap0
+ q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
+ q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
+ q9u16 = vmull_u8(d6u8, d0u8);
+ // + pixel[x + 1] * tap1
+ q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8);
+ q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8);
+ q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d28u8 = vqrshrn_n_u16(q7u16, 7);
+ d29u8 = vqrshrn_n_u16(q8u16, 7);
+ d30u8 = vqrshrn_n_u16(q9u16, 7);
+ }
+ // here d28 = rows 0,1; d29 = rows 2,3; low half of d30 = row 4
+ // secondpass_filter
+ if (yoffset == 0) { // pass 2 skipped: store rows 0..3 as they are
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1);
+ } else {
+ d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+ // row[y] * tap0 for rows 0,1 and rows 2,3
+ q1u16 = vmull_u8(d28u8, d0u8);
+ q2u16 = vmull_u8(d29u8, d0u8);
+ // d26 = rows 1,2 and d27 = rows 3,4: the rows one line below
+ d26u8 = vext_u8(d28u8, d29u8, 4);
+ d27u8 = vext_u8(d29u8, d30u8, 4);
+ // + row[y + 1] * tap1
+ q1u16 = vmlal_u8(q1u16, d26u8, d1u8);
+ q2u16 = vmlal_u8(q2u16, d27u8, d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d2u8 = vqrshrn_n_u16(q1u16, 7);
+ d3u8 = vqrshrn_n_u16(q2u16, 7);
+
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
+ dst_ptr += dst_pitch;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
+ }
+ return;
+}
+
+void vp8_bilinear_predict8x4_neon( // two-pass bilinear predictor, 8 columns x 4 rows
+ unsigned char *src_ptr, // top-left source pixel; 5 rows are read
+ int src_pixels_per_line, // byte step between source rows
+ int xoffset, // bifilter4_coeff row for the horizontal taps; 0 skips pass 1
+ int yoffset, // bifilter4_coeff row for the vertical taps; 0 skips pass 2
+ unsigned char *dst_ptr, // destination; 4 rows of 8 bytes are written
+ int dst_pitch) { // byte step between destination rows
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8;
+ uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8;
+ uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
+ uint16x8_t q1u16, q2u16, q3u16, q4u16;
+ uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
+ // d22..d26 hold the five rows that feed pass 2, whichever branch fills them
+ if (xoffset == 0) { // pass 1 skipped: take 8 raw bytes from each row
+ d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d26u8 = vld1_u8(src_ptr);
+ } else {
+ q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; // 16 bytes per row are loaded
+ q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q5u8 = vld1q_u8(src_ptr);
+ // horizontal taps, broadcast to every byte lane
+ d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
+ d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+ // pixel[x] * tap0 for x = 0..7
+ q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+ q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+ q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+ q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+ q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+ // bytes 1..8 of each row: the right-hand neighbours of pixels 0..7
+ d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+ d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+ d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+ d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+ d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+ // + pixel[x + 1] * tap1
+ q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+ q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+ q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+ q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+ q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d22u8 = vqrshrn_n_u16(q6u16, 7);
+ d23u8 = vqrshrn_n_u16(q7u16, 7);
+ d24u8 = vqrshrn_n_u16(q8u16, 7);
+ d25u8 = vqrshrn_n_u16(q9u16, 7);
+ d26u8 = vqrshrn_n_u16(q10u16, 7);
+ }
+
+ // secondpass_filter
+ if (yoffset == 0) { // pass 2 skipped: store rows 0..3 as they are
+ vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d25u8);
+ } else {
+ d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+ // row[y] * tap0 for y = 0..3
+ q1u16 = vmull_u8(d22u8, d0u8);
+ q2u16 = vmull_u8(d23u8, d0u8);
+ q3u16 = vmull_u8(d24u8, d0u8);
+ q4u16 = vmull_u8(d25u8, d0u8);
+ // + row[y + 1] * tap1
+ q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
+ q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
+ q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
+ q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d2u8 = vqrshrn_n_u16(q1u16, 7);
+ d3u8 = vqrshrn_n_u16(q2u16, 7);
+ d4u8 = vqrshrn_n_u16(q3u16, 7);
+ d5u8 = vqrshrn_n_u16(q4u16, 7);
+
+ vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d5u8);
+ }
+ return;
+}
+
+void vp8_bilinear_predict8x8_neon( // two-pass bilinear predictor, 8x8 output
+ unsigned char *src_ptr, // top-left source pixel; 9 rows are read
+ int src_pixels_per_line, // byte step between source rows
+ int xoffset, // bifilter4_coeff row for the horizontal taps; 0 skips pass 1
+ int yoffset, // bifilter4_coeff row for the vertical taps; 0 skips pass 2
+ unsigned char *dst_ptr, // destination; 8 rows of 8 bytes are written
+ int dst_pitch) { // byte step between destination rows
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8;
+ uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8;
+ uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
+ uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16;
+ uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
+ // d22..d30 hold the nine rows that feed pass 2, whichever branch fills them
+ if (xoffset == 0) { // pass 1 skipped: take 8 raw bytes from each row
+ d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+ d30u8 = vld1_u8(src_ptr);
+ } else {
+ q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; // rows 0..3, 16 bytes each
+ q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ // horizontal taps, broadcast to every byte lane
+ d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]);
+ d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]);
+ // pixel[x] * tap0 for x = 0..7
+ q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+ q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+ q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+ q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+ // bytes 1..8 of each row: the right-hand neighbours of pixels 0..7
+ d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+ d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+ d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+ d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+ // + pixel[x + 1] * tap1
+ q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+ q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+ q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+ q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d22u8 = vqrshrn_n_u16(q6u16, 7);
+ d23u8 = vqrshrn_n_u16(q7u16, 7);
+ d24u8 = vqrshrn_n_u16(q8u16, 7);
+ d25u8 = vqrshrn_n_u16(q9u16, 7);
+
+ // first_pass filtering on the rest 5-line data
+ q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; // rows 4..8
+ q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q5u8 = vld1q_u8(src_ptr);
+ // same three steps as above, now over five rows
+ q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+ q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+ q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+ q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+ q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+
+ d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+ d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+ d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+ d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+ d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+
+ q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+ q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+ q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+ q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+ q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
+
+ d26u8 = vqrshrn_n_u16(q6u16, 7);
+ d27u8 = vqrshrn_n_u16(q7u16, 7);
+ d28u8 = vqrshrn_n_u16(q8u16, 7);
+ d29u8 = vqrshrn_n_u16(q9u16, 7);
+ d30u8 = vqrshrn_n_u16(q10u16, 7);
+ }
+
+ // secondpass_filter
+ if (yoffset == 0) { // pass 2 skipped: store rows 0..7 as they are
+ vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d29u8);
+ } else {
+ d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+ // row[y] * tap0 for y = 0..7
+ q1u16 = vmull_u8(d22u8, d0u8);
+ q2u16 = vmull_u8(d23u8, d0u8);
+ q3u16 = vmull_u8(d24u8, d0u8);
+ q4u16 = vmull_u8(d25u8, d0u8);
+ q5u16 = vmull_u8(d26u8, d0u8);
+ q6u16 = vmull_u8(d27u8, d0u8);
+ q7u16 = vmull_u8(d28u8, d0u8);
+ q8u16 = vmull_u8(d29u8, d0u8);
+ // + row[y + 1] * tap1
+ q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
+ q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
+ q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
+ q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
+ q5u16 = vmlal_u8(q5u16, d27u8, d1u8);
+ q6u16 = vmlal_u8(q6u16, d28u8, d1u8);
+ q7u16 = vmlal_u8(q7u16, d29u8, d1u8);
+ q8u16 = vmlal_u8(q8u16, d30u8, d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d2u8 = vqrshrn_n_u16(q1u16, 7);
+ d3u8 = vqrshrn_n_u16(q2u16, 7);
+ d4u8 = vqrshrn_n_u16(q3u16, 7);
+ d5u8 = vqrshrn_n_u16(q4u16, 7);
+ d6u8 = vqrshrn_n_u16(q5u16, 7);
+ d7u8 = vqrshrn_n_u16(q6u16, 7);
+ d8u8 = vqrshrn_n_u16(q7u16, 7);
+ d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+ vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch;
+ vst1_u8((uint8_t *)dst_ptr, d9u8);
+ }
+ return;
+}
+
+void vp8_bilinear_predict16x16_neon( // bilinear predictor, 16x16 output
+ unsigned char *src_ptr, // top-left source pixel
+ int src_pixels_per_line, // byte step between source rows
+ int xoffset, // bifilter4_coeff row for the horizontal taps; 0 selects the vertical-only path
+ int yoffset, // bifilter4_coeff row for the vertical taps; 0 selects the horizontal-only path
+ unsigned char *dst_ptr, // destination; 16 rows of 16 bytes are written
+ int dst_pitch) { // byte step between destination rows
+ int i;
+ unsigned char tmp[272]; // 17 rows x 16 bytes of first-pass output (two-pass path only)
+ unsigned char *tmpp;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+ uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8;
+ uint8x8_t d19u8, d20u8, d21u8;
+ uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8;
+ uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8;
+ uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16;
+ uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16;
+ // Path 1: vertical filter only, straight from src to dst (17 source rows are read).
+ if (xoffset == 0) { // secondpass_bfilter16x16_only
+ d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+ // q11 always holds the row above the four rows loaded in each iteration
+ q11u8 = vld1q_u8(src_ptr);
+ src_ptr += src_pixels_per_line;
+ for (i = 4; i > 0; i--) {
+ q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+ // row[y] * tap0, low and high 8 pixels handled separately
+ q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
+ q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
+ q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
+ q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
+ q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
+ q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
+ q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
+ q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
+ // + row[y + 1] * tap1
+ q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
+ q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
+ q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
+ q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
+ q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
+ q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
+ q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
+ q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d2u8 = vqrshrn_n_u16(q1u16, 7);
+ d3u8 = vqrshrn_n_u16(q2u16, 7);
+ d4u8 = vqrshrn_n_u16(q3u16, 7);
+ d5u8 = vqrshrn_n_u16(q4u16, 7);
+ d6u8 = vqrshrn_n_u16(q5u16, 7);
+ d7u8 = vqrshrn_n_u16(q6u16, 7);
+ d8u8 = vqrshrn_n_u16(q7u16, 7);
+ d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+ q1u8 = vcombine_u8(d2u8, d3u8);
+ q2u8 = vcombine_u8(d4u8, d5u8);
+ q3u8 = vcombine_u8(d6u8, d7u8);
+ q4u8 = vcombine_u8(d8u8, d9u8);
+ // carry the last loaded row over as the top row of the next group
+ q11u8 = q15u8;
+
+ vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
+ }
+ return;
+ }
+ // Path 2: horizontal filter only, straight from src to dst, 4 rows per iteration.
+ if (yoffset == 0) { // firstpass_bfilter16x16_only
+ d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+ for (i = 4; i > 0 ; i--) {
+ d2u8 = vld1_u8(src_ptr); // each row is loaded as three 8-byte pieces (24 bytes)
+ d3u8 = vld1_u8(src_ptr + 8);
+ d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d5u8 = vld1_u8(src_ptr);
+ d6u8 = vld1_u8(src_ptr + 8);
+ d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d8u8 = vld1_u8(src_ptr);
+ d9u8 = vld1_u8(src_ptr + 8);
+ d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d11u8 = vld1_u8(src_ptr);
+ d12u8 = vld1_u8(src_ptr + 8);
+ d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ // pixel[x] * tap0 for x = 0..7 and x = 8..15 of each row
+ q7u16 = vmull_u8(d2u8, d0u8);
+ q8u16 = vmull_u8(d3u8, d0u8);
+ q9u16 = vmull_u8(d5u8, d0u8);
+ q10u16 = vmull_u8(d6u8, d0u8);
+ q11u16 = vmull_u8(d8u8, d0u8);
+ q12u16 = vmull_u8(d9u8, d0u8);
+ q13u16 = vmull_u8(d11u8, d0u8);
+ q14u16 = vmull_u8(d12u8, d0u8);
+ // pixels 1..8: right-hand neighbours of the low half
+ d2u8 = vext_u8(d2u8, d3u8, 1);
+ d5u8 = vext_u8(d5u8, d6u8, 1);
+ d8u8 = vext_u8(d8u8, d9u8, 1);
+ d11u8 = vext_u8(d11u8, d12u8, 1);
+
+ q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
+ q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
+ q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
+ q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
+ // pixels 9..16: right-hand neighbours of the high half
+ d3u8 = vext_u8(d3u8, d4u8, 1);
+ d6u8 = vext_u8(d6u8, d7u8, 1);
+ d9u8 = vext_u8(d9u8, d10u8, 1);
+ d12u8 = vext_u8(d12u8, d13u8, 1);
+
+ q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
+ q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
+ q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
+ q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d14u8 = vqrshrn_n_u16(q7u16, 7);
+ d15u8 = vqrshrn_n_u16(q8u16, 7);
+ d16u8 = vqrshrn_n_u16(q9u16, 7);
+ d17u8 = vqrshrn_n_u16(q10u16, 7);
+ d18u8 = vqrshrn_n_u16(q11u16, 7);
+ d19u8 = vqrshrn_n_u16(q12u16, 7);
+ d20u8 = vqrshrn_n_u16(q13u16, 7);
+ d21u8 = vqrshrn_n_u16(q14u16, 7);
+
+ q7u8 = vcombine_u8(d14u8, d15u8);
+ q8u8 = vcombine_u8(d16u8, d17u8);
+ q9u8 = vcombine_u8(d18u8, d19u8);
+ q10u8 =vcombine_u8(d20u8, d21u8);
+
+ vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch;
+ }
+ return;
+ }
+ // Path 3: both passes. Horizontal results go to tmp, then tmp is filtered vertically.
+ d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+ // preload source rows 0..3; the loop below filters 4 rows while loading the next 4
+ d2u8 = vld1_u8(src_ptr);
+ d3u8 = vld1_u8(src_ptr + 8);
+ d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d5u8 = vld1_u8(src_ptr);
+ d6u8 = vld1_u8(src_ptr + 8);
+ d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d8u8 = vld1_u8(src_ptr);
+ d9u8 = vld1_u8(src_ptr + 8);
+ d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d11u8 = vld1_u8(src_ptr);
+ d12u8 = vld1_u8(src_ptr + 8);
+ d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+ // First Pass: output_height lines x output_width columns (17x16)
+ tmpp = tmp;
+ for (i = 3; i > 0; i--) { // 3 iterations x 4 rows = rows 0..11 written to tmp
+ q7u16 = vmull_u8(d2u8, d0u8);
+ q8u16 = vmull_u8(d3u8, d0u8);
+ q9u16 = vmull_u8(d5u8, d0u8);
+ q10u16 = vmull_u8(d6u8, d0u8);
+ q11u16 = vmull_u8(d8u8, d0u8);
+ q12u16 = vmull_u8(d9u8, d0u8);
+ q13u16 = vmull_u8(d11u8, d0u8);
+ q14u16 = vmull_u8(d12u8, d0u8);
+
+ d2u8 = vext_u8(d2u8, d3u8, 1);
+ d5u8 = vext_u8(d5u8, d6u8, 1);
+ d8u8 = vext_u8(d8u8, d9u8, 1);
+ d11u8 = vext_u8(d11u8, d12u8, 1);
+
+ q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
+ q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
+ q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
+ q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
+
+ d3u8 = vext_u8(d3u8, d4u8, 1);
+ d6u8 = vext_u8(d6u8, d7u8, 1);
+ d9u8 = vext_u8(d9u8, d10u8, 1);
+ d12u8 = vext_u8(d12u8, d13u8, 1);
+
+ q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
+ q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
+ q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
+ q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
+
+ d14u8 = vqrshrn_n_u16(q7u16, 7);
+ d15u8 = vqrshrn_n_u16(q8u16, 7);
+ d16u8 = vqrshrn_n_u16(q9u16, 7);
+ d17u8 = vqrshrn_n_u16(q10u16, 7);
+ d18u8 = vqrshrn_n_u16(q11u16, 7);
+ d19u8 = vqrshrn_n_u16(q12u16, 7);
+ d20u8 = vqrshrn_n_u16(q13u16, 7);
+ d21u8 = vqrshrn_n_u16(q14u16, 7);
+ // load the next four source rows for the following iteration (or the tail below)
+ d2u8 = vld1_u8(src_ptr);
+ d3u8 = vld1_u8(src_ptr + 8);
+ d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d5u8 = vld1_u8(src_ptr);
+ d6u8 = vld1_u8(src_ptr + 8);
+ d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d8u8 = vld1_u8(src_ptr);
+ d9u8 = vld1_u8(src_ptr + 8);
+ d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ d11u8 = vld1_u8(src_ptr);
+ d12u8 = vld1_u8(src_ptr + 8);
+ d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+ q7u8 = vcombine_u8(d14u8, d15u8);
+ q8u8 = vcombine_u8(d16u8, d17u8);
+ q9u8 = vcombine_u8(d18u8, d19u8);
+ q10u8 = vcombine_u8(d20u8, d21u8);
+
+ vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
+ vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
+ vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16;
+ vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16;
+ }
+
+ // First-pass filtering for rest 5 lines
+ d14u8 = vld1_u8(src_ptr); // row 16; rows 12..15 are already in d2..d13
+ d15u8 = vld1_u8(src_ptr + 8);
+ d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+ // rows 12..14: pixel[x] * tap0
+ q9u16 = vmull_u8(d2u8, d0u8);
+ q10u16 = vmull_u8(d3u8, d0u8);
+ q11u16 = vmull_u8(d5u8, d0u8);
+ q12u16 = vmull_u8(d6u8, d0u8);
+ q13u16 = vmull_u8(d8u8, d0u8);
+ q14u16 = vmull_u8(d9u8, d0u8);
+
+ d2u8 = vext_u8(d2u8, d3u8, 1);
+ d5u8 = vext_u8(d5u8, d6u8, 1);
+ d8u8 = vext_u8(d8u8, d9u8, 1);
+
+ q9u16 = vmlal_u8(q9u16, d2u8, d1u8);
+ q11u16 = vmlal_u8(q11u16, d5u8, d1u8);
+ q13u16 = vmlal_u8(q13u16, d8u8, d1u8);
+
+ d3u8 = vext_u8(d3u8, d4u8, 1);
+ d6u8 = vext_u8(d6u8, d7u8, 1);
+ d9u8 = vext_u8(d9u8, d10u8, 1);
+
+ q10u16 = vmlal_u8(q10u16, d3u8, d1u8);
+ q12u16 = vmlal_u8(q12u16, d6u8, d1u8);
+ q14u16 = vmlal_u8(q14u16, d9u8, d1u8);
+ // rows 15 and 16: pixel[x] * tap0
+ q1u16 = vmull_u8(d11u8, d0u8);
+ q2u16 = vmull_u8(d12u8, d0u8);
+ q3u16 = vmull_u8(d14u8, d0u8);
+ q4u16 = vmull_u8(d15u8, d0u8);
+
+ d11u8 = vext_u8(d11u8, d12u8, 1);
+ d14u8 = vext_u8(d14u8, d15u8, 1);
+
+ q1u16 = vmlal_u8(q1u16, d11u8, d1u8);
+ q3u16 = vmlal_u8(q3u16, d14u8, d1u8);
+
+ d12u8 = vext_u8(d12u8, d13u8, 1);
+ d15u8 = vext_u8(d15u8, d16u8, 1);
+
+ q2u16 = vmlal_u8(q2u16, d12u8, d1u8);
+ q4u16 = vmlal_u8(q4u16, d15u8, d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d10u8 = vqrshrn_n_u16(q9u16, 7);
+ d11u8 = vqrshrn_n_u16(q10u16, 7);
+ d12u8 = vqrshrn_n_u16(q11u16, 7);
+ d13u8 = vqrshrn_n_u16(q12u16, 7);
+ d14u8 = vqrshrn_n_u16(q13u16, 7);
+ d15u8 = vqrshrn_n_u16(q14u16, 7);
+ d16u8 = vqrshrn_n_u16(q1u16, 7);
+ d17u8 = vqrshrn_n_u16(q2u16, 7);
+ d18u8 = vqrshrn_n_u16(q3u16, 7);
+ d19u8 = vqrshrn_n_u16(q4u16, 7);
+
+ q5u8 = vcombine_u8(d10u8, d11u8);
+ q6u8 = vcombine_u8(d12u8, d13u8);
+ q7u8 = vcombine_u8(d14u8, d15u8);
+ q8u8 = vcombine_u8(d16u8, d17u8);
+ q9u8 = vcombine_u8(d18u8, d19u8);
+ // rows 12..16 complete the 17-row scratch buffer
+ vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16;
+ vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16;
+ vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
+ vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
+ vst1q_u8((uint8_t *)tmpp, q9u8);
+
+ // secondpass_filter
+ d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+ d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+ // same vertical loop as the xoffset == 0 path, but reading tmp with a 16-byte row step
+ tmpp = tmp;
+ q11u8 = vld1q_u8(tmpp);
+ tmpp += 16;
+ for (i = 4; i > 0; i--) {
+ q12u8 = vld1q_u8(tmpp); tmpp += 16;
+ q13u8 = vld1q_u8(tmpp); tmpp += 16;
+ q14u8 = vld1q_u8(tmpp); tmpp += 16;
+ q15u8 = vld1q_u8(tmpp); tmpp += 16;
+ // row[y] * tap0
+ q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
+ q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
+ q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
+ q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
+ q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
+ q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
+ q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
+ q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
+ // + row[y + 1] * tap1
+ q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
+ q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
+ q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
+ q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
+ q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
+ q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
+ q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
+ q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
+ // rounding shift right by 7, saturating narrow to 8 bits
+ d2u8 = vqrshrn_n_u16(q1u16, 7);
+ d3u8 = vqrshrn_n_u16(q2u16, 7);
+ d4u8 = vqrshrn_n_u16(q3u16, 7);
+ d5u8 = vqrshrn_n_u16(q4u16, 7);
+ d6u8 = vqrshrn_n_u16(q5u16, 7);
+ d7u8 = vqrshrn_n_u16(q6u16, 7);
+ d8u8 = vqrshrn_n_u16(q7u16, 7);
+ d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+ q1u8 = vcombine_u8(d2u8, d3u8);
+ q2u8 = vcombine_u8(d4u8, d5u8);
+ q3u8 = vcombine_u8(d6u8, d7u8);
+ q4u8 = vcombine_u8(d8u8, d9u8);
+ // carry the last loaded row over as the top row of the next group
+ q11u8 = q15u8;
+
+ vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
+ vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
+ }
+ return;
+}
diff --git a/libvpx/vp8/common/arm/neon/copymem16x16_neon.asm b/libvpx/vp8/common/arm/neon/copymem16x16_neon.asm
deleted file mode 100644
index bda4b96..0000000
--- a/libvpx/vp8/common/arm/neon/copymem16x16_neon.asm
+++ /dev/null
@@ -1,59 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_copy_mem16x16_neon|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA Block, CODE, READONLY ; name this block of code
-;void copy_mem16x16_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem16x16_neon| PROC
-
- vld1.u8 {q0}, [r0], r1
- vld1.u8 {q1}, [r0], r1
- vld1.u8 {q2}, [r0], r1
- vst1.u8 {q0}, [r2], r3
- vld1.u8 {q3}, [r0], r1
- vst1.u8 {q1}, [r2], r3
- vld1.u8 {q4}, [r0], r1
- vst1.u8 {q2}, [r2], r3
- vld1.u8 {q5}, [r0], r1
- vst1.u8 {q3}, [r2], r3
- vld1.u8 {q6}, [r0], r1
- vst1.u8 {q4}, [r2], r3
- vld1.u8 {q7}, [r0], r1
- vst1.u8 {q5}, [r2], r3
- vld1.u8 {q8}, [r0], r1
- vst1.u8 {q6}, [r2], r3
- vld1.u8 {q9}, [r0], r1
- vst1.u8 {q7}, [r2], r3
- vld1.u8 {q10}, [r0], r1
- vst1.u8 {q8}, [r2], r3
- vld1.u8 {q11}, [r0], r1
- vst1.u8 {q9}, [r2], r3
- vld1.u8 {q12}, [r0], r1
- vst1.u8 {q10}, [r2], r3
- vld1.u8 {q13}, [r0], r1
- vst1.u8 {q11}, [r2], r3
- vld1.u8 {q14}, [r0], r1
- vst1.u8 {q12}, [r2], r3
- vld1.u8 {q15}, [r0], r1
- vst1.u8 {q13}, [r2], r3
- vst1.u8 {q14}, [r2], r3
- vst1.u8 {q15}, [r2], r3
-
- mov pc, lr
-
- ENDP ; |vp8_copy_mem16x16_neon|
-
- END
diff --git a/libvpx/vp8/common/arm/neon/copymem8x4_neon.asm b/libvpx/vp8/common/arm/neon/copymem8x4_neon.asm
deleted file mode 100644
index 35c0f67..0000000
--- a/libvpx/vp8/common/arm/neon/copymem8x4_neon.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_copy_mem8x4_neon|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA Block, CODE, READONLY ; name this block of code
-;void copy_mem8x4_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem8x4_neon| PROC
- vld1.u8 {d0}, [r0], r1
- vld1.u8 {d1}, [r0], r1
- vst1.u8 {d0}, [r2], r3
- vld1.u8 {d2}, [r0], r1
- vst1.u8 {d1}, [r2], r3
- vld1.u8 {d3}, [r0], r1
- vst1.u8 {d2}, [r2], r3
- vst1.u8 {d3}, [r2], r3
-
- mov pc, lr
-
- ENDP ; |vp8_copy_mem8x4_neon|
-
- END
diff --git a/libvpx/vp8/common/arm/neon/copymem8x8_neon.asm b/libvpx/vp8/common/arm/neon/copymem8x8_neon.asm
deleted file mode 100644
index 1f5b941..0000000
--- a/libvpx/vp8/common/arm/neon/copymem8x8_neon.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_copy_mem8x8_neon|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA Block, CODE, READONLY ; name this block of code
-;void copy_mem8x8_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_copy_mem8x8_neon| PROC
-
- vld1.u8 {d0}, [r0], r1
- vld1.u8 {d1}, [r0], r1
- vst1.u8 {d0}, [r2], r3
- vld1.u8 {d2}, [r0], r1
- vst1.u8 {d1}, [r2], r3
- vld1.u8 {d3}, [r0], r1
- vst1.u8 {d2}, [r2], r3
- vld1.u8 {d4}, [r0], r1
- vst1.u8 {d3}, [r2], r3
- vld1.u8 {d5}, [r0], r1
- vst1.u8 {d4}, [r2], r3
- vld1.u8 {d6}, [r0], r1
- vst1.u8 {d5}, [r2], r3
- vld1.u8 {d7}, [r0], r1
- vst1.u8 {d6}, [r2], r3
- vst1.u8 {d7}, [r2], r3
-
- mov pc, lr
-
- ENDP ; |vp8_copy_mem8x8_neon|
-
- END
diff --git a/libvpx/vp8/common/arm/neon/copymem_neon.c b/libvpx/vp8/common/arm/neon/copymem_neon.c
new file mode 100644
index 0000000..deced11
--- /dev/null
+++ b/libvpx/vp8/common/arm/neon/copymem_neon.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_copy_mem8x4_neon(
+    unsigned char *src,
+    int src_stride,
+    unsigned char *dst,
+    int dst_stride) {
+    int rows_left = 4;
+
+    /* Copy the 8x4 block one 8-byte row at a time; each pointer steps by its own stride. */
+    while (rows_left-- > 0) {
+        const uint8x8_t row = vld1_u8(src);
+        vst1_u8(dst, row);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
+
+void vp8_copy_mem8x8_neon(
+    unsigned char *src,
+    int src_stride,
+    unsigned char *dst,
+    int dst_stride) {
+    int rows_left = 8;
+
+    /* Copy the 8x8 block one 8-byte row at a time; each pointer steps by its own stride. */
+    while (rows_left-- > 0) {
+        const uint8x8_t row = vld1_u8(src);
+        vst1_u8(dst, row);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
+
+void vp8_copy_mem16x16_neon(
+    unsigned char *src,
+    int src_stride,
+    unsigned char *dst,
+    int dst_stride) {
+    int rows_left = 16;
+
+    /* Copy the 16x16 block one 16-byte row at a time; each pointer steps by its own stride. */
+    while (rows_left-- > 0) {
+        const uint8x16_t row = vld1q_u8(src);
+        vst1q_u8(dst, row);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
diff --git a/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.asm
deleted file mode 100644
index 79ff02c..0000000
--- a/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.asm
+++ /dev/null
@@ -1,54 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dc_only_idct_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
-; int pred_stride, unsigned char *dst_ptr,
-; int dst_stride)
-
-; r0 input_dc
-; r1 pred_ptr
-; r2 pred_stride
-; r3 dst_ptr
-; sp dst_stride
-
-|vp8_dc_only_idct_add_neon| PROC
- add r0, r0, #4
- asr r0, r0, #3
- ldr r12, [sp]
- vdup.16 q0, r0
-
- vld1.32 {d2[0]}, [r1], r2
- vld1.32 {d2[1]}, [r1], r2
- vld1.32 {d4[0]}, [r1], r2
- vld1.32 {d4[1]}, [r1]
-
- vaddw.u8 q1, q0, d2
- vaddw.u8 q2, q0, d4
-
- vqmovun.s16 d2, q1
- vqmovun.s16 d4, q2
-
- vst1.32 {d2[0]}, [r3], r12
- vst1.32 {d2[1]}, [r3], r12
- vst1.32 {d4[0]}, [r3], r12
- vst1.32 {d4[1]}, [r3]
-
- bx lr
-
- ENDP
-
- END
diff --git a/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c b/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c
new file mode 100644
index 0000000..ad5f41d
--- /dev/null
+++ b/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_dc_only_idct_add_neon(
+    int16_t input_dc,
+    unsigned char *pred_ptr,
+    int pred_stride,
+    unsigned char *dst_ptr,
+    int dst_stride) {
+    /* Rounded DC term, (input_dc + 4) >> 3, replicated into all eight 16-bit lanes. */
+    const uint16x8_t dc = vdupq_n_u16((uint16_t)((input_dc + 4) >> 3));
+    uint32x2_t pred = vdup_n_u32(0);
+    int pair;
+
+    /* Two passes; each handles two 4-byte rows packed into one 64-bit vector. */
+    for (pair = 2; pair > 0; --pair) {
+        uint8x8_t out;
+
+        pred = vld1_lane_u32((const uint32_t *)pred_ptr, pred, 0);
+        pred_ptr += pred_stride;
+        pred = vld1_lane_u32((const uint32_t *)pred_ptr, pred, 1);
+        pred_ptr += pred_stride;
+
+        /* Widen the prediction bytes, add the DC term, saturate back to 8 bits. */
+        out = vqmovun_s16(vreinterpretq_s16_u16(
+            vaddw_u8(dc, vreinterpret_u8_u32(pred))));
+
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(out), 0);
+        dst_ptr += dst_stride;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(out), 1);
+        dst_ptr += dst_stride;
+    }
+}
diff --git a/libvpx/vp8/common/arm/neon/dequant_idct_neon.asm b/libvpx/vp8/common/arm/neon/dequant_idct_neon.asm
deleted file mode 100644
index 602cce6..0000000
--- a/libvpx/vp8/common/arm/neon/dequant_idct_neon.asm
+++ /dev/null
@@ -1,131 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_idct_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_add_neon(short *input, short *dq,
-; unsigned char *dest, int stride)
-; r0 short *input,
-; r1 short *dq,
-; r2 unsigned char *dest
-; r3 int stride
-
-|vp8_dequant_idct_add_neon| PROC
- vld1.16 {q3, q4}, [r0]
- vld1.16 {q5, q6}, [r1]
-
- add r1, r2, r3 ; r1 = dest + stride
- lsl r3, #1 ; 2x stride
-
- vld1.32 {d14[0]}, [r2], r3
- vld1.32 {d14[1]}, [r1], r3
- vld1.32 {d15[0]}, [r2]
- vld1.32 {d15[1]}, [r1]
-
- adr r12, cospi8sqrt2minus1 ; pointer to the first constant
-
- vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
- vmul.i16 q2, q4, q6
-
-;|short_idct4x4llm_neon| PROC
- vld1.16 {d0}, [r12]
- vswp d3, d4 ;q2(vp[4] vp[12])
-
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
-; memset(input, 0, 32) -- 32bytes
- vmov.i16 q14, #0
-
- vswp d3, d4
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vmov q15, q14
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vst1.16 {q14, q15}, [r0]
-
- vrshr.s16 d2, d2, #3
- vrshr.s16 d3, d3, #3
- vrshr.s16 d4, d4, #3
- vrshr.s16 d5, d5, #3
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
- vaddw.u8 q1, q1, d14
- vaddw.u8 q2, q2, d15
-
- sub r2, r2, r3
- sub r1, r1, r3
-
- vqmovun.s16 d0, q1
- vqmovun.s16 d1, q2
-
- vst1.32 {d0[0]}, [r2], r3
- vst1.32 {d0[1]}, [r1], r3
- vst1.32 {d1[0]}, [r2]
- vst1.32 {d1[1]}, [r1]
-
- bx lr
-
- ENDP ; |vp8_dequant_idct_add_neon|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x4e7b4e7b
-sinpi8sqrt2 DCD 0x8a8c8a8c
-
- END
diff --git a/libvpx/vp8/common/arm/neon/dequant_idct_neon.c b/libvpx/vp8/common/arm/neon/dequant_idct_neon.c
new file mode 100644
index 0000000..58e1192
--- /dev/null
+++ b/libvpx/vp8/common/arm/neon/dequant_idct_neon.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;  /* (sqrt(2)*cos(pi/8) - 1) scaled by 2^16 */
+static const int16_t sinpi8sqrt2 = 35468 - 65536;  /* sqrt(2)*sin(pi/8) * 2^16 = 35468 > INT16_MAX; written as the in-range value with the same 16-bit pattern (0x8a8c) */
+
+void vp8_dequant_idct_add_neon(
+ int16_t *input,  // 16 coefficients; read here and then overwritten with zeros
+ int16_t *dq,  // 16 factors, multiplied lane-for-lane with input
+ unsigned char *dst,  // four rows of 4 bytes: read as the prediction, overwritten with the result
+ int stride) {  // byte distance between consecutive dst rows
+ unsigned char *dst0;  // row cursor into dst
+ int32x2_t d14, d15;  // dst rows 0-1 and 2-3, one 4-byte row per 32-bit lane
+ int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
+ int16x8_t q1, q2, q3, q4, q5, q6;
+ int16x8_t qEmpty = vdupq_n_s16(0);  // zeros used to clear input
+ int32x2x2_t d2tmp0, d2tmp1;
+ int16x4x2_t d2tmp2, d2tmp3;
+
+ d14 = d15 = vdup_n_s32(0);
+
+ // load the 16 input coefficients, zeroing each half in memory right after it is read
+ q3 = vld1q_s16(input);
+ vst1q_s16(input, qEmpty);
+ input += 8;
+ q4 = vld1q_s16(input);
+ vst1q_s16(input, qEmpty);
+
+ // load the 16 dq factors
+ q5 = vld1q_s16(dq);
+ dq += 8;
+ q6 = vld1q_s16(dq);
+
+ // load the current 4x4 block from dst; it is added to the transform output below
+ dst0 = dst;
+ d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
+ dst0 += stride;
+ d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
+ dst0 += stride;
+ d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
+ dst0 += stride;
+ d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
+
+ q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
+ vreinterpretq_u16_s16(q5)));  // dequantize coefficients 0-7: low 16 bits of input * dq
+ q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
+ vreinterpretq_u16_s16(q6)));  // dequantize coefficients 8-15
+
+ d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));  // a1: coefficients 0-3 plus 8-11
+ d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));  // b1: coefficients 0-3 minus 8-11
+
+ q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));  // coefficients 4-7 and 12-15
+
+ q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);  // saturating doubling multiply, high 16 bits
+ q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+ q3 = vshrq_n_s16(q3, 1);  // halve to undo the doubling
+ q4 = vshrq_n_s16(q4, 1);
+
+ q3 = vqaddq_s16(q3, q2);  // + x: 35468 does not fit int16_t; with the usual wrap to 35468 - 65536 this restores x * 35468 / 65536
+ q4 = vqaddq_s16(q4, q2);  // + x: supplies the "1" left out of cospi8sqrt2minus1
+
+ d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));  // c1
+ d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));  // d1
+
+ d2 = vqadd_s16(d12, d11);  // a1 + d1
+ d3 = vqadd_s16(d13, d10);  // b1 + c1
+ d4 = vqsub_s16(d13, d10);  // b1 - c1
+ d5 = vqsub_s16(d12, d11);  // a1 - d1
+
+ d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));  // 32-bit then 16-bit vtrn: transposes the 4x4 block held in d2..d5
+ d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+ d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+ vreinterpret_s16_s32(d2tmp1.val[0]));  // val[0], val[1]: transposed vectors 0 and 1
+ d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+ vreinterpret_s16_s32(d2tmp1.val[1]));  // val[0], val[1]: transposed vectors 2 and 3
+
+ // second pass: the same butterfly applied to the transposed block
+ q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);  // transposed vectors 1 and 3
+
+ q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
+ q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+ d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);  // a1: transposed vectors 0 plus 2
+ d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);  // b1: transposed vectors 0 minus 2
+
+ q3 = vshrq_n_s16(q3, 1);  // halve to undo the doubling
+ q4 = vshrq_n_s16(q4, 1);
+
+ q3 = vqaddq_s16(q3, q2);
+ q4 = vqaddq_s16(q4, q2);
+
+ d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));  // c1
+ d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));  // d1
+
+ d2 = vqadd_s16(d12, d11);  // a1 + d1
+ d3 = vqadd_s16(d13, d10);  // b1 + c1
+ d4 = vqsub_s16(d13, d10);  // b1 - c1
+ d5 = vqsub_s16(d12, d11);  // a1 - d1
+
+ d2 = vrshr_n_s16(d2, 3);  // rounding shift: (x + 4) >> 3
+ d3 = vrshr_n_s16(d3, 3);
+ d4 = vrshr_n_s16(d4, 3);
+ d5 = vrshr_n_s16(d5, 3);
+
+ d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));  // transpose again, same vtrn sequence as after the first pass
+ d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+ d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+ vreinterpret_s16_s32(d2tmp1.val[0]));
+ d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+ vreinterpret_s16_s32(d2tmp1.val[1]));
+
+ q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);  // the two vectors added to dst rows 0-1 (d14)
+ q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);  // the two vectors added to dst rows 2-3 (d15)
+
+ q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
+ vreinterpret_u8_s32(d14)));  // widen the dst bytes to 16 bits and add
+ q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
+ vreinterpret_u8_s32(d15)));
+
+ d14 = vreinterpret_s32_u8(vqmovun_s16(q1));  // saturate the signed sums to unsigned 8 bits
+ d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
+
+ dst0 = dst;  // rewind to row 0 and write the four rows back
+ vst1_lane_s32((int32_t *)dst0, d14, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d14, 1);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d15, 0);
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d15, 1);
+ return;
+}
diff --git a/libvpx/vp8/common/arm/neon/dequantizeb_neon.asm b/libvpx/vp8/common/arm/neon/dequantizeb_neon.asm
deleted file mode 100644
index c8e0c31..0000000
--- a/libvpx/vp8/common/arm/neon/dequantizeb_neon.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequantize_b_loop_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0 short *Q,
-; r1 short *DQC
-; r2 short *DQ
-|vp8_dequantize_b_loop_neon| PROC
- vld1.16 {q0, q1}, [r0]
- vld1.16 {q2, q3}, [r1]
-
- vmul.i16 q4, q0, q2
- vmul.i16 q5, q1, q3
-
- vst1.16 {q4, q5}, [r2]
-
- bx lr
-
- ENDP
-
- END
diff --git a/libvpx/vp8/common/arm/neon/dequantizeb_neon.c b/libvpx/vp8/common/arm/neon/dequantizeb_neon.c
new file mode 100644
index 0000000..60f69c8
--- /dev/null
+++ b/libvpx/vp8/common/arm/neon/dequantizeb_neon.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_dequantize_b_loop_neon(
+    int16_t *Q,
+    int16_t *DQC,
+    int16_t *DQ) {
+    const int16x8x2_t coeffs = vld2q_s16(Q);     /* 16 values, split even/odd */
+    const int16x8x2_t factors = vld2q_s16(DQC);  /* 16 values, split the same way */
+    int16x8x2_t scaled;
+    int half;
+
+    /* Lane-wise products; vst2q_s16 re-interleaves, so DQ[i] = Q[i] * DQC[i]. */
+    for (half = 0; half < 2; half++) {
+        scaled.val[half] = vmulq_s16(coeffs.val[half], factors.val[half]);
+    }
+    vst2q_s16(DQ, scaled);
+}
diff --git a/libvpx/vp8/common/blockd.h b/libvpx/vp8/common/blockd.h
index f7ff577..ea1a6a4 100644
--- a/libvpx/vp8/common/blockd.h
+++ b/libvpx/vp8/common/blockd.h
@@ -9,8 +9,8 @@
*/
-#ifndef __INC_BLOCKD_H
-#define __INC_BLOCKD_H
+#ifndef VP8_COMMON_BLOCKD_H_
+#define VP8_COMMON_BLOCKD_H_
void vpx_log(const char *format, ...);
@@ -20,6 +20,10 @@ void vpx_log(const char *format, ...);
#include "treecoder.h"
#include "vpx_ports/mem.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/*#define DCPRED 1*/
#define DCPREDSIMTHRESH 0
#define DCPREDCNTTHRESH 3
@@ -297,4 +301,8 @@ typedef struct macroblockd
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
-#endif /* __INC_BLOCKD_H */
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_BLOCKD_H_
diff --git a/libvpx/vp8/common/coefupdateprobs.h b/libvpx/vp8/common/coefupdateprobs.h
index 9e194dc..d96a19e 100644
--- a/libvpx/vp8/common/coefupdateprobs.h
+++ b/libvpx/vp8/common/coefupdateprobs.h
@@ -8,6 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_COMMON_COEFUPDATEPROBS_H_
+#define VP8_COMMON_COEFUPDATEPROBS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
/* Update probabilities for the nodes in the token entropy tree.
Generated file included by entropy.c */
@@ -183,3 +189,9 @@ const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTE
},
},
};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_COEFUPDATEPROBS_H_
diff --git a/libvpx/vp8/common/common.h b/libvpx/vp8/common/common.h
index 2cc1c54..ee5b58c 100644
--- a/libvpx/vp8/common/common.h
+++ b/libvpx/vp8/common/common.h
@@ -9,8 +9,8 @@
*/
-#ifndef common_h
-#define common_h 1
+#ifndef VP8_COMMON_COMMON_H_
+#define VP8_COMMON_COMMON_H_
#include <assert.h>
@@ -18,6 +18,10 @@
#include "vpx_mem/vpx_mem.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Only need this for fixed-size arrays, for structs just assign. */
#define vp8_copy( Dest, Src) { \
@@ -37,4 +41,8 @@
#define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest));
-#endif /* common_h */
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_COMMON_H_
diff --git a/libvpx/vp8/common/default_coef_probs.h b/libvpx/vp8/common/default_coef_probs.h
index 0d19563..4d69e4b 100644
--- a/libvpx/vp8/common/default_coef_probs.h
+++ b/libvpx/vp8/common/default_coef_probs.h
@@ -8,6 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
+#define VP8_COMMON_DEFAULT_COEF_PROBS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
/*Generated file, included by entropy.c*/
@@ -186,3 +192,9 @@ static const vp8_prob default_coef_probs [BLOCK_TYPES]
}
}
};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_
diff --git a/libvpx/vp8/common/entropy.h b/libvpx/vp8/common/entropy.h
index 5389bc1..a90bab4 100644
--- a/libvpx/vp8/common/entropy.h
+++ b/libvpx/vp8/common/entropy.h
@@ -9,12 +9,16 @@
*/
-#ifndef __INC_ENTROPY_H
-#define __INC_ENTROPY_H
+#ifndef VP8_COMMON_ENTROPY_H_
+#define VP8_COMMON_ENTROPY_H_
#include "treecoder.h"
#include "blockd.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Coefficient token alphabet */
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
@@ -98,4 +102,8 @@ extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]);
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
void vp8_coef_tree_initialize(void);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_ENTROPY_H_
diff --git a/libvpx/vp8/common/entropymode.h b/libvpx/vp8/common/entropymode.h
index 1df0f64..81bdfc4 100644
--- a/libvpx/vp8/common/entropymode.h
+++ b/libvpx/vp8/common/entropymode.h
@@ -9,12 +9,16 @@
*/
-#ifndef __INC_ENTROPYMODE_H
-#define __INC_ENTROPYMODE_H
+#ifndef VP8_COMMON_ENTROPYMODE_H_
+#define VP8_COMMON_ENTROPYMODE_H_
#include "onyxc_int.h"
#include "treecoder.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef enum
{
SUBMVREF_NORMAL,
@@ -77,4 +81,8 @@ void vp8_init_mbmode_probs(VP8_COMMON *x);
void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_ENTROPYMODE_H_
diff --git a/libvpx/vp8/common/entropymv.h b/libvpx/vp8/common/entropymv.h
index 2db1e38..42840d5 100644
--- a/libvpx/vp8/common/entropymv.h
+++ b/libvpx/vp8/common/entropymv.h
@@ -9,11 +9,15 @@
*/
-#ifndef __INC_ENTROPYMV_H
-#define __INC_ENTROPYMV_H
+#ifndef VP8_COMMON_ENTROPYMV_H_
+#define VP8_COMMON_ENTROPYMV_H_
#include "treecoder.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
enum
{
mv_max = 1023, /* max absolute value of a MV component */
@@ -41,4 +45,8 @@ typedef struct mv_context
extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_ENTROPYMV_H_
diff --git a/libvpx/vp8/common/extend.h b/libvpx/vp8/common/extend.h
index 74a0b17..068f4ac 100644
--- a/libvpx/vp8/common/extend.h
+++ b/libvpx/vp8/common/extend.h
@@ -9,11 +9,15 @@
*/
-#ifndef __INC_EXTEND_H
-#define __INC_EXTEND_H
+#ifndef VP8_COMMON_EXTEND_H_
+#define VP8_COMMON_EXTEND_H_
#include "vpx_scale/yv12config.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
@@ -22,4 +26,8 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
int srcy, int srcx,
int srch, int srcw);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_EXTEND_H_
diff --git a/libvpx/vp8/common/filter.h b/libvpx/vp8/common/filter.h
index ccda7c8..cfba775 100644
--- a/libvpx/vp8/common/filter.h
+++ b/libvpx/vp8/common/filter.h
@@ -9,11 +9,15 @@
*/
-#ifndef FILTER_H
-#define FILTER_H
+#ifndef VP8_COMMON_FILTER_H_
+#define VP8_COMMON_FILTER_H_
#include "vpx_ports/mem.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
@@ -21,4 +25,8 @@
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_FILTER_H_
diff --git a/libvpx/vp8/common/findnearmv.h b/libvpx/vp8/common/findnearmv.h
index c60e463..3c8c050 100644
--- a/libvpx/vp8/common/findnearmv.h
+++ b/libvpx/vp8/common/findnearmv.h
@@ -9,14 +9,18 @@
*/
-#ifndef __INC_FINDNEARMV_H
-#define __INC_FINDNEARMV_H
+#ifndef VP8_COMMON_FINDNEARMV_H_
+#define VP8_COMMON_FINDNEARMV_H_
#include "mv.h"
#include "blockd.h"
#include "modecont.h"
#include "treecoder.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp,
const int *ref_frame_sign_bias)
@@ -179,4 +183,8 @@ static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi
return (cur_mb->bmi + b - 4)->as_mode;
}
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_FINDNEARMV_H_
diff --git a/libvpx/vp8/common/header.h b/libvpx/vp8/common/header.h
index 3e98eeb..e27bca1 100644
--- a/libvpx/vp8/common/header.h
+++ b/libvpx/vp8/common/header.h
@@ -9,8 +9,12 @@
*/
-#ifndef __INC_HEADER_H
-#define __INC_HEADER_H
+#ifndef VP8_COMMON_HEADER_H_
+#define VP8_COMMON_HEADER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
/* 24 bits total */
typedef struct
@@ -40,4 +44,8 @@ typedef struct
#endif
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_HEADER_H_
diff --git a/libvpx/vp8/common/invtrans.h b/libvpx/vp8/common/invtrans.h
index 9262640..affe57e 100644
--- a/libvpx/vp8/common/invtrans.h
+++ b/libvpx/vp8/common/invtrans.h
@@ -9,8 +9,8 @@
*/
-#ifndef __INC_INVTRANS_H
-#define __INC_INVTRANS_H
+#ifndef VP8_COMMON_INVTRANS_H_
+#define VP8_COMMON_INVTRANS_H_
#include "vpx_config.h"
#include "vp8_rtcd.h"
@@ -21,6 +21,10 @@
#include "vpx_mem/vpx_mem.h"
#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
static void eob_adjust(char *eobs, short *diff)
{
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
@@ -59,4 +63,8 @@ static void vp8_inverse_transform_mby(MACROBLOCKD *xd)
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
}
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_INVTRANS_H_
diff --git a/libvpx/vp8/common/loopfilter.h b/libvpx/vp8/common/loopfilter.h
index 1e47f34..20a6bd3 100644
--- a/libvpx/vp8/common/loopfilter.h
+++ b/libvpx/vp8/common/loopfilter.h
@@ -9,13 +9,17 @@
*/
-#ifndef loopfilter_h
-#define loopfilter_h
+#ifndef VP8_COMMON_LOOPFILTER_H_
+#define VP8_COMMON_LOOPFILTER_H_
#include "vpx_ports/mem.h"
#include "vpx_config.h"
#include "vp8_rtcd.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MAX_LOOP_FILTER 63
/* fraction of total macroblock rows to be used in fast filter level picking */
/* has to be > 2 */
@@ -102,4 +106,8 @@ void vp8_loop_filter_row_simple(struct VP8Common *cm,
int mb_row, int post_ystride, int post_uvstride,
unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_LOOPFILTER_H_
diff --git a/libvpx/vp8/common/modecont.h b/libvpx/vp8/common/modecont.h
index 24db882..ff34c33 100644
--- a/libvpx/vp8/common/modecont.h
+++ b/libvpx/vp8/common/modecont.h
@@ -9,9 +9,17 @@
*/
-#ifndef __INC_MODECONT_H
-#define __INC_MODECONT_H
+#ifndef VP8_COMMON_MODECONT_H_
+#define VP8_COMMON_MODECONT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
extern const int vp8_mode_contexts[6][4];
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_MODECONT_H_
diff --git a/libvpx/vp8/common/mv.h b/libvpx/vp8/common/mv.h
index b3f919d..111ccd6 100644
--- a/libvpx/vp8/common/mv.h
+++ b/libvpx/vp8/common/mv.h
@@ -9,10 +9,14 @@
*/
-#ifndef __INC_MV_H
-#define __INC_MV_H
+#ifndef VP8_COMMON_MV_H_
+#define VP8_COMMON_MV_H_
#include "vpx/vpx_integer.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef struct
{
short row;
@@ -25,4 +29,8 @@ typedef union int_mv
MV as_mv;
} int_mv; /* facilitates faster equality tests and copies */
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_MV_H_
diff --git a/libvpx/vp8/common/onyx.h b/libvpx/vp8/common/onyx.h
index 30c4cbb..119e40c 100644
--- a/libvpx/vp8/common/onyx.h
+++ b/libvpx/vp8/common/onyx.h
@@ -9,8 +9,8 @@
*/
-#ifndef __INC_VP8_H
-#define __INC_VP8_H
+#ifndef VP8_COMMON_ONYX_H_
+#define VP8_COMMON_ONYX_H_
#ifdef __cplusplus
extern "C"
@@ -39,8 +39,8 @@ extern "C"
typedef enum
{
- USAGE_STREAM_FROM_SERVER = 0x0,
- USAGE_LOCAL_FILE_PLAYBACK = 0x1,
+ USAGE_LOCAL_FILE_PLAYBACK = 0x0,
+ USAGE_STREAM_FROM_SERVER = 0x1,
USAGE_CONSTRAINED_QUALITY = 0x2,
USAGE_CONSTANT_QUALITY = 0x3
} END_USAGE;
@@ -267,4 +267,4 @@ extern "C"
}
#endif
-#endif
+#endif // VP8_COMMON_ONYX_H_
diff --git a/libvpx/vp8/common/onyxc_int.h b/libvpx/vp8/common/onyxc_int.h
index e9bb7af..6d89865 100644
--- a/libvpx/vp8/common/onyxc_int.h
+++ b/libvpx/vp8/common/onyxc_int.h
@@ -9,8 +9,8 @@
*/
-#ifndef __INC_VP8C_INT_H
-#define __INC_VP8C_INT_H
+#ifndef VP8_COMMON_ONYXC_INT_H_
+#define VP8_COMMON_ONYXC_INT_H_
#include "vpx_config.h"
#include "vp8_rtcd.h"
@@ -26,6 +26,10 @@
#include "header.h"
/*#endif*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MINQ 0
#define MAXQ 127
#define QINDEX_RANGE (MAXQ + 1)
@@ -174,4 +178,8 @@ typedef struct VP8Common
int cpu_caps;
} VP8_COMMON;
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_ONYXC_INT_H_
diff --git a/libvpx/vp8/common/onyxd.h b/libvpx/vp8/common/onyxd.h
index 97c81c1..e37b29f 100644
--- a/libvpx/vp8/common/onyxd.h
+++ b/libvpx/vp8/common/onyxd.h
@@ -9,8 +9,8 @@
*/
-#ifndef __INC_VP8D_H
-#define __INC_VP8D_H
+#ifndef VP8_COMMON_ONYXD_H_
+#define VP8_COMMON_ONYXD_H_
/* Create/destroy static data structures. */
@@ -60,4 +60,4 @@ extern "C"
#endif
-#endif
+#endif // VP8_COMMON_ONYXD_H_
diff --git a/libvpx/vp8/common/postproc.c b/libvpx/vp8/common/postproc.c
index dd998f1..e3bee32 100644
--- a/libvpx/vp8/common/postproc.c
+++ b/libvpx/vp8/common/postproc.c
@@ -71,11 +71,6 @@ static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] =
};
#endif
-static const short kernel5[] =
-{
- 1, 1, 4, 1, 1
-};
-
const short vp8_rv[] =
{
8, 5, 2, 2, 8, 12, 4, 9, 8, 3,
diff --git a/libvpx/vp8/common/postproc.h b/libvpx/vp8/common/postproc.h
index 495a2c9..33d0a7f 100644
--- a/libvpx/vp8/common/postproc.h
+++ b/libvpx/vp8/common/postproc.h
@@ -9,8 +9,8 @@
*/
-#ifndef POSTPROC_H
-#define POSTPROC_H
+#ifndef VP8_COMMON_POSTPROC_H_
+#define VP8_COMMON_POSTPROC_H_
#include "vpx_ports/mem.h"
struct postproc_state
@@ -26,6 +26,10 @@ struct postproc_state
};
#include "onyxc_int.h"
#include "ppflags.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
vp8_ppflags_t *flags);
@@ -47,4 +51,8 @@ void vp8_deblock(struct VP8Common *oci,
#define MFQE_PRECISION 4
void vp8_multiframe_quality_enhance(struct VP8Common *cm);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_POSTPROC_H_
diff --git a/libvpx/vp8/common/ppflags.h b/libvpx/vp8/common/ppflags.h
index 665e21f..768224a 100644
--- a/libvpx/vp8/common/ppflags.h
+++ b/libvpx/vp8/common/ppflags.h
@@ -9,8 +9,12 @@
*/
-#ifndef __INC_PPFLAGS_H
-#define __INC_PPFLAGS_H
+#ifndef VP8_COMMON_PPFLAGS_H_
+#define VP8_COMMON_PPFLAGS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
enum
{
VP8D_NOFILTERING = 0,
@@ -38,4 +42,8 @@ typedef struct
int display_mv_flag;
} vp8_ppflags_t;
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_PPFLAGS_H_
diff --git a/libvpx/vp8/common/pragmas.h b/libvpx/vp8/common/pragmas.h
index 99fee5a..329cc82 100644
--- a/libvpx/vp8/common/pragmas.h
+++ b/libvpx/vp8/common/pragmas.h
@@ -8,8 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_COMMON_PRAGMAS_H_
+#define VP8_COMMON_PRAGMAS_H_
-
+#ifdef __cplusplus
+extern "C" {
+#endif
#ifdef __INTEL_COMPILER
#pragma warning(disable:997 1011 170)
@@ -17,3 +21,9 @@
#ifdef _MSC_VER
#pragma warning(disable:4799)
#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_PRAGMAS_H_
diff --git a/libvpx/vp8/common/quant_common.h b/libvpx/vp8/common/quant_common.h
index cb64d8e..700b5e6 100644
--- a/libvpx/vp8/common/quant_common.h
+++ b/libvpx/vp8/common/quant_common.h
@@ -8,14 +8,27 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_COMMON_QUANT_COMMON_H_
+#define VP8_COMMON_QUANT_COMMON_H_
+
#include "string.h"
#include "blockd.h"
#include "onyxc_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
extern int vp8_ac_yquant(int QIndex);
extern int vp8_dc_quant(int QIndex, int Delta);
extern int vp8_dc2quant(int QIndex, int Delta);
extern int vp8_ac2quant(int QIndex, int Delta);
extern int vp8_dc_uv_quant(int QIndex, int Delta);
extern int vp8_ac_uv_quant(int QIndex, int Delta);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_QUANT_COMMON_H_
diff --git a/libvpx/vp8/common/reconinter.h b/libvpx/vp8/common/reconinter.h
index 233c02e..ba979b9 100644
--- a/libvpx/vp8/common/reconinter.h
+++ b/libvpx/vp8/common/reconinter.h
@@ -9,8 +9,12 @@
*/
-#ifndef __INC_RECONINTER_H
-#define __INC_RECONINTER_H
+#ifndef VP8_COMMON_RECONINTER_H_
+#define VP8_COMMON_RECONINTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
@@ -32,4 +36,8 @@ extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_RECONINTER_H_
diff --git a/libvpx/vp8/common/reconintra4x4.h b/libvpx/vp8/common/reconintra4x4.h
index d2b0d43..ed59c9e 100644
--- a/libvpx/vp8/common/reconintra4x4.h
+++ b/libvpx/vp8/common/reconintra4x4.h
@@ -9,10 +9,14 @@
*/
-#ifndef __INC_RECONINTRA4x4_H
-#define __INC_RECONINTRA4x4_H
+#ifndef VP8_COMMON_RECONINTRA4X4_H_
+#define VP8_COMMON_RECONINTRA4X4_H_
#include "vp8/common/blockd.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
static void intra_prediction_down_copy(MACROBLOCKD *xd,
unsigned char *above_right_src)
{
@@ -29,4 +33,8 @@ static void intra_prediction_down_copy(MACROBLOCKD *xd,
*dst_ptr2 = *src_ptr;
}
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_RECONINTRA4X4_H_
diff --git a/libvpx/vp8/common/rtcd_defs.pl b/libvpx/vp8/common/rtcd_defs.pl
new file mode 100644
index 0000000..130d965
--- /dev/null
+++ b/libvpx/vp8/common/rtcd_defs.pl
@@ -0,0 +1,541 @@
+sub vp8_common_forward_decls() { # prints the C struct/union forward declarations in the heredoc below
+print <<EOF
+/*
+ * VP8
+ */
+
+struct blockd;
+struct macroblockd;
+struct loop_filter_info;
+
+/* Encoder forward decls */
+struct block;
+struct macroblock;
+struct variance_vtable;
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vp8_common_forward_decls/;
+
+#
+# system state
+#
+add_proto qw/void vp8_clear_system_state/, "";
+specialize qw/vp8_clear_system_state mmx/;
+$vp8_clear_system_state_mmx=vpx_reset_mmx_state;
+
+#
+# Dequant
+#
+add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
+specialize qw/vp8_dequantize_b mmx media neon/;
+$vp8_dequantize_b_media=vp8_dequantize_b_v6;
+
+add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
+specialize qw/vp8_dequant_idct_add mmx media neon dspr2/;
+$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
+$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
+
+add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
+specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/;
+$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
+$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
+
+add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
+specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/;
+$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
+$vp8_dequant_idct_add_uv_block_dspr2=vp8_dequant_idct_add_uv_block_dspr2; # uv variant; the y_block dspr2 mapping belongs to the y_block stanza
+
+#
+# Loopfilter
+#
+add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2/;
+$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
+$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
+
+add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/;
+$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
+$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
+
+add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2/;
+$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
+$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
+
+add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/;
+$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
+$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
+
+
+add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/;
+$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
+$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
+$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
+$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
+$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/;
+$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
+$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx;
+$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
+$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
+$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/;
+$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
+$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
+$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
+$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
+$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/;
+$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
+$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx;
+$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
+$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6;
+$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
+
+#
+# IDCT
+#
+#idct16
+add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
+specialize qw/vp8_short_idct4x4llm mmx media neon dspr2/;
+$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual;
+$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;
+
+#iwalsh1
+add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
+specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
+$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
+# dspr2 is the only specialization listed; no other asm yet
+
+#iwalsh16
+add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
+specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2/;
+$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6;
+$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;
+
+#idct1_scalar_add
+add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
+specialize qw/vp8_dc_only_idct_add mmx media neon dspr2/;
+$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6;
+$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
+
+#
+# RECON
+#
+add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2/;
+$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6;
+$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;
+
+add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem8x8 mmx media neon dspr2/;
+$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6;
+$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;
+
+add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem8x4 mmx media neon dspr2/;
+$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
+$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
+
+add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride";
+specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3/;
+#TODO: fix assembly for neon
+
+add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
+specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3/;
+
+add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left";
+specialize qw/vp8_intra4x4_predict media/;
+$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6;
+
+#
+# Postproc
+#
+if (vpx_config("CONFIG_POSTPROC") eq "yes") {
+ add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
+ specialize qw/vp8_mbpost_proc_down mmx sse2/;
+ $vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm;
+
+ add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
+ specialize qw/vp8_mbpost_proc_across_ip sse2/;
+ $vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm;
+
+ add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
+ specialize qw/vp8_post_proc_down_and_across_mb_row sse2/;
+
+ add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
+ specialize qw/vp8_plane_add_noise mmx sse2/;
+ $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
+
+ add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+ # no asm yet
+
+ add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+ # no asm yet
+
+ add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+ # no asm yet
+
+ add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+ specialize qw/vp8_filter_by_weight16x16 sse2/;
+
+ add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+ specialize qw/vp8_filter_by_weight8x8 sse2/;
+
+ add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+ # no asm yet
+}
+
+#
+# Subpixel
+#
+add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6;
+$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;
+
+add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6;
+$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;
+
+add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
+$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
+
+add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2/;
+$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
+$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
+
+add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/;
+$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6;
+
+add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/;
+$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6;
+
+add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict8x4 mmx media neon/;
+$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
+
+add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict4x4 mmx media neon/;
+$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
+
+#
+# Whole-pixel Variance
+#
+add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance4x4 mmx sse2/;
+$vp8_variance4x4_sse2=vp8_variance4x4_wmt;
+
+add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance8x8 mmx sse2 media neon/;
+$vp8_variance8x8_sse2=vp8_variance8x8_wmt;
+$vp8_variance8x8_media=vp8_variance8x8_armv6;
+
+add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance8x16 mmx sse2 neon/;
+$vp8_variance8x16_sse2=vp8_variance8x16_wmt;
+
+add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance16x8 mmx sse2 neon/;
+$vp8_variance16x8_sse2=vp8_variance16x8_wmt;
+
+add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance16x16 mmx sse2 media neon/;
+$vp8_variance16x16_sse2=vp8_variance16x16_wmt;
+$vp8_variance16x16_media=vp8_variance16x16_armv6;
+
+#
+# Sub-pixel Variance
+#
+add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/;
+$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/;
+$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
+$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
+$vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance16x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/;
+$vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/;
+$vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt;
+$vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt;
+$vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt;
+$vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
+$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
+
+#
+# Single block SAD
+#
+add_proto qw/unsigned int vp8_sad4x4/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad4x4 mmx sse2 neon/;
+$vp8_sad4x4_sse2=vp8_sad4x4_wmt;
+
+add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad8x8 mmx sse2 neon/;
+$vp8_sad8x8_sse2=vp8_sad8x8_wmt;
+
+add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad8x16 mmx sse2 neon/;
+$vp8_sad8x16_sse2=vp8_sad8x16_wmt;
+
+add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad16x8 mmx sse2 neon/;
+$vp8_sad16x8_sse2=vp8_sad16x8_wmt;
+
+add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/;
+$vp8_sad16x16_sse2=vp8_sad16x16_wmt;
+$vp8_sad16x16_media=vp8_sad16x16_armv6;
+
+#
+# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
+#
+add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad4x4x3 sse3/;
+
+add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x8x3 sse3/;
+
+add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x16x3 sse3/;
+
+add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x8x3 sse3 ssse3/;
+
+add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x16x3 sse3 ssse3/;
+
+# Note the only difference in the following prototypes is that they return into
+# an array of short
+add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad4x4x8 sse4_1/;
+$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4;
+
+add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad8x8x8 sse4_1/;
+$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4;
+
+add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad8x16x8 sse4_1/;
+$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4;
+
+add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad16x8x8 sse4_1/;
+$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4;
+
+add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad16x16x8 sse4_1/;
+$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4;
+
+#
+# Multi-block SAD, comparing a reference to N independent blocks
+#
+add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad4x4x4d sse3/;
+
+add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x8x4d sse3/;
+
+add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x16x4d sse3/;
+
+add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x8x4d sse3/;
+
+add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x16x4d sse3/;
+
+#
+# Encoder functions below this point.
+#
+if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
+
+#
+# Sum of squares (vector)
+#
+add_proto qw/unsigned int vp8_get_mb_ss/, "const short *";
+specialize qw/vp8_get_mb_ss mmx sse2/;
+
+#
+# SSE (Sum Squared Error)
+#
+add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/;
+$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
+
+add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_mse16x16 mmx sse2 media neon/;
+$vp8_mse16x16_sse2=vp8_mse16x16_wmt;
+$vp8_mse16x16_media=vp8_mse16x16_armv6;
+
+add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
+specialize qw/vp8_get4x4sse_cs mmx neon/;
+
+#
+# Block copy
+#
+if ($opts{arch} =~ /x86/) {
+ add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n";
+ specialize qw/vp8_copy32xn sse2 sse3/;
+}
+
+#
+# Structured Similarity (SSIM)
+#
+if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+ $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2"; # set only on x86_64; left unset elsewhere, so the specialize calls below then receive an empty string
+
+ add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64";
+
+ add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64";
+}
+
+#
+# Forward DCT
+#
+add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/;
+$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
+
+add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/;
+$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
+
+add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_walsh4x4 sse2 media neon/;
+$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
+
+#
+# Quantizer
+#
+add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
+specialize qw/vp8_regular_quantize_b sse2/;
+# TODO(johann) Update sse4 implementation and re-enable
+#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4;
+
+add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
+specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
+$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
+
+add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
+# no asm yet
+
+add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
+specialize qw/vp8_fast_quantize_b_pair neon/;
+
+add_proto qw/void vp8_quantize_mb/, "struct macroblock *";
+specialize qw/vp8_quantize_mb neon/;
+
+add_proto qw/void vp8_quantize_mby/, "struct macroblock *";
+specialize qw/vp8_quantize_mby neon/;
+
+add_proto qw/void vp8_quantize_mbuv/, "struct macroblock *";
+specialize qw/vp8_quantize_mbuv neon/;
+
+#
+# Block subtraction
+#
+add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
+specialize qw/vp8_block_error mmx sse2/;
+$vp8_block_error_sse2=vp8_block_error_xmm;
+
+add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
+specialize qw/vp8_mbblock_error mmx sse2/;
+$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm;
+
+add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
+specialize qw/vp8_mbuverror mmx sse2/;
+$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
+
+add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
+specialize qw/vp8_subtract_b mmx sse2 media neon/;
+$vp8_subtract_b_media=vp8_subtract_b_armv6;
+
+add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
+specialize qw/vp8_subtract_mby mmx sse2 media neon/;
+$vp8_subtract_mby_media=vp8_subtract_mby_armv6;
+
+add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
+specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
+$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
+
+#
+# Motion search
+#
+add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+specialize qw/vp8_full_search_sad sse3 sse4_1/;
+$vp8_full_search_sad_sse3=vp8_full_search_sadx3;
+$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;
+
+add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+specialize qw/vp8_refining_search_sad sse3/;
+$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4;
+
+add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4; # NOTE(review): no specialize line names sse3 for this function, unlike the full/refining searches above - confirm this mapping is actually used
+
+#
+# Alt-ref Noise Reduction (ARNR)
+#
+if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
+ add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
+ specialize qw/vp8_temporal_filter_apply sse2/;
+}
+
+#
+# Pick Loopfilter
+#
+add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
+specialize qw/vp8_yv12_copy_partial_frame neon/;
+
+#
+# Denoiser filter
+#
+if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
+ add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset";
+ specialize qw/vp8_denoiser_filter sse2 neon/;
+}
+
+# End of encoder only functions
+}
+1;
diff --git a/libvpx/vp8/common/rtcd_defs.sh b/libvpx/vp8/common/rtcd_defs.sh
deleted file mode 100644
index 9ebf389..0000000
--- a/libvpx/vp8/common/rtcd_defs.sh
+++ /dev/null
@@ -1,542 +0,0 @@
-vp8_common_forward_decls() {
-cat <<EOF
-/*
- * VP8
- */
-
-struct blockd;
-struct macroblockd;
-struct loop_filter_info;
-
-/* Encoder forward decls */
-struct block;
-struct macroblock;
-struct variance_vtable;
-union int_mv;
-struct yv12_buffer_config;
-EOF
-}
-forward_decls vp8_common_forward_decls
-
-#
-# system state
-#
-prototype void vp8_clear_system_state ""
-specialize vp8_clear_system_state mmx
-vp8_clear_system_state_mmx=vpx_reset_mmx_state
-
-#
-# Dequant
-#
-prototype void vp8_dequantize_b "struct blockd*, short *dqc"
-specialize vp8_dequantize_b mmx media neon
-vp8_dequantize_b_media=vp8_dequantize_b_v6
-
-prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride"
-specialize vp8_dequant_idct_add mmx media neon dspr2
-vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6
-vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2
-
-prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs"
-specialize vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2
-vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6
-vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2
-
-prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"
-specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2
-vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6
-vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2
-
-#
-# Loopfilter
-#
-prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbv mmx sse2 media neon dspr2
-vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6
-vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2
-
-prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bv mmx sse2 media neon dspr2
-vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6
-vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2
-
-prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbh mmx sse2 media neon dspr2
-vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6
-vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2
-
-prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bh mmx sse2 media neon dspr2
-vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6
-vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2
-
-
-prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_mbv mmx sse2 media neon
-vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c
-vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx
-vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2
-vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6
-vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon
-
-prototype void vp8_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_mbh mmx sse2 media neon
-vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c
-vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx
-vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2
-vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6
-vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon
-
-prototype void vp8_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_bv mmx sse2 media neon
-vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c
-vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx
-vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2
-vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6
-vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon
-
-prototype void vp8_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_bh mmx sse2 media neon
-vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c
-vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx
-vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2
-vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6
-vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon
-
-#
-# IDCT
-#
-#idct16
-prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"
-specialize vp8_short_idct4x4llm mmx media neon dspr2
-vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual
-vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2
-
-#iwalsh1
-prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output"
-specialize vp8_short_inv_walsh4x4_1 dspr2
-vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2
-# no asm yet
-
-#iwalsh16
-prototype void vp8_short_inv_walsh4x4 "short *input, short *output"
-specialize vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2
-vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6
-vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2
-
-#idct1_scalar_add
-prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"
-specialize vp8_dc_only_idct_add mmx media neon dspr2
-vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6
-vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2
-
-#
-# RECON
-#
-prototype void vp8_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem16x16 mmx sse2 media neon dspr2
-vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6
-vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2
-
-prototype void vp8_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem8x8 mmx media neon dspr2
-vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6
-vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2
-
-prototype void vp8_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem8x4 mmx media neon dspr2
-vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6
-vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2
-
-prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"
-specialize vp8_build_intra_predictors_mby_s sse2 ssse3
-#TODO: fix assembly for neon
-
-prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
-specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
-
-prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
-specialize vp8_intra4x4_predict media
-vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6
-
-#
-# Postproc
-#
-if [ "$CONFIG_POSTPROC" = "yes" ]; then
- prototype void vp8_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols,int flimit"
- specialize vp8_mbpost_proc_down mmx sse2
- vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm
-
- prototype void vp8_mbpost_proc_across_ip "unsigned char *dst, int pitch, int rows, int cols,int flimit"
- specialize vp8_mbpost_proc_across_ip sse2
- vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm
-
- prototype void vp8_post_proc_down_and_across_mb_row "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"
- specialize vp8_post_proc_down_and_across_mb_row sse2
-
- prototype void vp8_plane_add_noise "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch"
- specialize vp8_plane_add_noise mmx sse2
- vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt
-
- prototype void vp8_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
- # no asm yet
-
- prototype void vp8_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
- # no asm yet
-
- prototype void vp8_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
- # no asm yet
-
- prototype void vp8_filter_by_weight16x16 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
- specialize vp8_filter_by_weight16x16 sse2
-
- prototype void vp8_filter_by_weight8x8 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
- specialize vp8_filter_by_weight8x8 sse2
-
- prototype void vp8_filter_by_weight4x4 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
- # no asm yet
-fi
-
-#
-# Subpixel
-#
-prototype void vp8_sixtap_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6
-vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2
-
-prototype void vp8_sixtap_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6
-vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2
-
-prototype void vp8_sixtap_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6
-vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2
-
-prototype void vp8_sixtap_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2
-vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6
-vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2
-
-prototype void vp8_bilinear_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon
-vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6
-
-prototype void vp8_bilinear_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon
-vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6
-
-prototype void vp8_bilinear_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict8x4 mmx media neon
-vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6
-
-prototype void vp8_bilinear_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict4x4 mmx media neon
-vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6
-
-#
-# Whole-pixel Variance
-#
-prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance4x4 mmx sse2
-vp8_variance4x4_sse2=vp8_variance4x4_wmt
-
-prototype unsigned int vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance8x8 mmx sse2 media neon
-vp8_variance8x8_sse2=vp8_variance8x8_wmt
-vp8_variance8x8_media=vp8_variance8x8_armv6
-
-prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance8x16 mmx sse2 neon
-vp8_variance8x16_sse2=vp8_variance8x16_wmt
-
-prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance16x8 mmx sse2 neon
-vp8_variance16x8_sse2=vp8_variance16x8_wmt
-
-prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance16x16 mmx sse2 media neon
-vp8_variance16x16_sse2=vp8_variance16x16_wmt
-vp8_variance16x16_media=vp8_variance16x16_armv6
-
-#
-# Sub-pixel Variance
-#
-prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance4x4 mmx sse2
-vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt
-
-prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance8x8 mmx sse2 media neon
-vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt
-vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6
-
-prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance8x16 mmx sse2
-vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt
-
-prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance16x8 mmx sse2 ssse3
-vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt
-
-prototype unsigned int vp8_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon
-vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt
-vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_h mmx sse2 media neon
-vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt
-vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_v mmx sse2 media neon
-vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt
-vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_hv mmx sse2 media neon
-vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt
-vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6
-
-#
-# Single block SAD
-#
-prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad4x4 mmx sse2 neon
-vp8_sad4x4_sse2=vp8_sad4x4_wmt
-
-prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad8x8 mmx sse2 neon
-vp8_sad8x8_sse2=vp8_sad8x8_wmt
-
-prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad8x16 mmx sse2 neon
-vp8_sad8x16_sse2=vp8_sad8x16_wmt
-
-prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad16x8 mmx sse2 neon
-vp8_sad16x8_sse2=vp8_sad16x8_wmt
-
-prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad16x16 mmx sse2 sse3 media neon
-vp8_sad16x16_sse2=vp8_sad16x16_wmt
-vp8_sad16x16_media=vp8_sad16x16_armv6
-
-#
-# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
-#
-prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad4x4x3 sse3
-
-prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x8x3 sse3
-
-prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x16x3 sse3
-
-prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x8x3 sse3 ssse3
-
-prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x16x3 sse3 ssse3
-
-# Note the only difference in the following prototypes is that they return into
-# an array of short
-prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad4x4x8 sse4_1
-vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4
-
-prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad8x8x8 sse4_1
-vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4
-
-prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad8x16x8 sse4_1
-vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4
-
-prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad16x8x8 sse4_1
-vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4
-
-prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad16x16x8 sse4_1
-vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4
-
-#
-# Multi-block SAD, comparing a reference to N independent blocks
-#
-prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad4x4x4d sse3
-
-prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x8x4d sse3
-
-prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x16x4d sse3
-
-prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x8x4d sse3
-
-prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x16x4d sse3
-
-#
-# Encoder functions below this point.
-#
-if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then
-
-#
-# Sum of squares (vector)
-#
-prototype unsigned int vp8_get_mb_ss "const short *"
-specialize vp8_get_mb_ss mmx sse2
-
-#
-# SSE (Sum Squared Error)
-#
-prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_mse16x16 mmx sse2
-vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt
-
-prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_mse16x16 mmx sse2 media neon
-vp8_mse16x16_sse2=vp8_mse16x16_wmt
-vp8_mse16x16_media=vp8_mse16x16_armv6
-
-prototype unsigned int vp8_get4x4sse_cs "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"
-specialize vp8_get4x4sse_cs mmx neon
-
-#
-# Block copy
-#
-case $arch in
- x86*)
- prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"
- specialize vp8_copy32xn sse2 sse3
- ;;
-esac
-
-#
-# Structured Similarity (SSIM)
-#
-if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
- [ $arch = "x86_64" ] && sse2_on_x86_64=sse2
-
- prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
- specialize vp8_ssim_parms_8x8 $sse2_on_x86_64
-
- prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
- specialize vp8_ssim_parms_16x16 $sse2_on_x86_64
-fi
-
-#
-# Forward DCT
-#
-prototype void vp8_short_fdct4x4 "short *input, short *output, int pitch"
-specialize vp8_short_fdct4x4 mmx sse2 media neon
-vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6
-
-prototype void vp8_short_fdct8x4 "short *input, short *output, int pitch"
-specialize vp8_short_fdct8x4 mmx sse2 media neon
-vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6
-
-prototype void vp8_short_walsh4x4 "short *input, short *output, int pitch"
-specialize vp8_short_walsh4x4 sse2 media neon
-vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6
-
-#
-# Quantizer
-#
-prototype void vp8_regular_quantize_b "struct block *, struct blockd *"
-specialize vp8_regular_quantize_b sse2 #sse4_1
-# TODO(johann) Update sse4 implementation and re-enable
-#vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4
-
-prototype void vp8_fast_quantize_b "struct block *, struct blockd *"
-specialize vp8_fast_quantize_b sse2 ssse3 media neon
-vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6
-
-prototype void vp8_regular_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"
-# no asm yet
-
-prototype void vp8_fast_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"
-specialize vp8_fast_quantize_b_pair neon
-
-prototype void vp8_quantize_mb "struct macroblock *"
-specialize vp8_quantize_mb neon
-
-prototype void vp8_quantize_mby "struct macroblock *"
-specialize vp8_quantize_mby neon
-
-prototype void vp8_quantize_mbuv "struct macroblock *"
-specialize vp8_quantize_mbuv neon
-
-#
-# Block subtraction
-#
-prototype int vp8_block_error "short *coeff, short *dqcoeff"
-specialize vp8_block_error mmx sse2
-vp8_block_error_sse2=vp8_block_error_xmm
-
-prototype int vp8_mbblock_error "struct macroblock *mb, int dc"
-specialize vp8_mbblock_error mmx sse2
-vp8_mbblock_error_sse2=vp8_mbblock_error_xmm
-
-prototype int vp8_mbuverror "struct macroblock *mb"
-specialize vp8_mbuverror mmx sse2
-vp8_mbuverror_sse2=vp8_mbuverror_xmm
-
-prototype void vp8_subtract_b "struct block *be, struct blockd *bd, int pitch"
-specialize vp8_subtract_b mmx sse2 media neon
-vp8_subtract_b_media=vp8_subtract_b_armv6
-
-prototype void vp8_subtract_mby "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"
-specialize vp8_subtract_mby mmx sse2 media neon
-vp8_subtract_mby_media=vp8_subtract_mby_armv6
-
-prototype void vp8_subtract_mbuv "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"
-specialize vp8_subtract_mbuv mmx sse2 media neon
-vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6
-
-#
-# Motion search
-#
-prototype int vp8_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-specialize vp8_full_search_sad sse3 sse4_1
-vp8_full_search_sad_sse3=vp8_full_search_sadx3
-vp8_full_search_sad_sse4_1=vp8_full_search_sadx8
-
-prototype int vp8_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-specialize vp8_refining_search_sad sse3
-vp8_refining_search_sad_sse3=vp8_refining_search_sadx4
-
-prototype int vp8_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4
-
-#
-# Alt-ref Noise Reduction (ARNR)
-#
-if [ "$CONFIG_REALTIME_ONLY" != "yes" ]; then
- prototype void vp8_temporal_filter_apply "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count"
- specialize vp8_temporal_filter_apply sse2
-fi
-
-#
-# Pick Loopfilter
-#
-prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_partial_frame neon
-
-#
-# Denoiser filter
-#
-if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
- prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
- specialize vp8_denoiser_filter sse2
-fi
-
-# End of encoder only functions
-fi
diff --git a/libvpx/vp8/common/setupintrarecon.h b/libvpx/vp8/common/setupintrarecon.h
index e515c3a..608f4a9 100644
--- a/libvpx/vp8/common/setupintrarecon.h
+++ b/libvpx/vp8/common/setupintrarecon.h
@@ -8,8 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_COMMON_SETUPINTRARECON_H_
+#define VP8_COMMON_SETUPINTRARECON_H_
#include "vpx_scale/yv12config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf);
@@ -31,3 +37,9 @@ void setup_intra_recon_left(unsigned char *y_buffer,
for (i = 0; i < 8; i++)
v_buffer[uv_stride *i] = (unsigned char) 129;
}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_SETUPINTRARECON_H_
diff --git a/libvpx/vp8/common/swapyv12buffer.h b/libvpx/vp8/common/swapyv12buffer.h
index a6473ed..1d66cd3 100644
--- a/libvpx/vp8/common/swapyv12buffer.h
+++ b/libvpx/vp8/common/swapyv12buffer.h
@@ -9,11 +9,19 @@
*/
-#ifndef SWAPYV12_BUFFER_H
-#define SWAPYV12_BUFFER_H
+#ifndef VP8_COMMON_SWAPYV12BUFFER_H_
+#define VP8_COMMON_SWAPYV12BUFFER_H_
#include "vpx_scale/yv12config.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_SWAPYV12BUFFER_H_
diff --git a/libvpx/vp8/common/systemdependent.h b/libvpx/vp8/common/systemdependent.h
index e6b0456..3d44e37 100644
--- a/libvpx/vp8/common/systemdependent.h
+++ b/libvpx/vp8/common/systemdependent.h
@@ -8,8 +8,20 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_COMMON_SYSTEMDEPENDENT_H_
+#define VP8_COMMON_SYSTEMDEPENDENT_H_
#include "vpx_config.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct VP8Common;
void vp8_machine_specific_config(struct VP8Common *);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_SYSTEMDEPENDENT_H_
diff --git a/libvpx/vp8/common/threading.h b/libvpx/vp8/common/threading.h
index ed9e3e6..01c82db 100644
--- a/libvpx/vp8/common/threading.h
+++ b/libvpx/vp8/common/threading.h
@@ -9,8 +9,12 @@
*/
-#ifndef _PTHREAD_EMULATION
-#define _PTHREAD_EMULATION
+#ifndef VP8_COMMON_THREADING_H_
+#define VP8_COMMON_THREADING_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
@@ -183,4 +187,8 @@ static inline int sem_destroy(sem_t * sem)
#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_THREADING_H_
diff --git a/libvpx/vp8/common/treecoder.h b/libvpx/vp8/common/treecoder.h
index ebf51c5..d22b7c5 100644
--- a/libvpx/vp8/common/treecoder.h
+++ b/libvpx/vp8/common/treecoder.h
@@ -9,8 +9,12 @@
*/
-#ifndef __INC_TREECODER_H
-#define __INC_TREECODER_H
+#ifndef VP8_COMMON_TREECODER_H_
+#define VP8_COMMON_TREECODER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
typedef unsigned char vp8bc_index_t; /* probability index */
@@ -87,4 +91,8 @@ void vp8bc_tree_probs_from_distribution(
);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_TREECODER_H_
diff --git a/libvpx/vp8/common/variance.h b/libvpx/vp8/common/variance.h
index 01193b8..89a32a7 100644
--- a/libvpx/vp8/common/variance.h
+++ b/libvpx/vp8/common/variance.h
@@ -9,11 +9,15 @@
*/
-#ifndef VARIANCE_H
-#define VARIANCE_H
+#ifndef VP8_COMMON_VARIANCE_H_
+#define VP8_COMMON_VARIANCE_H_
#include "vpx_config.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef unsigned int(*vp8_sad_fn_t)(
const unsigned char *src_ptr,
int source_stride,
@@ -112,4 +116,8 @@ typedef struct variance_vtable
#endif
} vp8_variance_fn_ptr_t;
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_COMMON_VARIANCE_H_
diff --git a/libvpx/vp8/common/vp8_entropymodedata.h b/libvpx/vp8/common/vp8_entropymodedata.h
index 13e9a92..c4aed49 100644
--- a/libvpx/vp8/common/vp8_entropymodedata.h
+++ b/libvpx/vp8/common/vp8_entropymodedata.h
@@ -8,6 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_
+#define VP8_COMMON_VP8_ENTROPYMODEDATA_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
/*Generated file, included by entropymode.c*/
@@ -240,3 +246,9 @@ const vp8_prob vp8_kf_bmode_prob
{ 112, 19, 12, 61, 195, 128, 48, 4, 24 }
}
};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_VP8_ENTROPYMODEDATA_H_
diff --git a/libvpx/vp8/common/x86/filter_x86.h b/libvpx/vp8/common/x86/filter_x86.h
index cfadaee..d282841 100644
--- a/libvpx/vp8/common/x86/filter_x86.h
+++ b/libvpx/vp8/common/x86/filter_x86.h
@@ -8,11 +8,15 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef FILTER_X86_H
-#define FILTER_X86_H
+#ifndef VP8_COMMON_X86_FILTER_X86_H_
+#define VP8_COMMON_X86_FILTER_X86_H_
#include "vpx_ports/mem.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with
* duplicated values */
@@ -22,4 +26,8 @@ extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]);
/* duplicated 8x */
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]);
-#endif /* FILTER_X86_H */
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_X86_FILTER_X86_H_
diff --git a/libvpx/vp8/common/x86/loopfilter_mmx.asm b/libvpx/vp8/common/x86/loopfilter_mmx.asm
index f388d24..88a07b9 100644
--- a/libvpx/vp8/common/x86/loopfilter_mmx.asm
+++ b/libvpx/vp8/common/x86/loopfilter_mmx.asm
@@ -527,7 +527,7 @@ sym(vp8_loop_filter_vertical_edge_mmx):
pxor mm7, [GLOBAL(t80)] ; unoffset
; mm7 = q1
- ; tranpose and write back
+ ; transpose and write back
; mm1 = 72 62 52 42 32 22 12 02
; mm6 = 73 63 53 43 33 23 13 03
; mm3 = 74 64 54 44 34 24 14 04
@@ -1289,7 +1289,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx):
pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01
pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06
- ; tranpose and write back
+ ; transpose and write back
movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00
movq mm1, mm0 ; mm0 = 70 60 50 40 30 20 10 00
diff --git a/libvpx/vp8/common/x86/loopfilter_sse2.asm b/libvpx/vp8/common/x86/loopfilter_sse2.asm
index a66753b..1913abc 100644
--- a/libvpx/vp8/common/x86/loopfilter_sse2.asm
+++ b/libvpx/vp8/common/x86/loopfilter_sse2.asm
@@ -958,7 +958,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
; start work on filters
B_FILTER 2
- ; tranpose and write back - only work on q1, q0, p0, p1
+ ; transpose and write back - only work on q1, q0, p0, p1
BV_TRANSPOSE
; store 16-line result
@@ -1023,7 +1023,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
; start work on filters
B_FILTER 2
- ; tranpose and write back - only work on q1, q0, p0, p1
+ ; transpose and write back - only work on q1, q0, p0, p1
BV_TRANSPOSE
lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
diff --git a/libvpx/vp8/decoder/dboolhuff.h b/libvpx/vp8/decoder/dboolhuff.h
index 4c0ca1c..36af7ee 100644
--- a/libvpx/vp8/decoder/dboolhuff.h
+++ b/libvpx/vp8/decoder/dboolhuff.h
@@ -9,8 +9,8 @@
*/
-#ifndef DBOOLHUFF_H_
-#define DBOOLHUFF_H_
+#ifndef VP8_DECODER_DBOOLHUFF_H_
+#define VP8_DECODER_DBOOLHUFF_H_
#include <stddef.h>
#include <limits.h>
@@ -19,6 +19,10 @@
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef size_t VP8_BD_VALUE;
#define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
@@ -135,4 +139,8 @@ static int vp8dx_bool_error(BOOL_DECODER *br)
return 0;
}
-#endif // DBOOLHUFF_H_
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_DECODER_DBOOLHUFF_H_
diff --git a/libvpx/vp8/decoder/decodframe.c b/libvpx/vp8/decoder/decodeframe.c
index 16da78a..bfde599 100644
--- a/libvpx/vp8/decoder/decodframe.c
+++ b/libvpx/vp8/decoder/decodeframe.c
@@ -680,7 +680,6 @@ static void decode_mb_rows(VP8D_COMP *pbi)
vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1,
recon_y_stride, recon_uv_stride,
lf_dst[0], lf_dst[1], lf_dst[2]);
-
if(mb_row > 1)
{
yv12_extend_frame_left_right_c(yv12_fb_new,
@@ -691,10 +690,6 @@ static void decode_mb_rows(VP8D_COMP *pbi)
eb_dst[0] += recon_y_stride * 16;
eb_dst[1] += recon_uv_stride * 8;
eb_dst[2] += recon_uv_stride * 8;
-
- if(mb_row == 2)
- yv12_extend_frame_top_c(yv12_fb_new);
-
}
lf_dst[0] += recon_y_stride * 16;
@@ -713,13 +708,9 @@ static void decode_mb_rows(VP8D_COMP *pbi)
eb_dst[0],
eb_dst[1],
eb_dst[2]);
-
eb_dst[0] += recon_y_stride * 16;
eb_dst[1] += recon_uv_stride * 8;
eb_dst[2] += recon_uv_stride * 8;
-
- if(mb_row == 1)
- yv12_extend_frame_top_c(yv12_fb_new);
}
}
}
@@ -747,7 +738,7 @@ static void decode_mb_rows(VP8D_COMP *pbi)
eb_dst[0],
eb_dst[1],
eb_dst[2]);
-
+ yv12_extend_frame_top_c(yv12_fb_new);
yv12_extend_frame_bottom_c(yv12_fb_new);
}
diff --git a/libvpx/vp8/decoder/decodemv.h b/libvpx/vp8/decoder/decodemv.h
index 05a33d2..f33b073 100644
--- a/libvpx/vp8/decoder/decodemv.h
+++ b/libvpx/vp8/decoder/decodemv.h
@@ -8,11 +8,19 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef DECODEMV_H_
-#define DECODEMV_H_
+#ifndef VP8_DECODER_DECODEMV_H_
+#define VP8_DECODER_DECODEMV_H_
#include "onyxd_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp8_decode_mode_mvs(VP8D_COMP *);
-#endif // DECODEMV_H_
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_DECODER_DECODEMV_H_
diff --git a/libvpx/vp8/decoder/decoderthreading.h b/libvpx/vp8/decoder/decoderthreading.h
index bc716e4..c563cf6 100644
--- a/libvpx/vp8/decoder/decoderthreading.h
+++ b/libvpx/vp8/decoder/decoderthreading.h
@@ -8,8 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef DECODERTHREADING_H_
-#define DECODERTHREADING_H_
+#ifndef VP8_DECODER_DECODERTHREADING_H_
+#define VP8_DECODER_DECODERTHREADING_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
#if CONFIG_MULTITHREAD
void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
@@ -19,4 +23,8 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
#endif
-#endif // DECODERTHREADING_H_
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_DECODER_DECODERTHREADING_H_
diff --git a/libvpx/vp8/decoder/detokenize.h b/libvpx/vp8/decoder/detokenize.h
index f2130b3..f0b1254 100644
--- a/libvpx/vp8/decoder/detokenize.h
+++ b/libvpx/vp8/decoder/detokenize.h
@@ -8,12 +8,20 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef DETOKENIZE_H_
-#define DETOKENIZE_H_
+#ifndef VP8_DECODER_DETOKENIZE_H_
+#define VP8_DECODER_DETOKENIZE_H_
#include "onyxd_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
-#endif // DETOKENIZE_H
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_DECODER_DETOKENIZE_H_
diff --git a/libvpx/vp8/decoder/ec_types.h b/libvpx/vp8/decoder/ec_types.h
index b24bfd9..3af5ca8 100644
--- a/libvpx/vp8/decoder/ec_types.h
+++ b/libvpx/vp8/decoder/ec_types.h
@@ -8,8 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP8_DEC_EC_TYPES_H
-#define VP8_DEC_EC_TYPES_H
+#ifndef VP8_DECODER_EC_TYPES_H_
+#define VP8_DECODER_EC_TYPES_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
#define MAX_OVERLAPS 16
@@ -47,4 +51,8 @@ typedef struct
MV_REFERENCE_FRAME ref_frame;
} EC_BLOCK;
-#endif // VP8_DEC_EC_TYPES_H
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_DECODER_EC_TYPES_H_
diff --git a/libvpx/vp8/decoder/error_concealment.h b/libvpx/vp8/decoder/error_concealment.h
index fb96b36..9a1e024 100644
--- a/libvpx/vp8/decoder/error_concealment.h
+++ b/libvpx/vp8/decoder/error_concealment.h
@@ -9,12 +9,16 @@
*/
-#ifndef ERROR_CONCEALMENT_H_
-#define ERROR_CONCEALMENT_H_
+#ifndef VP8_DECODER_ERROR_CONCEALMENT_H_
+#define VP8_DECODER_ERROR_CONCEALMENT_H_
#include "onyxd_int.h"
#include "ec_types.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Allocate memory for the overlap lists */
int vp8_alloc_overlap_lists(VP8D_COMP *pbi);
@@ -38,4 +42,8 @@ void vp8_interpolate_motion(MACROBLOCKD *mb,
*/
void vp8_conceal_corrupt_mb(MACROBLOCKD *xd);
-#endif // ERROR_CONCEALMENT_H_
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_DECODER_ERROR_CONCEALMENT_H_
diff --git a/libvpx/vp8/decoder/onyxd_int.h b/libvpx/vp8/decoder/onyxd_int.h
index 54a98f7..8ef4894 100644
--- a/libvpx/vp8/decoder/onyxd_int.h
+++ b/libvpx/vp8/decoder/onyxd_int.h
@@ -9,8 +9,8 @@
*/
-#ifndef ONYXD_INT_H_
-#define ONYXD_INT_H_
+#ifndef VP8_DECODER_ONYXD_INT_H_
+#define VP8_DECODER_ONYXD_INT_H_
#include "vpx_config.h"
#include "vp8/common/onyxd.h"
@@ -22,6 +22,10 @@
#include "ec_types.h"
#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef struct
{
int ithread;
@@ -148,4 +152,8 @@ int vp8_remove_decoder_instances(struct frame_buffers *fb);
} while(0)
#endif
-#endif // ONYXD_INT_H_
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_DECODER_ONYXD_INT_H_
diff --git a/libvpx/vp8/decoder/treereader.h b/libvpx/vp8/decoder/treereader.h
index 9393bb4..35ee696 100644
--- a/libvpx/vp8/decoder/treereader.h
+++ b/libvpx/vp8/decoder/treereader.h
@@ -9,12 +9,16 @@
*/
-#ifndef TREEREADER_H_
-#define TREEREADER_H_
+#ifndef VP8_DECODER_TREEREADER_H_
+#define VP8_DECODER_TREEREADER_H_
#include "vp8/common/treecoder.h"
#include "dboolhuff.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef BOOL_DECODER vp8_reader;
#define vp8_read vp8dx_decode_bool
@@ -37,4 +41,8 @@ static int vp8_treed_read(
return -i;
}
-#endif // TREEREADER_H_
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_DECODER_TREEREADER_H_
diff --git a/libvpx/vp8/encoder/arm/neon/denoising_neon.c b/libvpx/vp8/encoder/arm/neon/denoising_neon.c
new file mode 100644
index 0000000..3f85397
--- /dev/null
+++ b/libvpx/vp8/encoder/arm/neon/denoising_neon.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "vp8/encoder/denoising.h"
+#include "vpx_mem/vpx_mem.h"
+#include "./vp8_rtcd.h"
+
+/*
+ * The filter function was modified to reduce the computational complexity.
+ *
+ * Step 1:
+ * Instead of applying tap coefficients for each pixel, we calculated the
+ * pixel adjustments vs. pixel diff value ahead of time.
+ * adjustment = filtered_value - current_raw
+ * = (filter_coefficient * diff + 128) >> 8
+ * where
+ * filter_coefficient = (255 << 8) / (256 + ((abs_diff * 330) >> 3));
+ * filter_coefficient += filter_coefficient /
+ * (3 + motion_magnitude_adjustment);
+ * filter_coefficient is clamped to 0 ~ 255.
+ *
+ * Step 2:
+ * The adjustment vs. diff curve flattens very quickly as diff increases.
+ * This allowed us to use only several levels to approximate the curve without
+ * changing the filtering algorithm too much.
+ * The adjustments were further corrected by checking the motion magnitude.
+ * The levels used are:
+ * diff level adjustment w/o adjustment w/
+ * motion correction motion correction
+ * [-255, -16] 3 -6 -7
+ * [-15, -8] 2 -4 -5
+ * [-7, -4] 1 -3 -4
+ * [-3, 3] 0 diff diff
+ * [4, 7] 1 3 4
+ * [8, 15] 2 4 5
+ * [16, 255] 3 6 7
+ */
+
+int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
+ YV12_BUFFER_CONFIG *running_avg,
+ MACROBLOCK *signal, unsigned int motion_magnitude,
+ int y_offset, int uv_offset) {
+ /* If motion_magnitude is small, make the denoiser more aggressive by
+ * increasing the adjustment for each level: the level 1 adjustment is
+ * raised from 3 to 4 while the deltas between levels stay the same.
+ */
+ const uint8x16_t v_level1_adjustment = vdupq_n_u8(
+ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 : 3);
+ const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1);
+ const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2);
+ const uint8x16_t v_level1_threshold = vdupq_n_u8(4);
+ const uint8x16_t v_level2_threshold = vdupq_n_u8(8);
+ const uint8x16_t v_level3_threshold = vdupq_n_u8(16);
+
+ /* Local variables for array pointers and strides. */
+ unsigned char *sig = signal->thismb;
+ int sig_stride = 16;
+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
+ int mc_running_avg_y_stride = mc_running_avg->y_stride;
+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
+ int running_avg_y_stride = running_avg->y_stride;
+
+ /* Process the 16 rows of the block, one 16-byte row per iteration. */
+ int i;
+ int sum_diff = 0;
+ for (i = 0; i < 16; ++i) {
+ int8x16_t v_sum_diff = vdupq_n_s8(0);
+ uint8x16_t v_running_avg_y;
+
+ /* Load inputs. */
+ const uint8x16_t v_sig = vld1q_u8(sig);
+ const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y);
+
+ /* Absolute difference, plus masks for sig below (pos) / above (neg) avg. */
+ const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y);
+ const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y);
+ const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y);
+
+ /* Figure out which level each pixel's absolute difference falls in. */
+ const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold,
+ v_abs_diff);
+ const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold,
+ v_abs_diff);
+ const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold,
+ v_abs_diff);
+
+ /* Calculate absolute adjustments for level 1, 2 and 3. */
+ const uint8x16_t v_level2_adjustment = vandq_u8(v_level2_mask,
+ v_delta_level_1_and_2);
+ const uint8x16_t v_level3_adjustment = vandq_u8(v_level3_mask,
+ v_delta_level_2_and_3);
+ const uint8x16_t v_level1and2_adjustment = vaddq_u8(v_level1_adjustment,
+ v_level2_adjustment);
+ const uint8x16_t v_level1and2and3_adjustment = vaddq_u8(
+ v_level1and2_adjustment, v_level3_adjustment);
+
+ /* Select the absolute adjustment: the raw absolute difference when in
+ * level 0, otherwise the combined value for levels 1, 2 and 3.
+ */
+ const uint8x16_t v_abs_adjustment = vbslq_u8(v_level1_mask,
+ v_level1and2and3_adjustment, v_abs_diff);
+
+ /* Calculate positive and negative adjustments. Apply them to the signal
+ * and accumulate them. Adjustments are at most seven and the accumulator
+ * is cleared for every row, so each lane fits in a signed char.
+ */
+ const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask,
+ v_abs_adjustment);
+ const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask,
+ v_abs_adjustment);
+ v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment);
+ v_running_avg_y = vqsubq_u8(v_running_avg_y, v_neg_adjustment);
+ v_sum_diff = vqaddq_s8(v_sum_diff,
+ vreinterpretq_s8_u8(v_pos_adjustment));
+ v_sum_diff = vqsubq_s8(v_sum_diff,
+ vreinterpretq_s8_u8(v_neg_adjustment));
+
+ /* Store results. */
+ vst1q_u8(running_avg_y, v_running_avg_y);
+
+ /* Sum the 16 lanes of this row's accumulator and add the result to
+ * sum_diff, the running total of adjustments for the whole block.
+ */
+ {
+ int s0 = vgetq_lane_s8(v_sum_diff, 0) +
+ vgetq_lane_s8(v_sum_diff, 1) +
+ vgetq_lane_s8(v_sum_diff, 2) +
+ vgetq_lane_s8(v_sum_diff, 3);
+ int s1 = vgetq_lane_s8(v_sum_diff, 4) +
+ vgetq_lane_s8(v_sum_diff, 5) +
+ vgetq_lane_s8(v_sum_diff, 6) +
+ vgetq_lane_s8(v_sum_diff, 7);
+ int s2 = vgetq_lane_s8(v_sum_diff, 8) +
+ vgetq_lane_s8(v_sum_diff, 9) +
+ vgetq_lane_s8(v_sum_diff, 10) +
+ vgetq_lane_s8(v_sum_diff, 11);
+ int s3 = vgetq_lane_s8(v_sum_diff, 12) +
+ vgetq_lane_s8(v_sum_diff, 13) +
+ vgetq_lane_s8(v_sum_diff, 14) +
+ vgetq_lane_s8(v_sum_diff, 15);
+ sum_diff += s0 + s1+ s2 + s3;
+ }
+
+ /* Update pointers for next iteration. */
+ sig += sig_stride;
+ mc_running_avg_y += mc_running_avg_y_stride;
+ running_avg_y += running_avg_y_stride;
+ }
+
+ /* Too many adjustments => copy block. */
+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+ return COPY_BLOCK;
+
+ /* Tell the caller that the block was filtered. */
+ vp8_copy_mem16x16(running_avg->y_buffer + y_offset, running_avg_y_stride,
+ signal->thismb, sig_stride);
+ return FILTER_BLOCK;
+}
diff --git a/libvpx/vp8/encoder/bitstream.h b/libvpx/vp8/encoder/bitstream.h
index 455a94f..eef2d79 100644
--- a/libvpx/vp8/encoder/bitstream.h
+++ b/libvpx/vp8/encoder/bitstream.h
@@ -9,8 +9,12 @@
*/
-#ifndef __INC_BITSTREAM_H
-#define __INC_BITSTREAM_H
+#ifndef VP8_ENCODER_BITSTREAM_H_
+#define VP8_ENCODER_BITSTREAM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
#if HAVE_EDSP
void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
@@ -43,4 +47,8 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);
# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
#endif
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_BITSTREAM_H_
diff --git a/libvpx/vp8/encoder/block.h b/libvpx/vp8/encoder/block.h
index cf74c7a..dd733e5 100644
--- a/libvpx/vp8/encoder/block.h
+++ b/libvpx/vp8/encoder/block.h
@@ -9,8 +9,8 @@
*/
-#ifndef __INC_BLOCK_H
-#define __INC_BLOCK_H
+#ifndef VP8_ENCODER_BLOCK_H_
+#define VP8_ENCODER_BLOCK_H_
#include "vp8/common/onyx.h"
#include "vp8/common/blockd.h"
@@ -18,6 +18,10 @@
#include "vp8/common/entropy.h"
#include "vpx_ports/mem.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MAX_MODES 20
#define MAX_ERROR_BINS 1024
@@ -160,4 +164,8 @@ typedef struct macroblock
} MACROBLOCK;
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_BLOCK_H_
diff --git a/libvpx/vp8/encoder/boolhuff.h b/libvpx/vp8/encoder/boolhuff.h
index 39ab586..6114215 100644
--- a/libvpx/vp8/encoder/boolhuff.h
+++ b/libvpx/vp8/encoder/boolhuff.h
@@ -16,12 +16,16 @@
* Description : Bool Coder header file.
*
****************************************************************************/
-#ifndef __INC_BOOLHUFF_H
-#define __INC_BOOLHUFF_H
+#ifndef VP8_ENCODER_BOOLHUFF_H_
+#define VP8_ENCODER_BOOLHUFF_H_
#include "vpx_ports/mem.h"
#include "vpx/internal/vpx_codec_internal.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef struct
{
unsigned int lowvalue;
@@ -125,4 +129,8 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability)
br->range = range;
}
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_BOOLHUFF_H_
diff --git a/libvpx/vp8/encoder/dct_value_cost.h b/libvpx/vp8/encoder/dct_value_cost.h
index e892765..1cd3eec 100644
--- a/libvpx/vp8/encoder/dct_value_cost.h
+++ b/libvpx/vp8/encoder/dct_value_cost.h
@@ -8,6 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_ENCODER_DCT_VALUE_COST_H_
+#define VP8_ENCODER_DCT_VALUE_COST_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Generated file, included by tokenize.c */
/* Values generated by fill_value_tokens() */
@@ -356,3 +363,9 @@ static const short dct_value_cost[2048*2] =
8134, 8140, 8148, 8170, 8178, 8184, 8192, 8202, 8210, 8216, 8224, 8243,
8251, 8257, 8265, 8275
};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_ENCODER_DCT_VALUE_COST_H_
diff --git a/libvpx/vp8/encoder/dct_value_tokens.h b/libvpx/vp8/encoder/dct_value_tokens.h
index ef08eed..c2aadef 100644
--- a/libvpx/vp8/encoder/dct_value_tokens.h
+++ b/libvpx/vp8/encoder/dct_value_tokens.h
@@ -8,6 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_ENCODER_DCT_VALUE_TOKENS_H_
+#define VP8_ENCODER_DCT_VALUE_TOKENS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Generated file, included by tokenize.c */
/* Values generated by fill_value_tokens() */
@@ -697,3 +704,9 @@ static const TOKENVALUE dct_value_tokens[2048*2] =
{10, 3942}, {10, 3944}, {10, 3946}, {10, 3948}, {10, 3950}, {10, 3952},
{10, 3954}, {10, 3956}, {10, 3958}, {10, 3960}
};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_ENCODER_DCT_VALUE_TOKENS_H_
diff --git a/libvpx/vp8/encoder/defaultcoefcounts.h b/libvpx/vp8/encoder/defaultcoefcounts.h
index 2c0f3dd..1e8e804 100644
--- a/libvpx/vp8/encoder/defaultcoefcounts.h
+++ b/libvpx/vp8/encoder/defaultcoefcounts.h
@@ -8,6 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_ENCODER_DEFAULTCOEFCOUNTS_H_
+#define VP8_ENCODER_DEFAULTCOEFCOUNTS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Generated file, included by entropy.c */
static const unsigned int default_coef_counts[BLOCK_TYPES]
@@ -221,3 +228,9 @@ static const unsigned int default_coef_counts[BLOCK_TYPES]
},
},
};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_ENCODER_DEFAULTCOEFCOUNTS_H_
diff --git a/libvpx/vp8/encoder/denoising.h b/libvpx/vp8/encoder/denoising.h
index b025f5c..cc9913a 100644
--- a/libvpx/vp8/encoder/denoising.h
+++ b/libvpx/vp8/encoder/denoising.h
@@ -13,6 +13,10 @@
#include "block.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
#define MOTION_MAGNITUDE_THRESHOLD (8*3)
@@ -39,4 +43,8 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
int recon_yoffset,
int recon_uvoffset);
-#endif /* VP8_ENCODER_DENOISING_H_ */
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_ENCODER_DENOISING_H_
diff --git a/libvpx/vp8/encoder/encodeframe.h b/libvpx/vp8/encoder/encodeframe.h
index 4dd6ba0..e185c10 100644
--- a/libvpx/vp8/encoder/encodeframe.h
+++ b/libvpx/vp8/encoder/encodeframe.h
@@ -7,8 +7,12 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef ENCODEFRAME_H
-#define ENCODEFRAME_H
+#ifndef VP8_ENCODER_ENCODEFRAME_H_
+#define VP8_ENCODER_ENCODEFRAME_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
extern void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
extern void vp8_build_block_offsets(MACROBLOCK *x);
@@ -24,4 +28,8 @@ extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
extern int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
TOKENEXTRA **t);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_ENCODEFRAME_H_
diff --git a/libvpx/vp8/encoder/encodeintra.h b/libvpx/vp8/encoder/encodeintra.h
index be2141f..a8d0284 100644
--- a/libvpx/vp8/encoder/encodeintra.h
+++ b/libvpx/vp8/encoder/encodeintra.h
@@ -9,13 +9,21 @@
*/
-#ifndef _ENCODEINTRA_H_
-#define _ENCODEINTRA_H_
+#ifndef VP8_ENCODER_ENCODEINTRA_H_
+#define VP8_ENCODER_ENCODEINTRA_H_
#include "onyx_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred);
void vp8_encode_intra16x16mby(MACROBLOCK *x);
void vp8_encode_intra16x16mbuv(MACROBLOCK *x);
void vp8_encode_intra4x4mby(MACROBLOCK *mb);
void vp8_encode_intra4x4block(MACROBLOCK *x, int ib);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_ENCODEINTRA_H_
diff --git a/libvpx/vp8/encoder/encodemb.h b/libvpx/vp8/encoder/encodemb.h
index 6badf7d..0b3ec87 100644
--- a/libvpx/vp8/encoder/encodemb.h
+++ b/libvpx/vp8/encoder/encodemb.h
@@ -9,10 +9,14 @@
*/
-#ifndef __INC_ENCODEMB_H
-#define __INC_ENCODEMB_H
+#ifndef VP8_ENCODER_ENCODEMB_H_
+#define VP8_ENCODER_ENCODEMB_H_
#include "onyx_int.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
void vp8_encode_inter16x16(MACROBLOCK *x);
void vp8_build_dcblock(MACROBLOCK *b);
@@ -23,4 +27,8 @@ void vp8_transform_intra_mby(MACROBLOCK *x);
void vp8_optimize_mby(MACROBLOCK *x);
void vp8_optimize_mbuv(MACROBLOCK *x);
void vp8_encode_inter16x16y(MACROBLOCK *x);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_ENCODEMB_H_
diff --git a/libvpx/vp8/encoder/encodemv.h b/libvpx/vp8/encoder/encodemv.h
index a6116c1..722162b 100644
--- a/libvpx/vp8/encoder/encodemv.h
+++ b/libvpx/vp8/encoder/encodemv.h
@@ -9,13 +9,21 @@
*/
-#ifndef __INC_ENCODEMV_H
-#define __INC_ENCODEMV_H
+#ifndef VP8_ENCODER_ENCODEMV_H_
+#define VP8_ENCODER_ENCODEMV_H_
#include "onyx_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp8_write_mvprobs(VP8_COMP *);
void vp8_encode_motion_vector(vp8_writer *, const MV *, const MV_CONTEXT *);
void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_ENCODEMV_H_
diff --git a/libvpx/vp8/encoder/firstpass.c b/libvpx/vp8/encoder/firstpass.c
index 968c7f3..98e5a71 100644
--- a/libvpx/vp8/encoder/firstpass.c
+++ b/libvpx/vp8/encoder/firstpass.c
@@ -940,9 +940,9 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi,
/* Crude estimate of overhead cost from modes
* << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb
*/
- mode_cost =((((av_pct_inter - av_pct_motion) * zz_cost) +
- (av_pct_motion * motion_cost) +
- (av_intra * intra_cost)) * cpi->common.MBs) * 512;
+ mode_cost = (int64_t)((((av_pct_inter - av_pct_motion) * zz_cost) +
+ (av_pct_motion * motion_cost) +
+ (av_intra * intra_cost)) * cpi->common.MBs) * 512;
return mv_cost + mode_cost;
}
@@ -2310,7 +2310,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
pct_extra = (pct_extra > 20) ? 20 : pct_extra;
cpi->twopass.alt_extra_bits =
- (cpi->twopass.gf_group_bits * pct_extra) / 100;
+ (int)(cpi->twopass.gf_group_bits * pct_extra) / 100;
cpi->twopass.gf_group_bits -= cpi->twopass.alt_extra_bits;
cpi->twopass.alt_extra_bits /=
((cpi->baseline_gf_interval-1)>>1);
@@ -2386,7 +2386,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
target_frame_size = max_bits;
if (target_frame_size > cpi->twopass.gf_group_bits)
- target_frame_size = cpi->twopass.gf_group_bits;
+ target_frame_size = (int)cpi->twopass.gf_group_bits;
}
/* Adjust error and bits remaining */
@@ -2444,10 +2444,10 @@ void vp8_second_pass(VP8_COMP *cpi)
find_next_key_frame(cpi, &this_frame_copy);
/* Special case: Error error_resilient_mode mode does not make much
- * sense for two pass but with its current meaning but this code is
+ * sense for two pass but with its current meaning this code is
* designed to stop outlandish behaviour if someone does set it when
* using two pass. It effectively disables GF groups. This is
- * temporary code till we decide what should really happen in this
+ * temporary code until we decide what should really happen in this
* case.
*/
if (cpi->oxcf.error_resilient_mode)
@@ -2773,7 +2773,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
kf_group_intra_err += this_frame->intra_error;
kf_group_coded_err += this_frame->coded_error;
- /* load a the next frame's stats */
+ /* Load the next frame's stats. */
vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
input_stats(cpi, this_frame);
diff --git a/libvpx/vp8/encoder/firstpass.h b/libvpx/vp8/encoder/firstpass.h
index 95e1e54..c409ebc 100644
--- a/libvpx/vp8/encoder/firstpass.h
+++ b/libvpx/vp8/encoder/firstpass.h
@@ -9,8 +9,12 @@
*/
-#if !defined __INC_FIRSTPASS_H
-#define __INC_FIRSTPASS_H
+#ifndef VP8_ENCODER_FIRSTPASS_H_
+#define VP8_ENCODER_FIRSTPASS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
extern void vp8_init_first_pass(VP8_COMP *cpi);
extern void vp8_first_pass(VP8_COMP *cpi);
@@ -21,4 +25,8 @@ extern void vp8_second_pass(VP8_COMP *cpi);
extern void vp8_end_second_pass(VP8_COMP *cpi);
extern size_t vp8_firstpass_stats_sz(unsigned int mb_count);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_FIRSTPASS_H_
diff --git a/libvpx/vp8/encoder/lookahead.h b/libvpx/vp8/encoder/lookahead.h
index cf56b75..cad68e6 100644
--- a/libvpx/vp8/encoder/lookahead.h
+++ b/libvpx/vp8/encoder/lookahead.h
@@ -7,11 +7,15 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef LOOKAHEAD_H
-#define LOOKAHEAD_H
+#ifndef VP8_ENCODER_LOOKAHEAD_H_
+#define VP8_ENCODER_LOOKAHEAD_H_
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct lookahead_entry
{
YV12_BUFFER_CONFIG img;
@@ -106,4 +110,8 @@ unsigned int
vp8_lookahead_depth(struct lookahead_ctx *ctx);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_LOOKAHEAD_H_
diff --git a/libvpx/vp8/encoder/mcomp.h b/libvpx/vp8/encoder/mcomp.h
index e36c515..f284f7c 100644
--- a/libvpx/vp8/encoder/mcomp.h
+++ b/libvpx/vp8/encoder/mcomp.h
@@ -9,12 +9,16 @@
*/
-#ifndef __INC_MCOMP_H
-#define __INC_MCOMP_H
+#ifndef VP8_ENCODER_MCOMP_H_
+#define VP8_ENCODER_MCOMP_H_
#include "block.h"
#include "vp8/common/variance.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#ifdef VP8_ENTROPY_STATS
extern void init_mv_ref_counts();
extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
@@ -104,4 +108,8 @@ typedef int (*vp8_diamond_search_fn_t)
int_mv *center_mv
);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_MCOMP_H_
diff --git a/libvpx/vp8/encoder/modecosts.h b/libvpx/vp8/encoder/modecosts.h
index 99ef119..9281551 100644
--- a/libvpx/vp8/encoder/modecosts.h
+++ b/libvpx/vp8/encoder/modecosts.h
@@ -9,9 +9,17 @@
*/
-#ifndef __INC_MODECOSTS_H
-#define __INC_MODECOSTS_H
+#ifndef VP8_ENCODER_MODECOSTS_H_
+#define VP8_ENCODER_MODECOSTS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
void vp8_init_mode_costs(VP8_COMP *x);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_MODECOSTS_H_
diff --git a/libvpx/vp8/encoder/mr_dissim.h b/libvpx/vp8/encoder/mr_dissim.h
index f8cb135..5a59ce6 100644
--- a/libvpx/vp8/encoder/mr_dissim.h
+++ b/libvpx/vp8/encoder/mr_dissim.h
@@ -9,12 +9,20 @@
*/
-#ifndef __INC_MR_DISSIM_H
-#define __INC_MR_DISSIM_H
+#ifndef VP8_ENCODER_MR_DISSIM_H_
+#define VP8_ENCODER_MR_DISSIM_H_
#include "vpx_config.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
extern void vp8_cal_low_res_mb_cols(VP8_COMP *cpi);
extern void vp8_cal_dissimilarity(VP8_COMP *cpi);
extern void vp8_store_drop_frame_info(VP8_COMP *cpi);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_MR_DISSIM_H_
diff --git a/libvpx/vp8/encoder/onyx_if.c b/libvpx/vp8/encoder/onyx_if.c
index 4b60cfd..ef37c0e 100644
--- a/libvpx/vp8/encoder/onyx_if.c
+++ b/libvpx/vp8/encoder/onyx_if.c
@@ -19,7 +19,7 @@
#include "vp8/common/alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
-#include "psnr.h"
+#include "vpx/internal/vpx_psnr.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/extend.h"
#include "ratectrl.h"
@@ -1401,6 +1401,7 @@ static void update_layer_contexts (VP8_COMP *cpi)
unsigned int i;
double prev_layer_framerate=0;
+ assert(oxcf->number_of_layers <= VPX_TS_MAX_LAYERS);
for (i=0; i<oxcf->number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
@@ -1623,6 +1624,12 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->oxcf.maximum_buffer_size =
rescale((int)cpi->oxcf.maximum_buffer_size,
cpi->oxcf.target_bandwidth, 1000);
+ // Under a configuration change, where maximum_buffer_size may change,
+ // keep buffer level clipped to the maximum allowed buffer size.
+ if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) {
+ cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
+ cpi->buffer_level = cpi->bits_off_target;
+ }
/* Set up frame rate and related parameters rate control values. */
vp8_new_framerate(cpi, cpi->framerate);
@@ -2164,10 +2171,12 @@ void vp8_remove_compressor(VP8_COMP **ptr)
8.0 / 1000.0 / time_encoded;
double samples = 3.0 / 2 * cpi->frames_in_layer[i] *
lst_yv12->y_width * lst_yv12->y_height;
- double total_psnr = vp8_mse2psnr(samples, 255.0,
- cpi->total_error2[i]);
- double total_psnr2 = vp8_mse2psnr(samples, 255.0,
- cpi->total_error2_p[i]);
+ double total_psnr =
+ vpx_sse_to_psnr(samples, 255.0,
+ cpi->total_error2[i]);
+ double total_psnr2 =
+ vpx_sse_to_psnr(samples, 255.0,
+ cpi->total_error2_p[i]);
double total_ssim = 100 * pow(cpi->sum_ssim[i] /
cpi->sum_weights[i], 8.0);
@@ -2184,9 +2193,9 @@ void vp8_remove_compressor(VP8_COMP **ptr)
{
double samples = 3.0 / 2 * cpi->count *
lst_yv12->y_width * lst_yv12->y_height;
- double total_psnr = vp8_mse2psnr(samples, 255.0,
- cpi->total_sq_error);
- double total_psnr2 = vp8_mse2psnr(samples, 255.0,
+ double total_psnr = vpx_sse_to_psnr(samples, 255.0,
+ cpi->total_sq_error);
+ double total_psnr2 = vpx_sse_to_psnr(samples, 255.0,
cpi->total_sq_error2);
double total_ssim = 100 * pow(cpi->summed_quality /
cpi->summed_weights, 8.0);
@@ -2516,8 +2525,8 @@ static void generate_psnr_packet(VP8_COMP *cpi)
pkt.data.psnr.samples[3] = width * height;
for (i = 0; i < 4; i++)
- pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0,
- (double)(pkt.data.psnr.sse[i]));
+ pkt.data.psnr.psnr[i] = vpx_sse_to_psnr(pkt.data.psnr.samples[i], 255.0,
+ (double)(pkt.data.psnr.sse[i]));
vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
}
@@ -2675,8 +2684,8 @@ static int resize_key_frame(VP8_COMP *cpi)
VP8_COMMON *cm = &cpi->common;
/* Do we need to apply resampling for one pass cbr.
- * In one pass this is more limited than in two pass cbr
- * The test and any change is only made one per key frame sequence
+ * In one pass this is more limited than in two pass cbr.
+ * The test and any change is only made once per key frame sequence.
*/
if (cpi->oxcf.allow_spatial_resampling && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
{
@@ -2699,7 +2708,7 @@ static int resize_key_frame(VP8_COMP *cpi)
cm->vert_scale = (cm->vert_scale > NORMAL) ? cm->vert_scale - 1 : NORMAL;
}
- /* Get the new hieght and width */
+ /* Get the new height and width */
Scale2Ratio(cm->horiz_scale, &hr, &hs);
Scale2Ratio(cm->vert_scale, &vr, &vs);
new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs;
@@ -3808,7 +3817,7 @@ static void encode_frame_to_data_rate
/* Setup background Q adjustment for error resilient mode.
* For multi-layer encodes only enable this for the base layer.
- */
+ */
if (cpi->cyclic_refresh_mode_enabled)
{
if (cpi->current_layer==0)
@@ -4621,45 +4630,43 @@ static void encode_frame_to_data_rate
vp8_clear_system_state();
if (cpi->twopass.total_left_stats.coded_error != 0.0)
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
- "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f"
- "%10.3f %8d\n",
+ fprintf(f, "%10d %10d %10d %10d %10d %10"PRId64" %10"PRId64
+ "%10"PRId64" %10d %6d %6d %6d %6d %5d %5d %5d %8d "
+ "%8.2lf %"PRId64" %10.3lf %10.3lf %8d\n", /* 21st field is the double ratio bits_left / coded_error, so it needs a floating-point conversion, not PRId64 */
cpi->common.current_video_frame, cpi->this_frame_target,
cpi->projected_frame_size,
(cpi->projected_frame_size - cpi->this_frame_target),
- (int)cpi->total_target_vs_actual,
+ cpi->total_target_vs_actual,
cpi->buffer_level,
(cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
- (int)cpi->total_actual_bits, cm->base_qindex,
+ cpi->total_actual_bits, cm->base_qindex,
cpi->active_best_quality, cpi->active_worst_quality,
cpi->ni_av_qi, cpi->cq_target_quality,
- cpi->zbin_over_quant,
cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
- (int)cpi->twopass.bits_left,
+ cpi->twopass.bits_left,
cpi->twopass.total_left_stats.coded_error,
(double)cpi->twopass.bits_left /
cpi->twopass.total_left_stats.coded_error,
cpi->tot_recode_hits);
else
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
- "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f"
- "%8d\n",
- cpi->common.current_video_frame,
- cpi->this_frame_target, cpi->projected_frame_size,
+ fprintf(f, "%10d %10d %10d %10d %10d %10"PRId64" %10"PRId64
+ "%10"PRId64" %10d %6d %6d %6d %6d %5d %5d %5d %8d "
+ "%8.2lf %"PRId64" %10.3lf %8d\n",
+ cpi->common.current_video_frame, cpi->this_frame_target,
+ cpi->projected_frame_size,
(cpi->projected_frame_size - cpi->this_frame_target),
- (int)cpi->total_target_vs_actual,
+ cpi->total_target_vs_actual,
cpi->buffer_level,
(cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
- (int)cpi->total_actual_bits, cm->base_qindex,
+ cpi->total_actual_bits, cm->base_qindex,
cpi->active_best_quality, cpi->active_worst_quality,
cpi->ni_av_qi, cpi->cq_target_quality,
- cpi->zbin_over_quant,
cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
- (int)cpi->twopass.bits_left,
+ cpi->twopass.bits_left,
cpi->twopass.total_left_stats.coded_error,
cpi->tot_recode_hits);
@@ -4667,7 +4674,6 @@ static void encode_frame_to_data_rate
{
FILE *fmodes = fopen("Modes.stt", "a");
- int i;
fprintf(fmodes, "%6d:%1d:%1d:%1d ",
cpi->common.current_video_frame,
@@ -5066,6 +5072,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
unsigned int i;
/* Update frame rates for each layer */
+ assert(cpi->oxcf.number_of_layers <= VPX_TS_MAX_LAYERS);
for (i=0; i<cpi->oxcf.number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
@@ -5281,11 +5288,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
sq_error = (double)(ye + ue + ve);
- frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error);
+ frame_psnr = vpx_sse_to_psnr(t_samples, 255.0, sq_error);
- cpi->total_y += vp8_mse2psnr(y_samples, 255.0, (double)ye);
- cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, (double)ue);
- cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, (double)ve);
+ cpi->total_y += vpx_sse_to_psnr(y_samples, 255.0, (double)ye);
+ cpi->total_u += vpx_sse_to_psnr(uv_samples, 255.0, (double)ue);
+ cpi->total_v += vpx_sse_to_psnr(uv_samples, 255.0, (double)ve);
cpi->total_sq_error += sq_error;
cpi->total += frame_psnr;
#if CONFIG_POSTPROC
@@ -5308,14 +5315,14 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
sq_error2 = (double)(ye + ue + ve);
- frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2);
+ frame_psnr2 = vpx_sse_to_psnr(t_samples, 255.0, sq_error2);
- cpi->totalp_y += vp8_mse2psnr(y_samples,
- 255.0, (double)ye);
- cpi->totalp_u += vp8_mse2psnr(uv_samples,
- 255.0, (double)ue);
- cpi->totalp_v += vp8_mse2psnr(uv_samples,
- 255.0, (double)ve);
+ cpi->totalp_y += vpx_sse_to_psnr(y_samples,
+ 255.0, (double)ye);
+ cpi->totalp_u += vpx_sse_to_psnr(uv_samples,
+ 255.0, (double)ue);
+ cpi->totalp_v += vpx_sse_to_psnr(uv_samples,
+ 255.0, (double)ve);
cpi->total_sq_error2 += sq_error2;
cpi->totalp += frame_psnr2;
diff --git a/libvpx/vp8/encoder/onyx_int.h b/libvpx/vp8/encoder/onyx_int.h
index 3ab0fe8..6b37167 100644
--- a/libvpx/vp8/encoder/onyx_int.h
+++ b/libvpx/vp8/encoder/onyx_int.h
@@ -9,8 +9,8 @@
*/
-#ifndef __INC_VP8_INT_H
-#define __INC_VP8_INT_H
+#ifndef VP8_ENCODER_ONYX_INT_H_
+#define VP8_ENCODER_ONYX_INT_H_
#include <stdio.h>
#include "vpx_config.h"
@@ -33,6 +33,10 @@
#include "vp8/encoder/denoising.h"
#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MIN_GF_INTERVAL 4
#define DEFAULT_GF_INTERVAL 7
@@ -721,4 +725,8 @@ void vp8_set_speed_features(VP8_COMP *cpi);
"Failed to allocate "#lval);\
} while(0)
#endif
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_ONYX_INT_H_
diff --git a/libvpx/vp8/encoder/pickinter.h b/libvpx/vp8/encoder/pickinter.h
index 35011ca..cf3b1f8 100644
--- a/libvpx/vp8/encoder/pickinter.h
+++ b/libvpx/vp8/encoder/pickinter.h
@@ -9,11 +9,15 @@
*/
-#ifndef __INC_PICKINTER_H
-#define __INC_PICKINTER_H
+#ifndef VP8_ENCODER_PICKINTER_H_
+#define VP8_ENCODER_PICKINTER_H_
#include "vpx_config.h"
#include "vp8/common/onyxc_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int recon_uvoffset, int *returnrate,
int *returndistortion, int *returnintra,
@@ -24,4 +28,8 @@ extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
const vp8_variance_fn_ptr_t *vfp,
unsigned int *sse,
int_mv this_mv);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_PICKINTER_H_
diff --git a/libvpx/vp8/encoder/psnr.c b/libvpx/vp8/encoder/psnr.c
deleted file mode 100644
index b3a3d95..0000000
--- a/libvpx/vp8/encoder/psnr.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_scale/yv12config.h"
-#include "math.h"
-#include "vp8/common/systemdependent.h" /* for vp8_clear_system_state() */
-
-#define MAX_PSNR 100
-
-double vp8_mse2psnr(double Samples, double Peak, double Mse)
-{
- double psnr;
-
- if ((double)Mse > 0.0)
- psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
- else
- psnr = MAX_PSNR; /* Limit to prevent / 0 */
-
- if (psnr > MAX_PSNR)
- psnr = MAX_PSNR;
-
- return psnr;
-}
diff --git a/libvpx/vp8/encoder/psnr.h b/libvpx/vp8/encoder/psnr.h
deleted file mode 100644
index 7f6269a..0000000
--- a/libvpx/vp8/encoder/psnr.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __INC_PSNR_H
-#define __INC_PSNR_H
-
-extern double vp8_mse2psnr(double Samples, double Peak, double Mse);
-
-#endif
diff --git a/libvpx/vp8/encoder/quantize.h b/libvpx/vp8/encoder/quantize.h
index d55496c..c739b26 100644
--- a/libvpx/vp8/encoder/quantize.h
+++ b/libvpx/vp8/encoder/quantize.h
@@ -9,8 +9,12 @@
*/
-#ifndef __INC_QUANTIZE_H
-#define __INC_QUANTIZE_H
+#ifndef VP8_ENCODER_QUANTIZE_H_
+#define VP8_ENCODER_QUANTIZE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
struct VP8_COMP;
struct macroblock;
@@ -20,4 +24,8 @@ extern void vp8_update_zbin_extra(struct VP8_COMP *cpi, struct macroblock *x);
extern void vp8cx_mb_init_quantizer(struct VP8_COMP *cpi, struct macroblock *x, int ok_to_skip);
extern void vp8cx_init_quantizer(struct VP8_COMP *cpi);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_QUANTIZE_H_
diff --git a/libvpx/vp8/encoder/ratectrl.c b/libvpx/vp8/encoder/ratectrl.c
index fe4db13..c51650c 100644
--- a/libvpx/vp8/encoder/ratectrl.c
+++ b/libvpx/vp8/encoder/ratectrl.c
@@ -174,14 +174,6 @@ static const int kf_gf_boost_qlimits[QINDEX_RANGE] =
600, 600, 600, 600, 600, 600, 600, 600,
};
-/* % adjustment to target kf size based on seperation from previous frame */
-static const int kf_boost_seperation_adjustment[16] =
-{
- 30, 40, 50, 55, 60, 65, 70, 75,
- 80, 85, 90, 95, 100, 100, 100, 100,
-};
-
-
static const int gf_adjust_table[101] =
{
100,
@@ -1238,7 +1230,6 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
{
Q = cpi->oxcf.gold_q;
}
-
}
else
{
diff --git a/libvpx/vp8/encoder/ratectrl.h b/libvpx/vp8/encoder/ratectrl.h
index c43f08d..829697f 100644
--- a/libvpx/vp8/encoder/ratectrl.h
+++ b/libvpx/vp8/encoder/ratectrl.h
@@ -9,10 +9,15 @@
*/
-#if !defined __INC_RATECTRL_H
+#ifndef VP8_ENCODER_RATECTRL_H_
+#define VP8_ENCODER_RATECTRL_H_
#include "onyx_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
extern void vp8_save_coding_context(VP8_COMP *cpi);
extern void vp8_restore_coding_context(VP8_COMP *cpi);
@@ -25,4 +30,8 @@ extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_
/* return of 0 means drop frame */
extern int vp8_pick_frame_size(VP8_COMP *cpi);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_RATECTRL_H_
diff --git a/libvpx/vp8/encoder/rdopt.c b/libvpx/vp8/encoder/rdopt.c
index 5016cc4..387701c 100644
--- a/libvpx/vp8/encoder/rdopt.c
+++ b/libvpx/vp8/encoder/rdopt.c
@@ -528,19 +528,16 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a,
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-# define QC( I) ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
-
+ assert(eob <= 16);
for (; c < eob; c++)
{
- int v = QC(c);
- int t = vp8_dct_value_tokens_ptr[v].Token;
+ const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]];
+ const int t = vp8_dct_value_tokens_ptr[v].Token;
cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
cost += vp8_dct_value_cost_ptr[v];
pt = vp8_prev_token_class[t];
}
-# undef QC
-
if (c < 16)
cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
diff --git a/libvpx/vp8/encoder/rdopt.h b/libvpx/vp8/encoder/rdopt.h
index 1e11fa7..fe21b8e 100644
--- a/libvpx/vp8/encoder/rdopt.h
+++ b/libvpx/vp8/encoder/rdopt.h
@@ -9,8 +9,12 @@
*/
-#ifndef __INC_RDOPT_H
-#define __INC_RDOPT_H
+#ifndef VP8_ENCODER_RDOPT_H_
+#define VP8_ENCODER_RDOPT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
@@ -130,4 +134,8 @@ extern void vp8_mv_pred
);
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_RDOPT_H_
diff --git a/libvpx/vp8/encoder/segmentation.h b/libvpx/vp8/encoder/segmentation.h
index 12815b0..6b55005 100644
--- a/libvpx/vp8/encoder/segmentation.h
+++ b/libvpx/vp8/encoder/segmentation.h
@@ -8,9 +8,21 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP8_ENCODER_SEGMENTATION_H_
+#define VP8_ENCODER_SEGMENTATION_H_
#include "string.h"
#include "vp8/common/blockd.h"
#include "onyx_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_ENCODER_SEGMENTATION_H_
diff --git a/libvpx/vp8/encoder/temporal_filter.c b/libvpx/vp8/encoder/temporal_filter.c
index 7e3af71..513b2bf 100644
--- a/libvpx/vp8/encoder/temporal_filter.c
+++ b/libvpx/vp8/encoder/temporal_filter.c
@@ -16,7 +16,6 @@
#include "vp8/common/alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
-#include "psnr.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/extend.h"
#include "ratectrl.h"
diff --git a/libvpx/vp8/encoder/tokenize.c b/libvpx/vp8/encoder/tokenize.c
index 11559a7..2dc8205 100644
--- a/libvpx/vp8/encoder/tokenize.c
+++ b/libvpx/vp8/encoder/tokenize.c
@@ -213,6 +213,7 @@ static void tokenize1st_order_b
/* Luma */
for (block = 0; block < 16; block++, b++)
{
+ const int eob = *b->eob;
tmp1 = vp8_block2above[block];
tmp2 = vp8_block2left[block];
qcoeff_ptr = b->qcoeff;
@@ -223,7 +224,7 @@ static void tokenize1st_order_b
c = type ? 0 : 1;
- if(c >= *b->eob)
+ if(c >= eob)
{
/* c = band for this case */
t->Token = DCT_EOB_TOKEN;
@@ -250,7 +251,8 @@ static void tokenize1st_order_b
t++;
c++;
- for (; c < *b->eob; c++)
+ assert(eob <= 16);
+ for (; c < eob; c++)
{
rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c];
@@ -286,6 +288,7 @@ static void tokenize1st_order_b
/* Chroma */
for (block = 16; block < 24; block++, b++)
{
+ const int eob = *b->eob;
tmp1 = vp8_block2above[block];
tmp2 = vp8_block2left[block];
qcoeff_ptr = b->qcoeff;
@@ -294,7 +297,7 @@ static void tokenize1st_order_b
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
- if(!(*b->eob))
+ if(!eob)
{
/* c = band for this case */
t->Token = DCT_EOB_TOKEN;
@@ -321,7 +324,8 @@ static void tokenize1st_order_b
t++;
c = 1;
- for (; c < *b->eob; c++)
+ assert(eob <= 16);
+ for (; c < eob; c++)
{
rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c];
diff --git a/libvpx/vp8/encoder/tokenize.h b/libvpx/vp8/encoder/tokenize.h
index 1e6cea1..b73a9ee 100644
--- a/libvpx/vp8/encoder/tokenize.h
+++ b/libvpx/vp8/encoder/tokenize.h
@@ -9,12 +9,16 @@
*/
-#ifndef tokenize_h
-#define tokenize_h
+#ifndef VP8_ENCODER_TOKENIZE_H_
+#define VP8_ENCODER_TOKENIZE_H_
#include "vp8/common/entropy.h"
#include "block.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp8_tokenize_initialize();
typedef struct
@@ -47,4 +51,8 @@ extern const short *const vp8_dct_value_cost_ptr;
*/
extern const TOKENVALUE *const vp8_dct_value_tokens_ptr;
-#endif /* tokenize_h */
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_ENCODER_TOKENIZE_H_
diff --git a/libvpx/vp8/encoder/treewriter.h b/libvpx/vp8/encoder/treewriter.h
index 48574f3..cfb2730 100644
--- a/libvpx/vp8/encoder/treewriter.h
+++ b/libvpx/vp8/encoder/treewriter.h
@@ -9,8 +9,8 @@
*/
-#ifndef __INC_TREEWRITER_H
-#define __INC_TREEWRITER_H
+#ifndef VP8_ENCODER_TREEWRITER_H_
+#define VP8_ENCODER_TREEWRITER_H_
/* Trees map alphabets into huffman-like codes suitable for an arithmetic
bit coder. Timothy S Murphy 11 October 2004 */
@@ -19,6 +19,10 @@
#include "boolhuff.h" /* for now */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef BOOL_CODER vp8_writer;
#define vp8_write vp8_encode_bool
@@ -123,4 +127,8 @@ void vp8_cost_tokens2(
int *Costs, const vp8_prob *, vp8_tree, int
);
+#ifdef __cplusplus
+} // extern "C"
#endif
+
+#endif // VP8_ENCODER_TREEWRITER_H_
diff --git a/libvpx/vp8/vp8_common.mk b/libvpx/vp8/vp8_common.mk
index f98eb31..dfb54a5 100644
--- a/libvpx/vp8/vp8_common.mk
+++ b/libvpx/vp8/vp8_common.mk
@@ -47,7 +47,7 @@ VP8_COMMON_SRCS-yes += common/quant_common.h
VP8_COMMON_SRCS-yes += common/reconinter.h
VP8_COMMON_SRCS-yes += common/reconintra4x4.h
VP8_COMMON_SRCS-yes += common/rtcd.c
-VP8_COMMON_SRCS-yes += common/rtcd_defs.sh
+VP8_COMMON_SRCS-yes += common/rtcd_defs.pl
VP8_COMMON_SRCS-yes += common/setupintrarecon.h
VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
VP8_COMMON_SRCS-yes += common/systemdependent.h
@@ -159,14 +159,6 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
# common (neon)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict4x4_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict8x4_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict8x8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict16x16_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x4_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem16x16_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
@@ -181,14 +173,20 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x8_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
-$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
+# common (neon intrinsics)
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c
+
+
+$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl))
diff --git a/libvpx/vp8/vp8_cx_iface.c b/libvpx/vp8/vp8_cx_iface.c
index 19e9d27..4c896b1 100644
--- a/libvpx/vp8/vp8_cx_iface.c
+++ b/libvpx/vp8/vp8_cx_iface.c
@@ -414,7 +414,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
printf("Sharpness: %d\n", oxcf->Sharpness);
printf("cpu_used: %d\n", oxcf->cpu_used);
printf("Mode: %d\n", oxcf->Mode);
- printf("delete_first_pass_file: %d\n", oxcf->delete_first_pass_file);
printf("auto_key: %d\n", oxcf->auto_key);
printf("key_freq: %d\n", oxcf->key_freq);
printf("end_usage: %d\n", oxcf->end_usage);
@@ -751,9 +750,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
if (!ctx->cfg.rc_target_bitrate)
return res;
- if (!ctx->cfg.rc_target_bitrate)
- return res;
-
if (img)
res = validate_img(ctx, img);
@@ -1266,10 +1262,10 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
128, /* kf_max_dist */
#if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION)
- 1, /* g_delete_first_pass_file */
"vp8.fpf" /* first pass filename */
#endif
VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
+ {0}, /* ss_target_bitrate */
1, /* ts_number_layers */
{0}, /* ts_target_bitrate */
{0}, /* ts_rate_decimator */
diff --git a/libvpx/vp8/vp8_dx_iface.c b/libvpx/vp8/vp8_dx_iface.c
index 871b8d3..0b4c4cb 100644
--- a/libvpx/vp8/vp8_dx_iface.c
+++ b/libvpx/vp8/vp8_dx_iface.c
@@ -929,6 +929,7 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) =
vp8_get_si, /* vpx_codec_get_si_fn_t get_si; */
vp8_decode, /* vpx_codec_decode_fn_t decode; */
vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */
+ NOT_IMPLEMENTED,
},
{ /* encoder functions */
NOT_IMPLEMENTED,
diff --git a/libvpx/vp8/vp8cx.mk b/libvpx/vp8/vp8cx.mk
index cd091f3..d7c6dd1 100644
--- a/libvpx/vp8/vp8cx.mk
+++ b/libvpx/vp8/vp8cx.mk
@@ -50,7 +50,6 @@ VP8_CX_SRCS-yes += encoder/mcomp.h
VP8_CX_SRCS-yes += encoder/modecosts.h
VP8_CX_SRCS-yes += encoder/onyx_int.h
VP8_CX_SRCS-yes += encoder/pickinter.h
-VP8_CX_SRCS-yes += encoder/psnr.h
VP8_CX_SRCS-yes += encoder/quantize.h
VP8_CX_SRCS-yes += encoder/ratectrl.h
VP8_CX_SRCS-yes += encoder/rdopt.h
@@ -61,7 +60,6 @@ VP8_CX_SRCS-yes += encoder/modecosts.c
VP8_CX_SRCS-yes += encoder/onyx_if.c
VP8_CX_SRCS-yes += encoder/pickinter.c
VP8_CX_SRCS-yes += encoder/picklpf.c
-VP8_CX_SRCS-yes += encoder/psnr.c
VP8_CX_SRCS-yes += encoder/quantize.c
VP8_CX_SRCS-yes += encoder/ratectrl.c
VP8_CX_SRCS-yes += encoder/rdopt.c
diff --git a/libvpx/vp8/vp8cx_arm.mk b/libvpx/vp8/vp8cx_arm.mk
index b030ee5..398172a 100644
--- a/libvpx/vp8/vp8cx_arm.mk
+++ b/libvpx/vp8/vp8cx_arm.mk
@@ -37,6 +37,7 @@ VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM)
# encoder
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon$(ASM)
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/picklpf_arm.c
+VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon$(ASM)
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon$(ASM)
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_mse16x16_neon$(ASM)
diff --git a/libvpx/vp8/vp8dx.mk b/libvpx/vp8/vp8dx.mk
index 4a8f467..892ed70 100644
--- a/libvpx/vp8/vp8dx.mk
+++ b/libvpx/vp8/vp8dx.mk
@@ -22,7 +22,7 @@ VP8_DX_SRCS-yes += vp8_dx_iface.c
VP8_DX_SRCS-yes += decoder/dboolhuff.c
VP8_DX_SRCS-yes += decoder/decodemv.c
-VP8_DX_SRCS-yes += decoder/decodframe.c
+VP8_DX_SRCS-yes += decoder/decodeframe.c
VP8_DX_SRCS-yes += decoder/detokenize.c
VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/ec_types.h
VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.h