diff options
| author | Bill Yi <byi@google.com> | 2016-02-22 19:24:59 +0000 |
|---|---|---|
| committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2016-02-22 19:24:59 +0000 |
| commit | 857bb8df092ee86783ab6933063a736929a07227 (patch) | |
| tree | f42181486e87a18dba9945956209fae0366172cb | |
| parent | 30dc5b6cbc88d67b24843b52c282e13f070b4ebc (diff) | |
| parent | c927526be9a7b72fb5edb3f29c4e8ceabe0ec98a (diff) | |
| download | platform_external_libvpx-brillo-m10-dev.tar.gz platform_external_libvpx-brillo-m10-dev.tar.bz2 platform_external_libvpx-brillo-m10-dev.zip | |
Merge "Update external/libvpx to 1.5.0 release"brillo-m10-releasebrillo-m10-dev
246 files changed, 8362 insertions, 8108 deletions
diff --git a/README.android b/README.android index 5949fc65..36d716d8 100644 --- a/README.android +++ b/README.android @@ -1,12 +1,12 @@ Name: libvpx URL: http://www.webmproject.org -Version: v1.4.0 +Version: v1.5.0 License: BSD License File: libvpx/LICENSE -Date: Tuesday August 25 2015 -Branch: origin/master -Commit: 7105df53d7dc13d5e575bc8df714ec8d1da36b06 +Date: Thursday November 19 2015 +Branch: javanwhistlingduck +Commit: cbecf57f3e0d85a7b7f97f3ab7c507f6fe640a93 Description: Contains the sources used to compile libvpx. diff --git a/README.version b/README.version index f4d77c4e..48e6229d 100644 --- a/README.version +++ b/README.version @@ -1,4 +1,4 @@ -URL: https://chromium.googlesource.com/webm/libvpx/+archive/v1.4.0.tar.gz -Version: 1.4.0 +URL: https://storage.googleapis.com/downloads.webmproject.org/releases/webm/libvpx-1.5.0.tar.bz2 +Version: 1.5.0 BugComponent: 42195 Owners: johannkoenig diff --git a/config/arm-neon/libvpx_srcs.txt b/config/arm-neon/libvpx_srcs.txt index 9d5084c3..bdeae071 100644 --- a/config/arm-neon/libvpx_srcs.txt +++ b/config/arm-neon/libvpx_srcs.txt @@ -14,7 +14,6 @@ vp8/common/arm/armv6/dequantize_v6.asm vp8/common/arm/armv6/filter_v6.asm vp8/common/arm/armv6/idct_blk_v6.c vp8/common/arm/armv6/idct_v6.asm -vp8/common/arm/armv6/intra4x4_predict_v6.asm vp8/common/arm/armv6/iwalsh_v6.asm vp8/common/arm/armv6/loopfilter_v6.asm vp8/common/arm/armv6/simpleloopfilter_v6.asm @@ -36,7 +35,6 @@ vp8/common/arm/neon/iwalsh_neon.c vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c vp8/common/arm/neon/mbloopfilter_neon.c -vp8/common/arm/neon/reconintra_neon.c vp8/common/arm/neon/shortidct4x4llm_neon.c vp8/common/arm/neon/sixtappredict_neon.c vp8/common/arm/neon/vp8_loopfilter_neon.c @@ -80,6 +78,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h 
vp8/common/rtcd.c @@ -298,6 +297,7 @@ vp9/encoder/vp9_treewriter.h vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk diff --git a/config/arm-neon/vp8_rtcd.h b/config/arm-neon/vp8_rtcd.h index 0b836c4f..6fd2dac4 100644 --- a/config/arm-neon/vp8_rtcd.h +++ b/config/arm-neon/vp8_rtcd.h @@ -48,14 +48,6 @@ void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int vp8_block_error_c(short *coeff, short *dqcoeff); #define vp8_block_error vp8_block_error_c -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -void vp8_build_intra_predictors_mbuv_s_neon(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_neon - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -void vp8_build_intra_predictors_mby_s_neon(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_neon - void vp8_clear_system_state_c(); #define vp8_clear_system_state vp8_clear_system_state_c @@ -117,10 +109,6 @@ void vp8_fast_quantize_b_neon(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad 
vp8_full_search_sad_c -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define vp8_intra4x4_predict vp8_intra4x4_predict_armv6 - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_armv6(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); diff --git a/config/arm-neon/vpx_config.asm b/config/arm-neon/vpx_config.asm index 6f032662..5b623b8c 100644 --- a/config/arm-neon/vpx_config.asm +++ b/config/arm-neon/vpx_config.asm @@ -28,7 +28,7 @@ .equ HAVE_UNISTD_H , 1 .equ CONFIG_DEPENDENCY_TRACKING , 1 .equ CONFIG_EXTERNAL_BUILD , 1 -.equ CONFIG_INSTALL_DOCS , 1 +.equ CONFIG_INSTALL_DOCS , 0 .equ CONFIG_INSTALL_BINS , 1 .equ CONFIG_INSTALL_LIBS , 1 .equ CONFIG_INSTALL_SRCS , 0 @@ -86,4 +86,5 @@ .equ CONFIG_SPATIAL_SVC , 0 .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 +.equ CONFIG_MISC_FIXES , 0 .section .note.GNU-stack,"",%progbits diff --git a/config/arm-neon/vpx_config.h b/config/arm-neon/vpx_config.h index 8d02c250..d9d5f1ca 100644 --- a/config/arm-neon/vpx_config.h +++ b/config/arm-neon/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 
+#define CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/arm-neon/vpx_dsp_rtcd.h b/config/arm-neon/vpx_dsp_rtcd.h index 4de075d7..ccb5df42 100644 --- a/config/arm-neon/vpx_dsp_rtcd.h +++ b/config/arm-neon/vpx_dsp_rtcd.h @@ -103,6 +103,18 @@ void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_neon @@ -118,6 +130,18 @@ void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_neon +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c @@ -130,6 +154,21 @@ void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_dc_128_predictor_16x16 
vpx_dc_128_predictor_16x16_neon @@ -254,6 +293,9 @@ void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_neon +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_neon @@ -743,6 +785,9 @@ uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int sour uint32_t vpx_variance_halfpixvar16x16_v_media(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); #define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_media +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/config/arm-neon/vpx_version.h b/config/arm-neon/vpx_version.h index bce03815..3b6ea1e9 100644 --- a/config/arm-neon/vpx_version.h +++ b/config/arm-neon/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/config/arm/libvpx_srcs.txt b/config/arm/libvpx_srcs.txt index 53c4fda0..46a3c605 100644 --- a/config/arm/libvpx_srcs.txt +++ b/config/arm/libvpx_srcs.txt @@ -14,7 +14,6 @@ vp8/common/arm/armv6/dequantize_v6.asm 
vp8/common/arm/armv6/filter_v6.asm vp8/common/arm/armv6/idct_blk_v6.c vp8/common/arm/armv6/idct_v6.asm -vp8/common/arm/armv6/intra4x4_predict_v6.asm vp8/common/arm/armv6/iwalsh_v6.asm vp8/common/arm/armv6/loopfilter_v6.asm vp8/common/arm/armv6/simpleloopfilter_v6.asm @@ -64,6 +63,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h vp8/common/rtcd.c @@ -272,6 +272,7 @@ vp9/encoder/vp9_treewriter.h vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk diff --git a/config/arm/vp8_rtcd.h b/config/arm/vp8_rtcd.h index 7c2cefdd..f7287a52 100644 --- a/config/arm/vp8_rtcd.h +++ b/config/arm/vp8_rtcd.h @@ -45,12 +45,6 @@ void vp8_bilinear_predict8x8_armv6(unsigned char *src, int src_pitch, int xofst, int vp8_block_error_c(short *coeff, short *dqcoeff); #define vp8_block_error vp8_block_error_c -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c - void vp8_clear_system_state_c(); #define vp8_clear_system_state vp8_clear_system_state_c @@ -101,10 +95,6 @@ void vp8_fast_quantize_b_c(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad 
vp8_full_search_sad_c -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define vp8_intra4x4_predict vp8_intra4x4_predict_armv6 - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_armv6(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); #define vp8_loop_filter_bh vp8_loop_filter_bh_armv6 diff --git a/config/arm/vpx_config.asm b/config/arm/vpx_config.asm index 2a69621b..992fdee9 100644 --- a/config/arm/vpx_config.asm +++ b/config/arm/vpx_config.asm @@ -28,7 +28,7 @@ .equ HAVE_UNISTD_H , 1 .equ CONFIG_DEPENDENCY_TRACKING , 1 .equ CONFIG_EXTERNAL_BUILD , 1 -.equ CONFIG_INSTALL_DOCS , 1 +.equ CONFIG_INSTALL_DOCS , 0 .equ CONFIG_INSTALL_BINS , 1 .equ CONFIG_INSTALL_LIBS , 1 .equ CONFIG_INSTALL_SRCS , 0 @@ -86,4 +86,5 @@ .equ CONFIG_SPATIAL_SVC , 0 .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 +.equ CONFIG_MISC_FIXES , 0 .section .note.GNU-stack,"",%progbits diff --git a/config/arm/vpx_config.h b/config/arm/vpx_config.h index 62b62859..d6d28094 100644 --- a/config/arm/vpx_config.h +++ b/config/arm/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 +#define CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/arm/vpx_dsp_rtcd.h b/config/arm/vpx_dsp_rtcd.h index 
bb570a02..ce2aeac1 100644 --- a/config/arm/vpx_dsp_rtcd.h +++ b/config/arm/vpx_dsp_rtcd.h @@ -94,6 +94,18 @@ void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c @@ -106,6 +118,18 @@ void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + 
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c @@ -118,6 +142,21 @@ void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c @@ -217,6 +256,9 @@ void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define 
vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_c @@ -652,6 +694,9 @@ uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int sour uint32_t vpx_variance_halfpixvar16x16_v_media(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); #define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_media +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/config/arm/vpx_version.h b/config/arm/vpx_version.h index bce03815..3b6ea1e9 100644 --- a/config/arm/vpx_version.h +++ b/config/arm/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/config/arm64/libvpx_srcs.txt b/config/arm64/libvpx_srcs.txt index 483ffbb5..97705603 100644 --- a/config/arm64/libvpx_srcs.txt +++ b/config/arm64/libvpx_srcs.txt @@ -19,7 +19,6 @@ vp8/common/arm/neon/iwalsh_neon.c vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c vp8/common/arm/neon/mbloopfilter_neon.c -vp8/common/arm/neon/reconintra_neon.c vp8/common/arm/neon/shortidct4x4llm_neon.c vp8/common/arm/neon/sixtappredict_neon.c vp8/common/arm/neon/vp8_loopfilter_neon.c @@ -63,6 +62,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h 
vp8/common/rtcd.c @@ -279,6 +279,7 @@ vp9/encoder/vp9_treewriter.h vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk diff --git a/config/arm64/vp8_rtcd.h b/config/arm64/vp8_rtcd.h index 1f376294..5ab06f46 100644 --- a/config/arm64/vp8_rtcd.h +++ b/config/arm64/vp8_rtcd.h @@ -44,14 +44,6 @@ void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int vp8_block_error_c(short *coeff, short *dqcoeff); #define vp8_block_error vp8_block_error_c -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -void vp8_build_intra_predictors_mbuv_s_neon(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_neon - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -void vp8_build_intra_predictors_mby_s_neon(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_neon - void vp8_clear_system_state_c(); #define vp8_clear_system_state vp8_clear_system_state_c @@ -105,9 +97,6 @@ void vp8_fast_quantize_b_neon(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad 
vp8_full_search_sad_c -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define vp8_intra4x4_predict vp8_intra4x4_predict_c - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); #define vp8_loop_filter_bh vp8_loop_filter_bh_neon diff --git a/config/arm64/vpx_config.asm b/config/arm64/vpx_config.asm index b6c1a52e..d7d6652e 100644 --- a/config/arm64/vpx_config.asm +++ b/config/arm64/vpx_config.asm @@ -28,7 +28,7 @@ .equ HAVE_UNISTD_H , 1 .equ CONFIG_DEPENDENCY_TRACKING , 1 .equ CONFIG_EXTERNAL_BUILD , 1 -.equ CONFIG_INSTALL_DOCS , 1 +.equ CONFIG_INSTALL_DOCS , 0 .equ CONFIG_INSTALL_BINS , 1 .equ CONFIG_INSTALL_LIBS , 1 .equ CONFIG_INSTALL_SRCS , 0 @@ -86,4 +86,5 @@ .equ CONFIG_SPATIAL_SVC , 0 .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 +.equ CONFIG_MISC_FIXES , 0 .section .note.GNU-stack,"",%progbits diff --git a/config/arm64/vpx_config.h b/config/arm64/vpx_config.h index fb0eabc0..981aa3e9 100644 --- a/config/arm64/vpx_config.h +++ b/config/arm64/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 +#define CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/arm64/vpx_dsp_rtcd.h b/config/arm64/vpx_dsp_rtcd.h index 2cac9e66..e5fa148c 100644 --- a/config/arm64/vpx_dsp_rtcd.h +++ b/config/arm64/vpx_dsp_rtcd.h @@ -103,6 +103,18 @@ void vpx_d207_predictor_4x4_c(uint8_t *dst, 
ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_neon @@ -118,6 +130,18 @@ void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_neon +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void 
vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c @@ -130,6 +154,21 @@ void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_neon @@ -254,6 +293,9 @@ void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_neon 
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_neon @@ -728,6 +770,9 @@ uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int sou uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); #define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/config/arm64/vpx_version.h b/config/arm64/vpx_version.h index bce03815..3b6ea1e9 100644 --- a/config/arm64/vpx_version.h +++ b/config/arm64/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/config/generic/libvpx_srcs.txt b/config/generic/libvpx_srcs.txt index f6e76f01..212026b4 100644 --- a/config/generic/libvpx_srcs.txt +++ b/config/generic/libvpx_srcs.txt @@ -44,6 +44,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h vp8/common/rtcd.c @@ -248,6 +249,7 @@ vp9/encoder/vp9_treewriter.h vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h 
vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk diff --git a/config/generic/vp8_rtcd.h b/config/generic/vp8_rtcd.h index f5424bbc..bad72a3e 100644 --- a/config/generic/vp8_rtcd.h +++ b/config/generic/vp8_rtcd.h @@ -41,12 +41,6 @@ void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int int vp8_block_error_c(short *coeff, short *dqcoeff); #define vp8_block_error vp8_block_error_c -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c - void vp8_clear_system_state_c(); #define vp8_clear_system_state vp8_clear_system_state_c @@ -89,9 +83,6 @@ void vp8_fast_quantize_b_c(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define vp8_intra4x4_predict vp8_intra4x4_predict_c - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); #define vp8_loop_filter_bh vp8_loop_filter_bh_c diff --git a/config/generic/vpx_config.asm b/config/generic/vpx_config.asm index b684cd24..c3530a2b 100644 --- a/config/generic/vpx_config.asm +++ 
b/config/generic/vpx_config.asm @@ -28,7 +28,7 @@ .equ HAVE_UNISTD_H , 1 .equ CONFIG_DEPENDENCY_TRACKING , 1 .equ CONFIG_EXTERNAL_BUILD , 1 -.equ CONFIG_INSTALL_DOCS , 1 +.equ CONFIG_INSTALL_DOCS , 0 .equ CONFIG_INSTALL_BINS , 1 .equ CONFIG_INSTALL_LIBS , 1 .equ CONFIG_INSTALL_SRCS , 0 @@ -86,4 +86,5 @@ .equ CONFIG_SPATIAL_SVC , 0 .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 +.equ CONFIG_MISC_FIXES , 0 .section .note.GNU-stack,"",%progbits diff --git a/config/generic/vpx_config.h b/config/generic/vpx_config.h index 9cdca1fd..50da7042 100644 --- a/config/generic/vpx_config.h +++ b/config/generic/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 +#define CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/generic/vpx_dsp_rtcd.h b/config/generic/vpx_dsp_rtcd.h index 010cbe78..f4929eec 100644 --- a/config/generic/vpx_dsp_rtcd.h +++ b/config/generic/vpx_dsp_rtcd.h @@ -94,6 +94,18 @@ void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 
vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c @@ -106,6 +118,18 @@ void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c @@ -118,6 +142,21 @@ void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t 
*above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c @@ -217,6 +256,9 @@ void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_c @@ -643,6 +685,9 @@ uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int sou uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); #define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/config/generic/vpx_version.h b/config/generic/vpx_version.h index bce03815..3b6ea1e9 100644 --- 
a/config/generic/vpx_version.h +++ b/config/generic/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/config/mips32-dspr2/libvpx_srcs.txt b/config/mips32-dspr2/libvpx_srcs.txt index 9ea5edef..452c0a38 100644 --- a/config/mips32-dspr2/libvpx_srcs.txt +++ b/config/mips32-dspr2/libvpx_srcs.txt @@ -50,6 +50,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h vp8/common/rtcd.c @@ -257,6 +258,7 @@ vp9/encoder/vp9_treewriter.h vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk diff --git a/config/mips32-dspr2/vp8_rtcd.h b/config/mips32-dspr2/vp8_rtcd.h index 4442f6ae..03d3f0c8 100644 --- a/config/mips32-dspr2/vp8_rtcd.h +++ b/config/mips32-dspr2/vp8_rtcd.h @@ -41,12 +41,6 @@ void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int int vp8_block_error_c(short *coeff, short *dqcoeff); #define vp8_block_error vp8_block_error_c -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c - void 
vp8_clear_system_state_c(); #define vp8_clear_system_state vp8_clear_system_state_c @@ -96,9 +90,6 @@ void vp8_fast_quantize_b_c(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define vp8_intra4x4_predict vp8_intra4x4_predict_c - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_dspr2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); #define vp8_loop_filter_bh vp8_loop_filter_bh_dspr2 diff --git a/config/mips32-dspr2/vpx_config.h b/config/mips32-dspr2/vpx_config.h index f0a0556e..4e8961cf 100644 --- a/config/mips32-dspr2/vpx_config.h +++ b/config/mips32-dspr2/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 +#define CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/mips32-dspr2/vpx_dsp_rtcd.h b/config/mips32-dspr2/vpx_dsp_rtcd.h index b716181f..7acb8072 100644 --- a/config/mips32-dspr2/vpx_dsp_rtcd.h +++ b/config/mips32-dspr2/vpx_dsp_rtcd.h @@ -102,6 +102,18 @@ void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t 
*left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c @@ -114,6 +126,18 @@ void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define 
vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c @@ -126,6 +150,21 @@ void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c @@ -231,6 +270,9 @@ void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_dspr2 +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vpx_idct16x16_10_add_dspr2(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_dspr2 
@@ -681,6 +723,9 @@ uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int sou uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); #define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/config/mips32-dspr2/vpx_version.h b/config/mips32-dspr2/vpx_version.h index bce03815..3b6ea1e9 100644 --- a/config/mips32-dspr2/vpx_version.h +++ b/config/mips32-dspr2/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/config/mips32/libvpx_srcs.txt b/config/mips32/libvpx_srcs.txt index f6e76f01..212026b4 100644 --- a/config/mips32/libvpx_srcs.txt +++ b/config/mips32/libvpx_srcs.txt @@ -44,6 +44,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h vp8/common/rtcd.c @@ -248,6 +249,7 @@ vp9/encoder/vp9_treewriter.h vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk diff --git a/config/mips32/vp8_rtcd.h b/config/mips32/vp8_rtcd.h index 28e23b31..791c1552 100644 --- a/config/mips32/vp8_rtcd.h +++ b/config/mips32/vp8_rtcd.h @@ -41,12 +41,6 @@ void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int int vp8_block_error_c(short *coeff, short *dqcoeff); #define vp8_block_error 
vp8_block_error_c -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c - void vp8_clear_system_state_c(); #define vp8_clear_system_state vp8_clear_system_state_c @@ -89,9 +83,6 @@ void vp8_fast_quantize_b_c(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define vp8_intra4x4_predict vp8_intra4x4_predict_c - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); #define vp8_loop_filter_bh vp8_loop_filter_bh_c diff --git a/config/mips32/vpx_config.h b/config/mips32/vpx_config.h index 1bc7afa7..82c9cf52 100644 --- a/config/mips32/vpx_config.h +++ b/config/mips32/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 +#define 
CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/mips32/vpx_dsp_rtcd.h b/config/mips32/vpx_dsp_rtcd.h index cff36af5..2d2bec21 100644 --- a/config/mips32/vpx_dsp_rtcd.h +++ b/config/mips32/vpx_dsp_rtcd.h @@ -94,6 +94,18 @@ void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c @@ -106,6 +118,18 @@ void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t 
*dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c @@ -118,6 +142,21 @@ void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c @@ -217,6 +256,9 @@ void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 
vpx_h_predictor_8x8_c +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_c @@ -643,6 +685,9 @@ uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int sou uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); #define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/config/mips32/vpx_version.h b/config/mips32/vpx_version.h index bce03815..3b6ea1e9 100644 --- a/config/mips32/vpx_version.h +++ b/config/mips32/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/config/mips64/libvpx_srcs.txt b/config/mips64/libvpx_srcs.txt index f6e76f01..212026b4 100644 --- a/config/mips64/libvpx_srcs.txt +++ b/config/mips64/libvpx_srcs.txt @@ -44,6 +44,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h vp8/common/rtcd.c @@ -248,6 +249,7 @@ vp9/encoder/vp9_treewriter.h vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk diff --git a/config/mips64/vp8_rtcd.h 
b/config/mips64/vp8_rtcd.h index 28e23b31..791c1552 100644 --- a/config/mips64/vp8_rtcd.h +++ b/config/mips64/vp8_rtcd.h @@ -41,12 +41,6 @@ void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int int vp8_block_error_c(short *coeff, short *dqcoeff); #define vp8_block_error vp8_block_error_c -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c - void vp8_clear_system_state_c(); #define vp8_clear_system_state vp8_clear_system_state_c @@ -89,9 +83,6 @@ void vp8_fast_quantize_b_c(struct block *, struct blockd *); int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define vp8_intra4x4_predict vp8_intra4x4_predict_c - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); #define vp8_loop_filter_bh vp8_loop_filter_bh_c diff --git a/config/mips64/vpx_config.h b/config/mips64/vpx_config.h index f19731bc..b6cc04bc 100644 --- a/config/mips64/vpx_config.h +++ b/config/mips64/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define 
CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 +#define CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/mips64/vpx_dsp_rtcd.h b/config/mips64/vpx_dsp_rtcd.h index cff36af5..2d2bec21 100644 --- a/config/mips64/vpx_dsp_rtcd.h +++ b/config/mips64/vpx_dsp_rtcd.h @@ -94,6 +94,18 @@ void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c @@ -106,6 +118,18 @@ void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, 
const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c @@ -118,6 +142,21 @@ void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define 
vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c @@ -217,6 +256,9 @@ void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_c @@ -643,6 +685,9 @@ uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int sou uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); #define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/config/mips64/vpx_version.h b/config/mips64/vpx_version.h index bce03815..3b6ea1e9 100644 --- a/config/mips64/vpx_version.h +++ b/config/mips64/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/config/x86/libvpx_srcs.txt b/config/x86/libvpx_srcs.txt index af63fd8b..88150165 100644 --- a/config/x86/libvpx_srcs.txt +++ b/config/x86/libvpx_srcs.txt @@ -47,6 +47,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h 
vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h vp8/common/rtcd.c @@ -79,7 +80,6 @@ vp8/common/x86/postproc_mmx.asm vp8/common/x86/postproc_sse2.asm vp8/common/x86/recon_mmx.asm vp8/common/x86/recon_sse2.asm -vp8/common/x86/recon_wrapper_sse2.c vp8/common/x86/subpixel_mmx.asm vp8/common/x86/subpixel_sse2.asm vp8/common/x86/subpixel_ssse3.asm @@ -293,6 +293,7 @@ vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk @@ -349,6 +350,8 @@ vpx_dsp/x86/fwd_dct32x32_impl_sse2.h vpx_dsp/x86/fwd_txfm_impl_sse2.h vpx_dsp/x86/fwd_txfm_sse2.c vpx_dsp/x86/fwd_txfm_sse2.h +vpx_dsp/x86/halfpix_variance_impl_sse2.asm +vpx_dsp/x86/halfpix_variance_sse2.c vpx_dsp/x86/intrapred_sse2.asm vpx_dsp/x86/intrapred_ssse3.asm vpx_dsp/x86/inv_txfm_sse2.c diff --git a/config/x86/vp8_rtcd.h b/config/x86/vp8_rtcd.h index fc714f41..c4c70452 100644 --- a/config/x86/vp8_rtcd.h +++ b/config/x86/vp8_rtcd.h @@ -60,16 +60,6 @@ int vp8_block_error_mmx(short *coeff, short *dqcoeff); int vp8_block_error_xmm(short *coeff, short *dqcoeff); #define vp8_block_error vp8_block_error_xmm -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -void vp8_build_intra_predictors_mbuv_s_sse2(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -void vp8_build_intra_predictors_mbuv_s_ssse3(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s 
vp8_build_intra_predictors_mbuv_s_ssse3 - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -void vp8_build_intra_predictors_mby_s_sse2(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -void vp8_build_intra_predictors_mby_s_ssse3(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3 - void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define vp8_clear_system_state vpx_reset_mmx_state @@ -146,9 +136,6 @@ int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sadx3 -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define vp8_intra4x4_predict vp8_intra4x4_predict_c - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); diff --git a/config/x86/vpx_config.asm b/config/x86/vpx_config.asm index 2b7f1ccf..b6557bb8 100644 --- a/config/x86/vpx_config.asm +++ b/config/x86/vpx_config.asm @@ -25,7 +25,7 @@ %define HAVE_UNISTD_H 1 %define 
CONFIG_DEPENDENCY_TRACKING 1 %define CONFIG_EXTERNAL_BUILD 1 -%define CONFIG_INSTALL_DOCS 1 +%define CONFIG_INSTALL_DOCS 0 %define CONFIG_INSTALL_BINS 1 %define CONFIG_INSTALL_LIBS 1 %define CONFIG_INSTALL_SRCS 0 @@ -83,3 +83,4 @@ %define CONFIG_SPATIAL_SVC 0 %define CONFIG_FP_MB_STATS 0 %define CONFIG_EMULATE_HARDWARE 0 +%define CONFIG_MISC_FIXES 0 diff --git a/config/x86/vpx_config.h b/config/x86/vpx_config.h index 634c67ba..a5168579 100644 --- a/config/x86/vpx_config.h +++ b/config/x86/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 +#define CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/x86/vpx_dsp_rtcd.h b/config/x86/vpx_dsp_rtcd.h index 64ee53fa..af7917a2 100644 --- a/config/x86/vpx_dsp_rtcd.h +++ b/config/x86/vpx_dsp_rtcd.h @@ -116,6 +116,18 @@ void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_ssse3 +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t 
*left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_ssse3 @@ -132,6 +144,18 @@ void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_ssse3 +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_ssse3 @@ -148,6 +172,21 @@ void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_ssse3 +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define 
vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_sse2 @@ -281,6 +320,9 @@ void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_ssse3 +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_sse2 @@ -864,15 +906,21 @@ unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, con uint32_t vpx_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); uint32_t vpx_variance_halfpixvar16x16_h_mmx(const unsigned 
char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); -#define vpx_variance_halfpixvar16x16_h vpx_variance_halfpixvar16x16_h_mmx +uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define vpx_variance_halfpixvar16x16_h vpx_variance_halfpixvar16x16_h_sse2 uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); uint32_t vpx_variance_halfpixvar16x16_hv_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); -#define vpx_variance_halfpixvar16x16_hv vpx_variance_halfpixvar16x16_hv_mmx +uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define vpx_variance_halfpixvar16x16_hv vpx_variance_halfpixvar16x16_hv_sse2 uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); uint32_t vpx_variance_halfpixvar16x16_v_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); -#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_mmx +uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_sse2 + +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c void vpx_dsp_rtcd(void); diff --git a/config/x86/vpx_version.h b/config/x86/vpx_version.h index bce03815..3b6ea1e9 100644 --- a/config/x86/vpx_version.h +++ b/config/x86/vpx_version.h @@ -1,7 
+1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/config/x86_64/libvpx_srcs.txt b/config/x86_64/libvpx_srcs.txt index ac3de522..3794e3ba 100644 --- a/config/x86_64/libvpx_srcs.txt +++ b/config/x86_64/libvpx_srcs.txt @@ -47,6 +47,7 @@ vp8/common/quant_common.h vp8/common/reconinter.c vp8/common/reconinter.h vp8/common/reconintra.c +vp8/common/reconintra.h vp8/common/reconintra4x4.c vp8/common/reconintra4x4.h vp8/common/rtcd.c @@ -80,7 +81,6 @@ vp8/common/x86/postproc_mmx.asm vp8/common/x86/postproc_sse2.asm vp8/common/x86/recon_mmx.asm vp8/common/x86/recon_sse2.asm -vp8/common/x86/recon_wrapper_sse2.c vp8/common/x86/subpixel_mmx.asm vp8/common/x86/subpixel_sse2.asm vp8/common/x86/subpixel_ssse3.asm @@ -296,6 +296,7 @@ vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm vp9/vp9_common.mk vp9/vp9_cx_iface.c vp9/vp9_dx_iface.c +vp9/vp9_dx_iface.h vp9/vp9_iface_common.h vp9/vp9cx.mk vp9/vp9dx.mk @@ -353,6 +354,8 @@ vpx_dsp/x86/fwd_txfm_impl_sse2.h vpx_dsp/x86/fwd_txfm_sse2.c vpx_dsp/x86/fwd_txfm_sse2.h vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm +vpx_dsp/x86/halfpix_variance_impl_sse2.asm +vpx_dsp/x86/halfpix_variance_sse2.c vpx_dsp/x86/intrapred_sse2.asm vpx_dsp/x86/intrapred_ssse3.asm vpx_dsp/x86/inv_txfm_sse2.c diff --git a/config/x86_64/vp8_rtcd.h b/config/x86_64/vp8_rtcd.h index fc714f41..c4c70452 100644 --- a/config/x86_64/vp8_rtcd.h +++ b/config/x86_64/vp8_rtcd.h @@ -60,16 +60,6 @@ int vp8_block_error_mmx(short *coeff, short *dqcoeff); int vp8_block_error_xmm(short *coeff, short *dqcoeff); #define vp8_block_error vp8_block_error_xmm -void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, 
unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -void vp8_build_intra_predictors_mbuv_s_sse2(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -void vp8_build_intra_predictors_mbuv_s_ssse3(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride); -#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3 - -void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -void vp8_build_intra_predictors_mby_s_sse2(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -void vp8_build_intra_predictors_mby_s_ssse3(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride); -#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3 - void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define vp8_clear_system_state vpx_reset_mmx_state @@ -146,9 +136,6 @@ int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sadx3 -void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); -#define 
vp8_intra4x4_predict vp8_intra4x4_predict_c - void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); diff --git a/config/x86_64/vpx_config.asm b/config/x86_64/vpx_config.asm index 6f0800b4..774d73fb 100644 --- a/config/x86_64/vpx_config.asm +++ b/config/x86_64/vpx_config.asm @@ -25,7 +25,7 @@ %define HAVE_UNISTD_H 1 %define CONFIG_DEPENDENCY_TRACKING 1 %define CONFIG_EXTERNAL_BUILD 1 -%define CONFIG_INSTALL_DOCS 1 +%define CONFIG_INSTALL_DOCS 0 %define CONFIG_INSTALL_BINS 1 %define CONFIG_INSTALL_LIBS 1 %define CONFIG_INSTALL_SRCS 0 @@ -83,3 +83,4 @@ %define CONFIG_SPATIAL_SVC 0 %define CONFIG_FP_MB_STATS 0 %define CONFIG_EMULATE_HARDWARE 0 +%define CONFIG_MISC_FIXES 0 diff --git a/config/x86_64/vpx_config.h b/config/x86_64/vpx_config.h index 8796347b..9278f1e8 100644 --- a/config/x86_64/vpx_config.h +++ b/config/x86_64/vpx_config.h @@ -37,7 +37,7 @@ #define HAVE_UNISTD_H 1 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 -#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_DOCS 0 #define CONFIG_INSTALL_BINS 1 #define CONFIG_INSTALL_LIBS 1 #define CONFIG_INSTALL_SRCS 0 @@ -95,4 +95,5 @@ #define CONFIG_SPATIAL_SVC 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 +#define CONFIG_MISC_FIXES 0 #endif /* VPX_CONFIG_H */ diff --git a/config/x86_64/vpx_dsp_rtcd.h b/config/x86_64/vpx_dsp_rtcd.h index e78d8ef6..73962338 100644 --- a/config/x86_64/vpx_dsp_rtcd.h +++ b/config/x86_64/vpx_dsp_rtcd.h @@ -116,6 +116,18 @@ void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t 
*above, const uint8_t *left); #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_ssse3 +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_ssse3 @@ -132,6 +144,18 @@ void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d45_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_ssse3 +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 
vpx_d45e_predictor_8x8_c + void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_ssse3 @@ -148,6 +172,21 @@ void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_ssse3 +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_sse2 @@ -282,6 +321,9 @@ void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vpx_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_ssse3 +void 
vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); #define vpx_idct16x16_10_add vpx_idct16x16_10_add_sse2 @@ -870,15 +912,21 @@ unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, con uint32_t vpx_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); uint32_t vpx_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); -#define vpx_variance_halfpixvar16x16_h vpx_variance_halfpixvar16x16_h_mmx +uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define vpx_variance_halfpixvar16x16_h vpx_variance_halfpixvar16x16_h_sse2 uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); uint32_t vpx_variance_halfpixvar16x16_hv_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); -#define vpx_variance_halfpixvar16x16_hv vpx_variance_halfpixvar16x16_hv_mmx +uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define vpx_variance_halfpixvar16x16_hv vpx_variance_halfpixvar16x16_hv_sse2 uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); uint32_t vpx_variance_halfpixvar16x16_v_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char 
*ref_ptr, int ref_stride, uint32_t *sse); -#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_mmx +uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_sse2 + +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c void vpx_dsp_rtcd(void); diff --git a/config/x86_64/vpx_version.h b/config/x86_64/vpx_version.h index bce03815..3b6ea1e9 100644 --- a/config/x86_64/vpx_version.h +++ b/config/x86_64/vpx_version.h @@ -1,7 +1,7 @@ #define VERSION_MAJOR 1 -#define VERSION_MINOR 4 +#define VERSION_MINOR 5 #define VERSION_PATCH 0 #define VERSION_EXTRA "" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.4.0" -#define VERSION_STRING " v1.4.0" +#define VERSION_STRING_NOSP "v1.5.0" +#define VERSION_STRING " v1.5.0" diff --git a/libvpx/.mailmap b/libvpx/.mailmap index 0bfda120..42f3617b 100644 --- a/libvpx/.mailmap +++ b/libvpx/.mailmap @@ -1,14 +1,21 @@ Adrian Grange <agrange@google.com> -Alex Converse <aconverse@google.com> <alex.converse@gmail.com> +Adrian Grange <agrange@google.com> <agrange@agrange-macbookpro.roam.corp.google.com> +Aℓex Converse <aconverse@google.com> +Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com> Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com> Alpha Lam <hclam@google.com> <hclam@chromium.org> Deb Mukherjee <debargha@google.com> Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com> Guillaume Martres <gmartres@google.com> <smarter3@gmail.com> Hangyu Kuang <hkuang@google.com> +Hangyu Kuang <hkuang@google.com> <hkuang@hkuang-macbookpro.roam.corp.google.com> +Hui Su <huisu@google.com> +Jacky Chen <jackychen@google.com> Jim Bankoski <jimbankoski@google.com> 
Johann Koenig <johannkoenig@google.com> Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com> +Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com> +Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com> John Koleszar <jkoleszar@google.com> Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org> Marco Paniconi <marpan@google.com> @@ -17,10 +24,13 @@ Pascal Massimino <pascal.massimino@gmail.com> Paul Wilkins <paulwilkins@google.com> Ralph Giles <giles@xiph.org> <giles@entropywave.com> Ralph Giles <giles@xiph.org> <giles@mozilla.com> +Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com> Sami Pietilä <samipietila@google.com> Tamar Levy <tamar.levy@intel.com> Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com> Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com> Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com> Tom Finegan <tomfinegan@google.com> +Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org> Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com> +Yaowu Xu <yaowu@google.com> <yaowu@YAOWU2-W.ad.corp.google.com> diff --git a/libvpx/AUTHORS b/libvpx/AUTHORS index 2f63d7c5..f89b6776 100644 --- a/libvpx/AUTHORS +++ b/libvpx/AUTHORS @@ -5,9 +5,9 @@ Aaron Watry <awatry@gmail.com> Abo Talib Mahfoodh <ab.mahfoodh@gmail.com> Adam Xu <adam@xuyaowu.com> Adrian Grange <agrange@google.com> +Aℓex Converse <aconverse@google.com> Ahmad Sharif <asharif@google.com> Alexander Voronov <avoronov@graphics.cs.msu.ru> -Alex Converse <aconverse@google.com> Alexis Ballier <aballier@gentoo.org> Alok Ahuja <waveletcoeff@gmail.com> Alpha Lam <hclam@google.com> @@ -16,8 +16,10 @@ Ami Fischman <fischman@chromium.org> Andoni Morales Alastruey <ylatuya@gmail.com> Andres Mejia <mcitadel@gmail.com> Andrew Russell <anrussell@google.com> +Angie Chiang <angiebird@google.com> Aron Rosenberg <arosenberg@logitech.com> Attila Nagy <attilanagy@google.com> +Brion Vibber 
<bvibber@wikimedia.org> changjun.yang <changjun.yang@intel.com> Charles 'Buck' Krasic <ckrasic@google.com> chm <chm@rock-chips.com> @@ -27,6 +29,7 @@ Deb Mukherjee <debargha@google.com> Dim Temp <dimtemp0@gmail.com> Dmitry Kovalev <dkovalev@google.com> Dragan Mrdjan <dmrdjan@mips.com> +Ed Baker <edward.baker@intel.com> Ehsan Akhgari <ehsan.akhgari@gmail.com> Erik Niemeyer <erik.a.niemeyer@intel.com> Fabio Pedretti <fabio.ped@libero.it> @@ -34,6 +37,8 @@ Frank Galligan <fgalligan@google.com> Fredrik Söderquist <fs@opera.com> Fritz Koenig <frkoenig@google.com> Gaute Strokkenes <gaute.strokkenes@broadcom.com> +Geza Lore <gezalore@gmail.com> +Ghislain MARY <ghislainmary2@gmail.com> Giuseppe Scrivano <gscrivano@gnu.org> Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com> Guillaume Martres <gmartres@google.com> @@ -44,7 +49,7 @@ Henrik Lundin <hlundin@google.com> Hui Su <huisu@google.com> Ivan Maltz <ivanmaltz@google.com> Jacek Caban <cjacek@gmail.com> -JackyChen <jackychen@google.com> +Jacky Chen <jackychen@google.com> James Berry <jamesberry@google.com> James Yu <james.yu@linaro.org> James Zern <jzern@google.com> @@ -60,9 +65,11 @@ Jingning Han <jingning@google.com> Joey Parrish <joeyparrish@google.com> Johann Koenig <johannkoenig@google.com> John Koleszar <jkoleszar@google.com> +Johnny Klonaris <google@jawknee.com> John Stark <jhnstrk@gmail.com> Joshua Bleecher Snyder <josh@treelinelabs.com> Joshua Litt <joshualitt@google.com> +Julia Robson <juliamrobson@gmail.com> Justin Clift <justin@salasaga.org> Justin Lebar <justin.lebar@gmail.com> KO Myung-Hun <komh@chollian.net> @@ -82,6 +89,7 @@ Mike Hommey <mhommey@mozilla.com> Mikhal Shemer <mikhal@google.com> Minghai Shang <minghai@google.com> Morton Jonuschat <yabawock@gmail.com> +Nico Weber <thakis@chromium.org> Parag Salasakar <img.mips1@gmail.com> Pascal Massimino <pascal.massimino@gmail.com> Patrik Westin <patrik.westin@gmail.com> @@ -96,7 +104,7 @@ Rafael Ávila de Espíndola <rafael.espindola@gmail.com> Rafaël Carré 
<funman@videolan.org> Ralph Giles <giles@xiph.org> Rob Bradford <rob@linux.intel.com> -Ronald S. Bultje <rbultje@google.com> +Ronald S. Bultje <rsbultje@gmail.com> Rui Ueyama <ruiu@google.com> Sami Pietilä <samipietila@google.com> Scott Graham <scottmg@chromium.org> @@ -104,6 +112,7 @@ Scott LaVarnway <slavarnway@google.com> Sean McGovern <gseanmcg@gmail.com> Sergey Ulanov <sergeyu@chromium.org> Shimon Doodkin <helpmepro1@gmail.com> +Shunyao Li <shunyaoli@google.com> Stefan Holmer <holmer@google.com> Suman Sunkara <sunkaras@google.com> Taekhyun Kim <takim@nvidia.com> diff --git a/libvpx/CHANGELOG b/libvpx/CHANGELOG index b0d30644..7746cc6c 100644 --- a/libvpx/CHANGELOG +++ b/libvpx/CHANGELOG @@ -1,7 +1,19 @@ -xxxx-yy-zz v1.4.0 "Changes for next release" - vpxenc is changed to use VP9 by default. - Encoder controls added for 1 pass SVC. - Decoder control to toggle on/off loopfilter. +2015-11-09 v1.5.0 "Javan Whistling Duck" + This release improves upon the VP9 encoder and speeds up the encoding and + decoding processes. + + - Upgrading: + This release is ABI incompatible with 1.4.0. It drops deprecated VP8 + controls and adds a variety of VP9 controls for testing. + + The vpxenc utility now prefers VP9 by default. + + - Enhancements: + Faster VP9 encoding and decoding + Smaller library size by combining functions used by VP8 and VP9 + + - Bug Fixes: + A variety of fuzzing issues 2015-04-03 v1.4.0 "Indian Runner Duck" This release includes significant improvements to the VP9 codec. 
diff --git a/libvpx/build/make/Makefile b/libvpx/build/make/Makefile index f1b1cca3..3081a926 100644 --- a/libvpx/build/make/Makefile +++ b/libvpx/build/make/Makefile @@ -140,6 +140,8 @@ $(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN) $(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN) $(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN) $(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN) +$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN) +$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN) $(BUILD_PFX)%.c.d: %.c $(if $(quiet),@echo " [DEP] $@") @@ -285,7 +287,7 @@ define archive_template # for creating them. $(1): $(if $(quiet),@echo " [AR] $$@") - $(qexec)$$(AR) $$(ARFLAGS) $$@ $$? + $(qexec)$$(AR) $$(ARFLAGS) $$@ $$^ endef define so_template diff --git a/libvpx/build/make/configure.sh b/libvpx/build/make/configure.sh index 688fa12c..c592b638 100755 --- a/libvpx/build/make/configure.sh +++ b/libvpx/build/make/configure.sh @@ -73,6 +73,7 @@ Build options: --target=TARGET target platform tuple [generic-gnu] --cpu=CPU optimize for a specific cpu rather than a family --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS] + --extra-cxxflags=ECXXFLAGS add ECXXFLAGS to CXXFLAGS [$CXXFLAGS] ${toggle_extra_warnings} emit harmless warnings (always non-fatal) ${toggle_werror} treat warnings as errors, if possible (not available with all compilers) @@ -200,6 +201,10 @@ disabled(){ eval test "x\$$1" = "xno" } +# Iterates through positional parameters, checks to confirm the parameter has +# not been explicitly (force) disabled, and enables the setting controlled by +# the parameter when the setting is not disabled. +# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS). soft_enable() { for var in $*; do if ! 
disabled $var; then @@ -209,6 +214,10 @@ soft_enable() { done } +# Iterates through positional parameters, checks to confirm the parameter has +# not been explicitly (force) enabled, and disables the setting controlled by +# the parameter when the setting is not enabled. +# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS). soft_disable() { for var in $*; do if ! enabled $var; then @@ -337,6 +346,10 @@ check_add_cflags() { check_cflags "$@" && add_cflags_only "$@" } +check_add_cxxflags() { + check_cxxflags "$@" && add_cxxflags_only "$@" +} + check_add_asflags() { log add_asflags "$@" add_asflags "$@" @@ -428,7 +441,7 @@ NM=${NM} CFLAGS = ${CFLAGS} CXXFLAGS = ${CXXFLAGS} -ARFLAGS = -rus\$(if \$(quiet),c,v) +ARFLAGS = -crs\$(if \$(quiet),,v) LDFLAGS = ${LDFLAGS} ASFLAGS = ${ASFLAGS} extralibs = ${extralibs} @@ -503,6 +516,9 @@ process_common_cmdline() { --extra-cflags=*) extra_cflags="${optval}" ;; + --extra-cxxflags=*) + extra_cxxflags="${optval}" + ;; --enable-?*|--disable-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then @@ -617,6 +633,11 @@ show_darwin_sdk_path() { xcodebuild -sdk $1 -version Path 2>/dev/null } +# Print the major version number of the Darwin SDK specified by $1. +show_darwin_sdk_major_version() { + xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1 +} + process_common_toolchain() { if [ -z "$toolchain" ]; then gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}" @@ -729,13 +750,14 @@ process_common_toolchain() { # platforms, so use the newest one available. 
case ${toolchain} in arm*-darwin*) - ios_sdk_dir="$(show_darwin_sdk_path iphoneos)" - if [ -d "${ios_sdk_dir}" ]; then - add_cflags "-isysroot ${ios_sdk_dir}" - add_ldflags "-isysroot ${ios_sdk_dir}" + add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}" + iphoneos_sdk_dir="$(show_darwin_sdk_path iphoneos)" + if [ -d "${iphoneos_sdk_dir}" ]; then + add_cflags "-isysroot ${iphoneos_sdk_dir}" + add_ldflags "-isysroot ${iphoneos_sdk_dir}" fi ;; - *-darwin*) + x86*-darwin*) osx_sdk_dir="$(show_darwin_sdk_path macosx)" if [ -d "${osx_sdk_dir}" ]; then add_cflags "-isysroot ${osx_sdk_dir}" @@ -811,16 +833,35 @@ process_common_toolchain() { die "Disabling neon while keeping neon-asm is not supported" fi case ${toolchain} in + # Apple iOS SDKs no longer support armv6 as of the version 9 + # release (coincides with release of Xcode 7). Only enable media + # when using earlier SDK releases. *-darwin*) - # Neon is guaranteed on iOS 6+ devices, while old media extensions - # no longer assemble with iOS 9 SDK + if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then + soft_enable media + else + soft_disable media + RTCD_OPTIONS="${RTCD_OPTIONS}--disable-media " + fi ;; *) soft_enable media + ;; esac ;; armv6) - soft_enable media + case ${toolchain} in + *-darwin*) + if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then + soft_enable media + else + die "Your iOS SDK does not support armv6." 
+ fi + ;; + *) + soft_enable media + ;; + esac ;; esac @@ -1003,6 +1044,12 @@ EOF done asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl" + + if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then + check_add_cflags -fembed-bitcode + check_add_asflags -fembed-bitcode + check_add_ldflags -fembed-bitcode + fi ;; linux*) @@ -1081,7 +1128,9 @@ EOF CROSS=${CROSS:-g} ;; os2) + disable_feature pic AS=${AS:-nasm} + add_ldflags -Zhigh-mem ;; esac @@ -1171,7 +1220,8 @@ EOF && AS="" fi [ "${AS}" = auto ] || [ -z "${AS}" ] \ - && die "Neither yasm nor nasm have been found" + && die "Neither yasm nor nasm have been found." \ + "See the prerequisites section in the README for more info." ;; esac log_echo " using $AS" @@ -1210,6 +1260,13 @@ EOF enabled x86 && sim_arch="-arch i386" || sim_arch="-arch x86_64" add_cflags ${sim_arch} add_ldflags ${sim_arch} + + if [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then + # yasm v1.3.0 doesn't know what -fembed-bitcode means, so turning it + # on is pointless (unless building a C-only lib). Warn the user, but + # do nothing here. + log "Warning: Bitcode embed disabled for simulator targets." 
+ fi ;; os2) add_asflags -f aout @@ -1323,12 +1380,6 @@ EOF add_cflags -D_LARGEFILE_SOURCE add_cflags -D_FILE_OFFSET_BITS=64 fi - - # append any user defined extra cflags - if [ -n "${extra_cflags}" ] ; then - check_add_cflags ${extra_cflags} || \ - die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler" - fi } process_toolchain() { diff --git a/libvpx/build/make/iosbuild.sh b/libvpx/build/make/iosbuild.sh index 89fa6818..6f7180d0 100755 --- a/libvpx/build/make/iosbuild.sh +++ b/libvpx/build/make/iosbuild.sh @@ -25,7 +25,6 @@ CONFIGURE_ARGS="--disable-docs DIST_DIR="_dist" FRAMEWORK_DIR="VPX.framework" HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx" -MAKE_JOBS=1 SCRIPT_DIR=$(dirname "$0") LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd) LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo) @@ -41,15 +40,24 @@ TARGETS="arm64-darwin-gcc build_target() { local target="$1" local old_pwd="$(pwd)" + local target_specific_flags="" vlog "***Building target: ${target}***" + case "${target}" in + x86-*) + target_specific_flags="--enable-pic" + vlog "Enabled PIC for ${target}" + ;; + esac + mkdir "${target}" cd "${target}" eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \ - ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${devnull} + ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \ + ${devnull} export DIST_DIR - eval make -j ${MAKE_JOBS} dist ${devnull} + eval make dist ${devnull} cd "${old_pwd}" vlog "***Done building target: ${target}***" @@ -194,11 +202,12 @@ cat << EOF Usage: ${0##*/} [arguments] --help: Display this message and exit. --extra-configure-args <args>: Extra args to pass when configuring libvpx. - --jobs: Number of make jobs. --preserve-build-output: Do not delete the build directory. --show-build-output: Show output from each library build. --targets <targets>: Override default target list. Defaults: ${TARGETS} + --test-link: Confirms all targets can be linked. 
Functionally identical to + passing --enable-examples via --extra-configure-args. --verbose: Output information about the environment and each stage of the build. EOF @@ -227,16 +236,15 @@ while [ -n "$1" ]; do iosbuild_usage exit ;; - --jobs) - MAKE_JOBS="$2" - shift - ;; --preserve-build-output) PRESERVE_BUILD_OUTPUT=yes ;; --show-build-output) devnull= ;; + --test-link) + EXTRA_CONFIGURE_ARGS="${EXTRA_CONFIGURE_ARGS} --enable-examples" + ;; --targets) TARGETS="$2" shift @@ -260,11 +268,11 @@ cat << EOF EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS} FRAMEWORK_DIR=${FRAMEWORK_DIR} HEADER_DIR=${HEADER_DIR} - MAKE_JOBS=${MAKE_JOBS} - PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT} LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR} LIPO=${LIPO} + MAKEFLAGS=${MAKEFLAGS} ORIG_PWD=${ORIG_PWD} + PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT} TARGETS="${TARGETS}" EOF fi diff --git a/libvpx/configure b/libvpx/configure index ac196dac..a40f3abb 100755 --- a/libvpx/configure +++ b/libvpx/configure @@ -264,6 +264,7 @@ EXPERIMENT_LIST=" spatial_svc fp_mb_stats emulate_hardware + misc_fixes " CONFIG_LIST=" dependency_tracking @@ -716,6 +717,16 @@ EOF esac # libwebm needs to be linked with C++ standard library enabled webm_io && LD=${CXX} + + # append any user defined extra cflags + if [ -n "${extra_cflags}" ] ; then + check_add_cflags ${extra_cflags} || \ + die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler" + fi + if [ -n "${extra_cxxflags}" ]; then + check_add_cxxflags ${extra_cxxflags} || \ + die "Requested extra CXXFLAGS '${extra_cxxflags}' not supported by compiler" + fi } diff --git a/libvpx/examples.mk b/libvpx/examples.mk index dfa5a654..f10bec68 100644 --- a/libvpx/examples.mk +++ b/libvpx/examples.mk @@ -36,6 +36,8 @@ LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \ third_party/libyuv/source/scale_neon64.cc \ third_party/libyuv/source/scale_win.cc \ +LIBWEBM_COMMON_SRCS += third_party/libwebm/webmids.hpp + LIBWEBM_MUXER_SRCS += 
third_party/libwebm/mkvmuxer.cpp \ third_party/libwebm/mkvmuxerutil.cpp \ third_party/libwebm/mkvwriter.cpp \ @@ -43,8 +45,7 @@ LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \ third_party/libwebm/mkvmuxertypes.hpp \ third_party/libwebm/mkvmuxerutil.hpp \ third_party/libwebm/mkvparser.hpp \ - third_party/libwebm/mkvwriter.hpp \ - third_party/libwebm/webmids.hpp + third_party/libwebm/mkvwriter.hpp LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \ third_party/libwebm/mkvreader.cpp \ @@ -68,6 +69,7 @@ ifeq ($(CONFIG_LIBYUV),yes) vpxdec.SRCS += $(LIBYUV_SRCS) endif ifeq ($(CONFIG_WEBM_IO),yes) + vpxdec.SRCS += $(LIBWEBM_COMMON_SRCS) vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS) vpxdec.SRCS += webmdec.cc webmdec.h endif @@ -89,6 +91,7 @@ ifeq ($(CONFIG_LIBYUV),yes) vpxenc.SRCS += $(LIBYUV_SRCS) endif ifeq ($(CONFIG_WEBM_IO),yes) + vpxenc.SRCS += $(LIBWEBM_COMMON_SRCS) vpxenc.SRCS += $(LIBWEBM_MUXER_SRCS) vpxenc.SRCS += webmenc.cc webmenc.h endif diff --git a/libvpx/examples/vp9_spatial_svc_encoder.c b/libvpx/examples/vp9_spatial_svc_encoder.c index 5a609766..b26e9873 100644 --- a/libvpx/examples/vp9_spatial_svc_encoder.c +++ b/libvpx/examples/vp9_spatial_svc_encoder.c @@ -25,6 +25,7 @@ #include "../tools_common.h" #include "../video_writer.h" +#include "../vpx_ports/vpx_timer.h" #include "vpx/svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" @@ -79,6 +80,8 @@ static const arg_def_t rc_end_usage_arg = ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q"); static const arg_def_t speed_arg = ARG_DEF("sp", "speed", 1, "speed configuration"); +static const arg_def_t aqmode_arg = + ARG_DEF("aq", "aqmode", 1, "aq-mode off/on"); #if CONFIG_VP9_HIGHBITDEPTH static const struct arg_enum_list bitdepth_enum[] = { @@ -100,7 +103,7 @@ static const arg_def_t *svc_args[] = { &kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg, &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg, &max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg, 
- &lag_in_frame_arg, &threads_arg, + &lag_in_frame_arg, &threads_arg, &aqmode_arg, #if OUTPUT_RC_STATS &output_rc_stats_arg, #endif @@ -220,6 +223,8 @@ static void parse_command_line(int argc, const char **argv_, #endif } else if (arg_match(&arg, &speed_arg, argi)) { svc_ctx->speed = arg_parse_uint(&arg); + } else if (arg_match(&arg, &aqmode_arg, argi)) { + svc_ctx->aqmode = arg_parse_uint(&arg); } else if (arg_match(&arg, &threads_arg, argi)) { svc_ctx->threads = arg_parse_uint(&arg); } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) { @@ -539,6 +544,59 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data, } #endif +// Example pattern for spatial layers and 2 temporal layers used in the +// bypass/flexible mode. The pattern corresponds to the pattern +// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in +// non-flexible mode. +void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers, + int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { + for (sl = 0; sl < num_spatial_layers; ++sl) { + if (!tl) { + if (!sl) { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + } else { + if (is_key_frame) { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_LAST | + VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + } else { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + } + } + } else if (tl == 1) { + if (!sl) { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_GF; + } else { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_GF; + } + } + if (tl == 0) { + ref_frame_config->lst_fb_idx[sl] = sl; + if (sl) + ref_frame_config->gld_fb_idx[sl] = sl - 1; + else + ref_frame_config->gld_fb_idx[sl] = 0; + 
ref_frame_config->alt_fb_idx[sl] = 0; + } else if (tl == 1) { + ref_frame_config->lst_fb_idx[sl] = sl; + ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1; + ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl; + } + } +} + int main(int argc, const char **argv) { AppInput app_input = {0}; VpxVideoWriter *writer = NULL; @@ -559,11 +617,14 @@ int main(int argc, const char **argv) { VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL}; struct RateControlStats rc; vpx_svc_layer_id_t layer_id; + vpx_svc_ref_frame_config_t ref_frame_config; int sl, tl; double sum_bitrate = 0.0; double sum_bitrate2 = 0.0; double framerate = 30.0; #endif + struct vpx_usec_timer timer; + int64_t cx_time = 0; memset(&svc_ctx, 0, sizeof(svc_ctx)); svc_ctx.log_print = 1; exec_name = argv[0]; @@ -632,6 +693,9 @@ int main(int argc, const char **argv) { vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed); if (svc_ctx.threads) vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1)); + if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1) + vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); + // Encode frames while (!end_of_stream) { @@ -643,9 +707,36 @@ int main(int argc, const char **argv) { end_of_stream = 1; } + // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates) + // and the buffer indices for each spatial layer of the current + // (super)frame to be encoded. The temporal layer_id for the current frame + // also needs to be set. + // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS" + // mode to "VP9E_LAYERING_MODE_BYPASS". + if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + // Example for 2 temporal layers. + if (frame_cnt % 2 == 0) + layer_id.temporal_layer_id = 0; + else + layer_id.temporal_layer_id = 1; + // Note that we only set the temporal layer_id, since we are calling + // the encode for the whole superframe. 
The encoder will internally loop + // over all the spatial layers for the current superframe. + vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); + set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id, + svc_ctx.spatial_layers, + frame_cnt == 0, + &ref_frame_config); + vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG, + &ref_frame_config); + } + + vpx_usec_timer_start(&timer); res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw), pts, frame_duration, svc_ctx.speed >= 5 ? VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY); + vpx_usec_timer_mark(&timer); + cx_time += vpx_usec_timer_elapsed(&timer); printf("%s", vpx_svc_get_message(&svc_ctx)); if (res != VPX_CODEC_OK) { @@ -784,6 +875,10 @@ int main(int argc, const char **argv) { } } #endif + printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", + frame_cnt, + 1000 * (float)cx_time / (double)(frame_cnt * 1000000), + 1000000 * (double)frame_cnt / (double)cx_time); vpx_img_free(&raw); // display average size, psnr printf("%s", vpx_svc_dump_statistics(&svc_ctx)); diff --git a/libvpx/examples/vpx_temporal_svc_encoder.c b/libvpx/examples/vpx_temporal_svc_encoder.c index ee7de6b7..5adda9ee 100644 --- a/libvpx/examples/vpx_temporal_svc_encoder.c +++ b/libvpx/examples/vpx_temporal_svc_encoder.c @@ -684,14 +684,14 @@ int main(int argc, char **argv) { if (strncmp(encoder->name, "vp8", 3) == 0) { vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff); - vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0); + vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); } else if (strncmp(encoder->name, "vp9", 3) == 0) { vpx_svc_extra_cfg_t svc_params; vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0); - vpx_codec_control(&codec, 
VP8E_SET_STATIC_THRESHOLD, 0); + vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0); vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1)); if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) diff --git a/libvpx/libs.mk b/libvpx/libs.mk index b9d4b286..f28d84a5 100644 --- a/libvpx/libs.mk +++ b/libvpx/libs.mk @@ -53,7 +53,7 @@ CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS)) include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS)) -ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),) +ifeq ($(CONFIG_VP8),yes) VP8_PREFIX=vp8/ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk endif @@ -76,7 +76,7 @@ ifeq ($(CONFIG_VP8_DECODER),yes) CODEC_DOC_SECTIONS += vp8 vp8_decoder endif -ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),) +ifeq ($(CONFIG_VP9),yes) VP9_PREFIX=vp9/ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk endif @@ -110,7 +110,7 @@ VP9_PREFIX=vp9/ $(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra # VP10 make file -ifneq ($(CONFIG_VP10_ENCODER)$(CONFIG_VP10_DECODER),) +ifeq ($(CONFIG_VP10),yes) VP10_PREFIX=vp10/ include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10_common.mk endif @@ -260,7 +260,7 @@ OBJS-yes += $(LIBVPX_OBJS) LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS) -SO_VERSION_MAJOR := 2 +SO_VERSION_MAJOR := 3 SO_VERSION_MINOR := 0 SO_VERSION_PATCH := 0 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS)) diff --git a/libvpx/test/active_map_refresh_test.cc b/libvpx/test/active_map_refresh_test.cc new file mode 100644 index 00000000..c9456614 --- /dev/null +++ b/libvpx/test/active_map_refresh_test.cc @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <algorithm> +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/y4m_video_source.h" + +namespace { + +// Check if any pixel in a 16x16 macroblock varies between frames. +int CheckMb(const vpx_image_t &current, const vpx_image_t &previous, + int mb_r, int mb_c) { + for (int plane = 0; plane < 3; plane++) { + int r = 16 * mb_r; + int c0 = 16 * mb_c; + int r_top = std::min(r + 16, static_cast<int>(current.d_h)); + int c_top = std::min(c0 + 16, static_cast<int>(current.d_w)); + r = std::max(r, 0); + c0 = std::max(c0, 0); + if (plane > 0 && current.x_chroma_shift) { + c_top = (c_top + 1) >> 1; + c0 >>= 1; + } + if (plane > 0 && current.y_chroma_shift) { + r_top = (r_top + 1) >> 1; + r >>= 1; + } + for (; r < r_top; ++r) { + for (int c = c0; c < c_top; ++c) { + if (current.planes[plane][current.stride[plane] * r + c] != + previous.planes[plane][previous.stride[plane] * r + c]) + return 1; + } + } + } + return 0; +} + +void GenerateMap(int mb_rows, int mb_cols, const vpx_image_t &current, + const vpx_image_t &previous, uint8_t *map) { + for (int mb_r = 0; mb_r < mb_rows; ++mb_r) { + for (int mb_c = 0; mb_c < mb_cols; ++mb_c) { + map[mb_r * mb_cols + mb_c] = CheckMb(current, previous, mb_r, mb_c); + } + } +} + +const int kAqModeCyclicRefresh = 3; + +class ActiveMapRefreshTest + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { + protected: + ActiveMapRefreshTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~ActiveMapRefreshTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + cpu_used_ = GET_PARAM(2); + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, +
::libvpx_test::Encoder *encoder) { + ::libvpx_test::Y4mVideoSource *y4m_video = + static_cast<libvpx_test::Y4mVideoSource *>(video); + if (video->frame() == 1) { + encoder->Control(VP8E_SET_CPUUSED, cpu_used_); + encoder->Control(VP9E_SET_AQ_MODE, kAqModeCyclicRefresh); + } else if (video->frame() >= 2 && video->img()) { + vpx_image_t *current = video->img(); + vpx_image_t *previous = y4m_holder_->img(); + ASSERT_TRUE(previous != NULL); + vpx_active_map_t map = vpx_active_map_t(); + const int width = static_cast<int>(current->d_w); + const int height = static_cast<int>(current->d_h); + const int mb_width = (width + 15) / 16; + const int mb_height = (height + 15) / 16; + uint8_t *active_map = new uint8_t[mb_width * mb_height]; + GenerateMap(mb_height, mb_width, *current, *previous, active_map); + map.cols = mb_width; + map.rows = mb_height; + map.active_map = active_map; + encoder->Control(VP8E_SET_ACTIVEMAP, &map); + delete[] active_map; + } + if (video->img()) { + y4m_video->SwapBuffers(y4m_holder_); + } + } + + int cpu_used_; + ::libvpx_test::Y4mVideoSource *y4m_holder_; +}; + +TEST_P(ActiveMapRefreshTest, Test) { + cfg_.g_lag_in_frames = 0; + cfg_.g_profile = 1; + cfg_.rc_target_bitrate = 600; + cfg_.rc_resize_allowed = 0; + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 30; + cfg_.g_pass = VPX_RC_ONE_PASS; + cfg_.rc_end_usage = VPX_CBR; + cfg_.kf_max_dist = 90000; + + ::libvpx_test::Y4mVideoSource video("desktop_credits.y4m", 0, 30); + ::libvpx_test::Y4mVideoSource video_holder("desktop_credits.y4m", 0, 30); + video_holder.Begin(); + y4m_holder_ = &video_holder; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +VP9_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest, + ::testing::Values(::libvpx_test::kRealTime), + ::testing::Range(5, 6)); +} // namespace diff --git a/libvpx/test/convolve_test.cc b/libvpx/test/convolve_test.cc index e0e929e6..08267882 100644 --- a/libvpx/test/convolve_test.cc +++ b/libvpx/test/convolve_test.cc @@ -960,511 +960,72 @@ 
TEST_P(ConvolveTest, CheckScalingFiltering) { using std::tr1::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH -#if HAVE_SSE2 && ARCH_X86_64 -void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, - filter_x_stride, filter_y, filter_y_stride, - w, h, 8); -} - -void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - 
-void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const 
int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - 
vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); +#define WRAP(func, bd) \ +void wrap_ ## func ## _ ## bd(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, \ + int filter_x_stride, \ + const int16_t *filter_y, \ + int filter_y_stride, \ + int w, int h) { \ + vpx_highbd_ ## func(src, src_stride, dst, dst_stride, filter_x, \ + filter_x_stride, filter_y, filter_y_stride, \ + w, h, bd); \ } +#if HAVE_SSE2 && ARCH_X86_64 +#if CONFIG_USE_X86INC +WRAP(convolve_copy_sse2, 8) +WRAP(convolve_avg_sse2, 8) +WRAP(convolve_copy_sse2, 10) +WRAP(convolve_avg_sse2, 10) +WRAP(convolve_copy_sse2, 12) +WRAP(convolve_avg_sse2, 12) +#endif // CONFIG_USE_X86INC +WRAP(convolve8_horiz_sse2, 8) +WRAP(convolve8_avg_horiz_sse2, 8) +WRAP(convolve8_vert_sse2, 8) +WRAP(convolve8_avg_vert_sse2, 8) +WRAP(convolve8_sse2, 8) +WRAP(convolve8_avg_sse2, 8) +WRAP(convolve8_horiz_sse2, 10) +WRAP(convolve8_avg_horiz_sse2, 10) +WRAP(convolve8_vert_sse2, 10) +WRAP(convolve8_avg_vert_sse2, 10) +WRAP(convolve8_sse2, 10) +WRAP(convolve8_avg_sse2, 10) +WRAP(convolve8_horiz_sse2, 12) +WRAP(convolve8_avg_horiz_sse2, 12) +WRAP(convolve8_vert_sse2, 
12) +WRAP(convolve8_avg_vert_sse2, 12) +WRAP(convolve8_sse2, 12) +WRAP(convolve8_avg_sse2, 12) #endif // HAVE_SSE2 && ARCH_X86_64 -void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void 
wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 8); -} - -void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t 
*filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 10); -} - -void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, 
filter_y_stride, w, h, 10); -} - -void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const 
int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} - -void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, int h) { - vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride, - filter_x, filter_x_stride, - filter_y, filter_y_stride, w, h, 12); -} +WRAP(convolve_copy_c, 8) +WRAP(convolve_avg_c, 8) +WRAP(convolve8_horiz_c, 8) +WRAP(convolve8_avg_horiz_c, 8) +WRAP(convolve8_vert_c, 8) +WRAP(convolve8_avg_vert_c, 8) +WRAP(convolve8_c, 8) +WRAP(convolve8_avg_c, 8) +WRAP(convolve_copy_c, 10) +WRAP(convolve_avg_c, 10) +WRAP(convolve8_horiz_c, 10) +WRAP(convolve8_avg_horiz_c, 10) +WRAP(convolve8_vert_c, 10) +WRAP(convolve8_avg_vert_c, 10) +WRAP(convolve8_c, 10) +WRAP(convolve8_avg_c, 10) +WRAP(convolve_copy_c, 12) +WRAP(convolve_avg_c, 12) +WRAP(convolve8_horiz_c, 12) +WRAP(convolve8_avg_horiz_c, 12) +WRAP(convolve8_vert_c, 12) +WRAP(convolve8_avg_vert_c, 12) +WRAP(convolve8_c, 12) +WRAP(convolve8_avg_c, 12) +#undef WRAP const ConvolveFunctions convolve8_c( wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, @@ -1563,7 +1124,11 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values( #if HAVE_SSE2 && ARCH_X86_64 #if CONFIG_VP9_HIGHBITDEPTH const ConvolveFunctions convolve8_sse2( +#if CONFIG_USE_X86INC + wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8, 
+#else wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, +#endif // CONFIG_USE_X86INC wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, @@ -1571,7 +1136,11 @@ const ConvolveFunctions convolve8_sse2( wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8); const ConvolveFunctions convolve10_sse2( +#if CONFIG_USE_X86INC + wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10, +#else wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, +#endif // CONFIG_USE_X86INC wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, @@ -1579,7 +1148,11 @@ const ConvolveFunctions convolve10_sse2( wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10); const ConvolveFunctions convolve12_sse2( +#if CONFIG_USE_X86INC + wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12, +#else wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, +#endif // CONFIG_USE_X86INC wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, diff --git a/libvpx/test/dct16x16_test.cc b/libvpx/test/dct16x16_test.cc index e9de76ad..332210da 100644 --- a/libvpx/test/dct16x16_test.cc +++ b/libvpx/test/dct16x16_test.cc @@ -40,30 +40,6 @@ static int round(double x) { #endif const int kNumCoeffs = 256; -const double PI = 3.1415926535898; -void reference2_16x16_idct_2d(double *input, double *output) { - double x; - for (int l = 0; l < 16; ++l) { - for (int k = 0; k < 16; ++k) { - double s = 0; - for (int i = 0; i < 16; ++i) { - for (int j = 0; j < 16; ++j) { - x = cos(PI * j * (l + 0.5) / 16.0) * - cos(PI * i * (k + 0.5) / 16.0) * - input[i * 16 + j] / 
256; - if (i != 0) - x *= sqrt(2.0); - if (j != 0) - x *= sqrt(2.0); - s += x; - } - } - output[k*16+l] = s; - } - } -} - - const double C1 = 0.995184726672197; const double C2 = 0.98078528040323; const double C3 = 0.956940335732209; diff --git a/libvpx/test/encode_test_driver.cc b/libvpx/test/encode_test_driver.cc index be4ef9af..128436ee 100644 --- a/libvpx/test/encode_test_driver.cc +++ b/libvpx/test/encode_test_driver.cc @@ -195,6 +195,7 @@ void EncoderTest::RunLoop(VideoSource *video) { video->Begin(); encoder->InitEncoder(video); + ASSERT_FALSE(::testing::Test::HasFatalFailure()); unsigned long dec_init_flags = 0; // NOLINT // Use fragment decoder if encoder outputs partitions. diff --git a/libvpx/test/encode_test_driver.h b/libvpx/test/encode_test_driver.h index 9ecc4989..6d0a72f9 100644 --- a/libvpx/test/encode_test_driver.h +++ b/libvpx/test/encode_test_driver.h @@ -124,6 +124,11 @@ class Encoder { ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } + void Control(int ctrl_id, int *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + void Control(int ctrl_id, struct vpx_scaling_mode *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); diff --git a/libvpx/test/error_resilience_test.cc b/libvpx/test/error_resilience_test.cc index 9e512adb..9a2ad2f3 100644 --- a/libvpx/test/error_resilience_test.cc +++ b/libvpx/test/error_resilience_test.cc @@ -20,10 +20,11 @@ const int kMaxErrorFrames = 12; const int kMaxDroppableFrames = 12; class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest, - public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { + public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, bool> { protected: ErrorResilienceTestLarge() : EncoderTest(GET_PARAM(0)), + svc_support_(GET_PARAM(2)), psnr_(0.0), nframes_(0), mismatch_psnr_(0.0), @@ -193,6 +194,8 @@ class 
ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest, pattern_switch_ = frame_switch; } + bool svc_support_; + private: double psnr_; unsigned int nframes_; @@ -302,6 +305,10 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) { // two layer temporal pattern. The base layer does not predict from the top // layer, so successful decoding is expected. TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) { + // This test doesn't run if SVC is not supported. + if (!svc_support_) + return; + const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 500; @@ -347,6 +354,10 @@ TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) { // for a two layer temporal pattern, where at some point in the // sequence, the LAST ref is not used anymore. TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) { + // This test doesn't run if SVC is not supported. + if (!svc_support_) + return; + const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 500; @@ -579,9 +590,13 @@ TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) { } } -VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES); +VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES, + ::testing::Values(true)); VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls, ONE_PASS_TEST_MODES); -VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES); -VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES); +VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES, + ::testing::Values(true)); +// SVC-related tests don't run for VP10 since SVC is not supported. 
+VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES, + ::testing::Values(false)); } // namespace diff --git a/libvpx/test/frame_size_tests.cc b/libvpx/test/frame_size_tests.cc index 95cc66ad..d39c8f6e 100644 --- a/libvpx/test/frame_size_tests.cc +++ b/libvpx/test/frame_size_tests.cc @@ -74,7 +74,7 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) { // size or almost 1 gig of memory. // In total the allocations will exceed 2GiB which may cause a failure with // mingw + wine, use a smaller size in that case. -#if defined(_WIN32) && !defined(_WIN64) +#if defined(_WIN32) && !defined(_WIN64) || defined(__OS2__) video.SetSize(4096, 3072); #else video.SetSize(4096, 4096); diff --git a/libvpx/test/idct8x8_test.cc b/libvpx/test/idct8x8_test.cc index 987ba753..7f9d751d 100644 --- a/libvpx/test/idct8x8_test.cc +++ b/libvpx/test/idct8x8_test.cc @@ -67,43 +67,6 @@ void reference_dct_2d(int16_t input[64], double output[64]) { output[i] *= 2; } -void reference_idct_1d(double input[8], double output[8]) { - const double kPi = 3.141592653589793238462643383279502884; - const double kSqrt2 = 1.414213562373095048801688724209698; - for (int k = 0; k < 8; k++) { - output[k] = 0.0; - for (int n = 0; n < 8; n++) { - output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0); - if (n == 0) - output[k] = output[k]/kSqrt2; - } - } -} - -void reference_idct_2d(double input[64], int16_t output[64]) { - double out[64], out2[64]; - // First transform rows - for (int i = 0; i < 8; ++i) { - double temp_in[8], temp_out[8]; - for (int j = 0; j < 8; ++j) - temp_in[j] = input[j + i*8]; - reference_idct_1d(temp_in, temp_out); - for (int j = 0; j < 8; ++j) - out[j + i*8] = temp_out[j]; - } - // Then transform columns - for (int i = 0; i < 8; ++i) { - double temp_in[8], temp_out[8]; - for (int j = 0; j < 8; ++j) - temp_in[j] = out[j*8 + i]; - reference_idct_1d(temp_in, temp_out); - for (int j = 0; j < 8; ++j) - out2[j*8 + i] = temp_out[j]; - } - for (int i = 0; i < 64; ++i) - output[i] = 
round(out2[i]/32); -} - TEST(VP9Idct8x8Test, AccuracyCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 10000; diff --git a/libvpx/test/intrapred_test.cc b/libvpx/test/intrapred_test.cc deleted file mode 100644 index 65a06974..00000000 --- a/libvpx/test/intrapred_test.cc +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <string.h> - -#include "third_party/googletest/src/include/gtest/gtest.h" - -#include "./vpx_config.h" -#include "./vp8_rtcd.h" -#include "test/acm_random.h" -#include "test/clear_system_state.h" -#include "test/register_state_check.h" -#include "vp8/common/blockd.h" -#include "vpx_mem/vpx_mem.h" - -namespace { - -using libvpx_test::ACMRandom; - -class IntraPredBase { - public: - virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); } - - protected: - void SetupMacroblock(MACROBLOCKD *mbptr, - MODE_INFO *miptr, - uint8_t *data, - int block_size, - int stride, - int num_planes) { - mbptr_ = mbptr; - miptr_ = miptr; - mbptr_->up_available = 1; - mbptr_->left_available = 1; - mbptr_->mode_info_context = miptr_; - stride_ = stride; - block_size_ = block_size; - num_planes_ = num_planes; - for (int p = 0; p < num_planes; p++) - data_ptr_[p] = data + stride * (block_size + 1) * p + - stride + block_size; - } - - void FillRandom() { - // Fill edges with random data - ACMRandom rnd(ACMRandom::DeterministicSeed()); - for (int p = 0; p < num_planes_; p++) { - for (int x = -1 ; x <= block_size_; x++) - data_ptr_[p][x - stride_] = rnd.Rand8(); - for (int y = 0; y < block_size_; y++) - data_ptr_[p][y * stride_ - 1] = 
rnd.Rand8(); - } - } - - virtual void Predict(MB_PREDICTION_MODE mode) = 0; - - void SetLeftUnavailable() { - mbptr_->left_available = 0; - for (int p = 0; p < num_planes_; p++) - for (int i = -1; i < block_size_; ++i) - data_ptr_[p][stride_ * i - 1] = 129; - } - - void SetTopUnavailable() { - mbptr_->up_available = 0; - for (int p = 0; p < num_planes_; p++) - memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2); - } - - void SetTopLeftUnavailable() { - SetLeftUnavailable(); - SetTopUnavailable(); - } - - int BlockSizeLog2Min1() const { - switch (block_size_) { - case 16: - return 3; - case 8: - return 2; - default: - return 0; - } - } - - // check DC prediction output against a reference - void CheckDCPrediction() const { - for (int p = 0; p < num_planes_; p++) { - // calculate expected DC - int expected; - if (mbptr_->up_available || mbptr_->left_available) { - int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available + - mbptr_->left_available; - if (mbptr_->up_available) - for (int x = 0; x < block_size_; x++) - sum += data_ptr_[p][x - stride_]; - if (mbptr_->left_available) - for (int y = 0; y < block_size_; y++) - sum += data_ptr_[p][y * stride_ - 1]; - expected = (sum + (1 << (shift - 1))) >> shift; - } else { - expected = 0x80; - } - // check that all subsequent lines are equal to the first - for (int y = 1; y < block_size_; ++y) - ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_], - block_size_)); - // within the first line, ensure that each pixel has the same value - for (int x = 1; x < block_size_; ++x) - ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]); - // now ensure that that pixel has the expected (DC) value - ASSERT_EQ(expected, data_ptr_[p][0]); - } - } - - // check V prediction output against a reference - void CheckVPrediction() const { - // check that all lines equal the top border - for (int p = 0; p < num_planes_; p++) - for (int y = 0; y < block_size_; y++) - ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_], - &data_ptr_[p][y * 
stride_], block_size_)); - } - - // check H prediction output against a reference - void CheckHPrediction() const { - // for each line, ensure that each pixel is equal to the left border - for (int p = 0; p < num_planes_; p++) - for (int y = 0; y < block_size_; y++) - for (int x = 0; x < block_size_; x++) - ASSERT_EQ(data_ptr_[p][-1 + y * stride_], - data_ptr_[p][x + y * stride_]); - } - - static int ClipByte(int value) { - if (value > 255) - return 255; - else if (value < 0) - return 0; - return value; - } - - // check TM prediction output against a reference - void CheckTMPrediction() const { - for (int p = 0; p < num_planes_; p++) - for (int y = 0; y < block_size_; y++) - for (int x = 0; x < block_size_; x++) { - const int expected = ClipByte(data_ptr_[p][x - stride_] - + data_ptr_[p][stride_ * y - 1] - - data_ptr_[p][-1 - stride_]); - ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]); - } - } - - // Actual test - void RunTest() { - { - SCOPED_TRACE("DC_PRED"); - FillRandom(); - Predict(DC_PRED); - CheckDCPrediction(); - } - { - SCOPED_TRACE("DC_PRED LEFT"); - FillRandom(); - SetLeftUnavailable(); - Predict(DC_PRED); - CheckDCPrediction(); - } - { - SCOPED_TRACE("DC_PRED TOP"); - FillRandom(); - SetTopUnavailable(); - Predict(DC_PRED); - CheckDCPrediction(); - } - { - SCOPED_TRACE("DC_PRED TOP_LEFT"); - FillRandom(); - SetTopLeftUnavailable(); - Predict(DC_PRED); - CheckDCPrediction(); - } - { - SCOPED_TRACE("H_PRED"); - FillRandom(); - Predict(H_PRED); - CheckHPrediction(); - } - { - SCOPED_TRACE("V_PRED"); - FillRandom(); - Predict(V_PRED); - CheckVPrediction(); - } - { - SCOPED_TRACE("TM_PRED"); - FillRandom(); - Predict(TM_PRED); - CheckTMPrediction(); - } - } - - MACROBLOCKD *mbptr_; - MODE_INFO *miptr_; - uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used - int stride_; - int block_size_; - int num_planes_; -}; - -typedef void (*IntraPredYFunc)(MACROBLOCKD *x, - uint8_t *yabove_row, - uint8_t *yleft, - int left_stride, - uint8_t *ypred_ptr, - 
int y_stride); - -class IntraPredYTest - : public IntraPredBase, - public ::testing::TestWithParam<IntraPredYFunc> { - public: - static void SetUpTestCase() { - mb_ = reinterpret_cast<MACROBLOCKD*>( - vpx_memalign(32, sizeof(MACROBLOCKD))); - mi_ = reinterpret_cast<MODE_INFO*>( - vpx_memalign(32, sizeof(MODE_INFO))); - data_array_ = reinterpret_cast<uint8_t*>( - vpx_memalign(kDataAlignment, kDataBufferSize)); - } - - static void TearDownTestCase() { - vpx_free(data_array_); - vpx_free(mi_); - vpx_free(mb_); - data_array_ = NULL; - } - - protected: - static const int kBlockSize = 16; - static const int kDataAlignment = 16; - static const int kStride = kBlockSize * 3; - // We use 48 so that the data pointer of the first pixel in each row of - // each macroblock is 16-byte aligned, and this gives us access to the - // top-left and top-right corner pixels belonging to the top-left/right - // macroblocks. - // We use 17 lines so we have one line above us for top-prediction. - static const int kDataBufferSize = kStride * (kBlockSize + 1); - - virtual void SetUp() { - pred_fn_ = GetParam(); - SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1); - } - - virtual void Predict(MB_PREDICTION_MODE mode) { - mbptr_->mode_info_context->mbmi.mode = mode; - ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_, - data_ptr_[0] - kStride, - data_ptr_[0] - 1, kStride, - data_ptr_[0], kStride)); - } - - IntraPredYFunc pred_fn_; - static uint8_t* data_array_; - static MACROBLOCKD * mb_; - static MODE_INFO *mi_; -}; - -MACROBLOCKD* IntraPredYTest::mb_ = NULL; -MODE_INFO* IntraPredYTest::mi_ = NULL; -uint8_t* IntraPredYTest::data_array_ = NULL; - -TEST_P(IntraPredYTest, IntraPredTests) { - RunTest(); -} - -INSTANTIATE_TEST_CASE_P(C, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_c)); -#if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_sse2)); -#endif -#if HAVE_SSSE3 -INSTANTIATE_TEST_CASE_P(SSSE3, 
IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_ssse3)); -#endif -#if HAVE_NEON -INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_neon)); -#endif -#if HAVE_MSA -INSTANTIATE_TEST_CASE_P(MSA, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_msa)); -#endif - -typedef void (*IntraPredUvFunc)(MACROBLOCKD *x, - uint8_t *uabove_row, - uint8_t *vabove_row, - uint8_t *uleft, - uint8_t *vleft, - int left_stride, - uint8_t *upred_ptr, - uint8_t *vpred_ptr, - int pred_stride); - -class IntraPredUVTest - : public IntraPredBase, - public ::testing::TestWithParam<IntraPredUvFunc> { - public: - static void SetUpTestCase() { - mb_ = reinterpret_cast<MACROBLOCKD*>( - vpx_memalign(32, sizeof(MACROBLOCKD))); - mi_ = reinterpret_cast<MODE_INFO*>( - vpx_memalign(32, sizeof(MODE_INFO))); - data_array_ = reinterpret_cast<uint8_t*>( - vpx_memalign(kDataAlignment, kDataBufferSize)); - } - - static void TearDownTestCase() { - vpx_free(data_array_); - vpx_free(mi_); - vpx_free(mb_); - data_array_ = NULL; - } - - protected: - static const int kBlockSize = 8; - static const int kDataAlignment = 8; - static const int kStride = kBlockSize * 3; - // We use 24 so that the data pointer of the first pixel in each row of - // each macroblock is 8-byte aligned, and this gives us access to the - // top-left and top-right corner pixels belonging to the top-left/right - // macroblocks. - // We use 9 lines so we have one line above us for top-prediction. 
- // [0] = U, [1] = V - static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1); - - virtual void SetUp() { - pred_fn_ = GetParam(); - SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2); - } - - virtual void Predict(MB_PREDICTION_MODE mode) { - mbptr_->mode_info_context->mbmi.uv_mode = mode; - pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride, - data_ptr_[0] - 1, data_ptr_[1] - 1, kStride, - data_ptr_[0], data_ptr_[1], kStride); - } - - IntraPredUvFunc pred_fn_; - // We use 24 so that the data pointer of the first pixel in each row of - // each macroblock is 8-byte aligned, and this gives us access to the - // top-left and top-right corner pixels belonging to the top-left/right - // macroblocks. - // We use 9 lines so we have one line above us for top-prediction. - // [0] = U, [1] = V - static uint8_t* data_array_; - static MACROBLOCKD* mb_; - static MODE_INFO* mi_; -}; - -MACROBLOCKD* IntraPredUVTest::mb_ = NULL; -MODE_INFO* IntraPredUVTest::mi_ = NULL; -uint8_t* IntraPredUVTest::data_array_ = NULL; - -TEST_P(IntraPredUVTest, IntraPredTests) { - RunTest(); -} - -INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest, - ::testing::Values( - vp8_build_intra_predictors_mbuv_s_c)); -#if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest, - ::testing::Values( - vp8_build_intra_predictors_mbuv_s_sse2)); -#endif -#if HAVE_SSSE3 -INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest, - ::testing::Values( - vp8_build_intra_predictors_mbuv_s_ssse3)); -#endif -#if HAVE_NEON -INSTANTIATE_TEST_CASE_P(NEON, IntraPredUVTest, - ::testing::Values( - vp8_build_intra_predictors_mbuv_s_neon)); -#endif -#if HAVE_MSA -INSTANTIATE_TEST_CASE_P(MSA, IntraPredUVTest, - ::testing::Values( - vp8_build_intra_predictors_mbuv_s_msa)); -#endif - -} // namespace diff --git a/libvpx/test/invalid_file_test.cc b/libvpx/test/invalid_file_test.cc index 1b5ef5c8..f4241eb8 100644 --- a/libvpx/test/invalid_file_test.cc +++ b/libvpx/test/invalid_file_test.cc @@ -63,9 +63,22 @@ class 
InvalidFileTest EXPECT_NE(res, EOF) << "Read result data failed"; // Check results match. - EXPECT_EQ(expected_res_dec, res_dec) - << "Results don't match: frame number = " << video.frame_number() - << ". (" << decoder->DecodeError() << ")"; + const DecodeParam input = GET_PARAM(1); + if (input.threads > 1) { + // The serial decode check is too strict for tile-threaded decoding as + // there is no guarantee on the decode order nor which specific error + // will take precedence. Currently a tile-level error is not forwarded so + // the frame will simply be marked corrupt. + EXPECT_TRUE(res_dec == expected_res_dec || + res_dec == VPX_CODEC_CORRUPT_FRAME) + << "Results don't match: frame number = " << video.frame_number() + << ". (" << decoder->DecodeError() << "). Expected: " + << expected_res_dec << " or " << VPX_CODEC_CORRUPT_FRAME; + } else { + EXPECT_EQ(expected_res_dec, res_dec) + << "Results don't match: frame number = " << video.frame_number() + << ". (" << decoder->DecodeError() << ")"; + } return !HasFailure(); } @@ -145,7 +158,7 @@ TEST_P(InvalidFileInvalidPeekTest, ReturnCode) { } const DecodeParam kVP9InvalidFileInvalidPeekTests[] = { - {1, "invalid-vp90-01-v2.webm"}, + {1, "invalid-vp90-01-v3.webm"}, }; VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest, diff --git a/libvpx/test/lpf_8_test.cc b/libvpx/test/lpf_8_test.cc index 966e1095..0bf6b0c2 100644 --- a/libvpx/test/lpf_8_test.cc +++ b/libvpx/test/lpf_8_test.cc @@ -590,7 +590,9 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1), - make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1))); + make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1), + make_tuple(&wrapper_vertical_16_dual_sse2, + &wrapper_vertical_16_dual_c, 8, 1))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif diff --git 
a/libvpx/test/register_state_check.h b/libvpx/test/register_state_check.h index 8e72f911..489c4194 100644 --- a/libvpx/test/register_state_check.h +++ b/libvpx/test/register_state_check.h @@ -30,7 +30,9 @@ #if defined(_WIN64) -#define _WIN32_LEAN_AND_MEAN +#undef NOMINMAX +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN #include <windows.h> #include <winnt.h> diff --git a/libvpx/test/resize_test.cc b/libvpx/test/resize_test.cc index f1134aaf..98b6f87e 100644 --- a/libvpx/test/resize_test.cc +++ b/libvpx/test/resize_test.cc @@ -81,6 +81,15 @@ static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt, const unsigned int kInitialWidth = 320; const unsigned int kInitialHeight = 240; +struct FrameInfo { + FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h) + : pts(_pts), w(_w), h(_h) {} + + vpx_codec_pts_t pts; + unsigned int w; + unsigned int h; +}; + unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) { if (frame < 10) return val; @@ -120,15 +129,6 @@ class ResizeTest : public ::libvpx_test::EncoderTest, virtual ~ResizeTest() {} - struct FrameInfo { - FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h) - : pts(_pts), w(_w), h(_h) {} - - vpx_codec_pts_t pts; - unsigned int w; - unsigned int h; - }; - virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); @@ -196,13 +196,27 @@ class ResizeInternalTest : public ResizeTest { virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { - if (video->frame() == kStepDownFrame) { - struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE}; - encoder->Control(VP8E_SET_SCALEMODE, &mode); - } - if (video->frame() == kStepUpFrame) { - struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL}; - encoder->Control(VP8E_SET_SCALEMODE, &mode); + if (change_config_) { + int new_q = 60; + if (video->frame() == 0) { + struct vpx_scaling_mode mode = {VP8E_ONETWO, VP8E_ONETWO}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + } + 
if (video->frame() == 1) { + struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q; + encoder->Config(&cfg_); + } + } else { + if (video->frame() == kStepDownFrame) { + struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + } + if (video->frame() == kStepUpFrame) { + struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + } } } @@ -227,6 +241,7 @@ class ResizeInternalTest : public ResizeTest { #endif double frame0_psnr_; + bool change_config_; #if WRITE_COMPRESSED_STREAM FILE *outfile_; unsigned int out_frames_; @@ -237,6 +252,7 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) { ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 10); init_flags_ = VPX_CODEC_USE_PSNR; + change_config_ = false; // q picked such that initial keyframe on this clip is ~30dB PSNR cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; @@ -261,6 +277,143 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) { } } +TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) { + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 10); + cfg_.g_w = 352; + cfg_.g_h = 288; + change_config_ = true; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { + protected: + ResizeInternalRealtimeTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~ResizeInternalRealtimeTest() {} + + virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, + libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(VP9E_SET_AQ_MODE, 3); + encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); + } + + if (change_bitrate_ && video->frame() == 120) { + change_bitrate_ = 
false; + cfg_.rc_target_bitrate = 500; + encoder->Config(&cfg_); + } + } + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + } + + virtual void DecompressedFrameHook(const vpx_image_t &img, + vpx_codec_pts_t pts) { + frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); + } + + void DefaultConfig() { + cfg_.g_w = 352; + cfg_.g_h = 288; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_end_usage = VPX_CBR; + cfg_.kf_mode = VPX_KF_AUTO; + cfg_.g_lag_in_frames = 0; + cfg_.kf_min_dist = cfg_.kf_max_dist = 3000; + // Enable dropped frames. + cfg_.rc_dropframe_thresh = 1; + // Enable error_resilience mode. + cfg_.g_error_resilient = 1; + // Enable dynamic resizing. + cfg_.rc_resize_allowed = 1; + // Run at low bitrate. + cfg_.rc_target_bitrate = 200; + } + + std::vector< FrameInfo > frame_info_list_; + int set_cpu_used_; + bool change_bitrate_; +}; + +// Verify the dynamic resizer behavior for real time, 1 pass CBR mode. +// Run at low bitrate, with resize_allowed = 1, and verify that we get +// one resize down event. +TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) { + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 299); + DefaultConfig(); + change_bitrate_ = false; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + unsigned int last_w = cfg_.g_w; + unsigned int last_h = cfg_.g_h; + int resize_count = 0; + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + if (info->w != last_w || info->h != last_h) { + // Verify that resize down occurs. + ASSERT_LT(info->w, last_w); + ASSERT_LT(info->h, last_h); + last_w = info->w; + last_h = info->h; + resize_count++; + } + } + + // Verify that we get 1 resize down event in this test. 
+ ASSERT_EQ(1, resize_count) << "Resizing should occur."; +} + +// Verify the dynamic resizer behavior for real time, 1 pass CBR mode. +// Start at low target bitrate, raise the bitrate in the middle of the clip, +// scaling-up should occur after bitrate changed. +TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) { + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 299); + DefaultConfig(); + change_bitrate_ = true; + // Disable dropped frames. + cfg_.rc_dropframe_thresh = 0; + // Starting bitrate low. + cfg_.rc_target_bitrate = 100; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + unsigned int last_w = cfg_.g_w; + unsigned int last_h = cfg_.g_h; + int resize_count = 0; + for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + if (info->w != last_w || info->h != last_h) { + resize_count++; + if (resize_count == 1) { + // Verify that resize down occurs. + ASSERT_LT(info->w, last_w); + ASSERT_LT(info->h, last_h); + } else if (resize_count == 2) { + // Verify that resize up occurs. + ASSERT_GT(info->w, last_w); + ASSERT_GT(info->h, last_h); + } + last_w = info->w; + last_h = info->h; + } + } + + // Verify that we get 2 resize events in this test. 
+ ASSERT_EQ(2, resize_count) << "Resizing should occur twice."; +} + vpx_img_fmt_t CspForFrameNumber(int frame) { if (frame < 10) return VPX_IMG_FMT_I420; @@ -371,6 +524,9 @@ VP9_INSTANTIATE_TEST_CASE(ResizeTest, ::testing::Values(::libvpx_test::kRealTime)); VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest, ::testing::Values(::libvpx_test::kOnePassBest)); +VP9_INSTANTIATE_TEST_CASE(ResizeInternalRealtimeTest, + ::testing::Values(::libvpx_test::kRealTime), + ::testing::Range(5, 9)); VP9_INSTANTIATE_TEST_CASE(ResizeCspTest, ::testing::Values(::libvpx_test::kRealTime)); } // namespace diff --git a/libvpx/test/sixtap_predict_test.cc b/libvpx/test/sixtap_predict_test.cc index 8c7c98d8..1e682e7b 100644 --- a/libvpx/test/sixtap_predict_test.cc +++ b/libvpx/test/sixtap_predict_test.cc @@ -201,7 +201,7 @@ const SixtapPredictFunc sixtap_16x16_neon = vp8_sixtap_predict16x16_neon; const SixtapPredictFunc sixtap_8x8_neon = vp8_sixtap_predict8x8_neon; const SixtapPredictFunc sixtap_8x4_neon = vp8_sixtap_predict8x4_neon; INSTANTIATE_TEST_CASE_P( - DISABLED_NEON, SixtapPredictTest, ::testing::Values( + NEON, SixtapPredictTest, ::testing::Values( make_tuple(16, 16, sixtap_16x16_neon), make_tuple(8, 8, sixtap_8x8_neon), make_tuple(8, 4, sixtap_8x4_neon))); diff --git a/libvpx/test/superframe_test.cc b/libvpx/test/superframe_test.cc index a8102b75..90aa75b4 100644 --- a/libvpx/test/superframe_test.cc +++ b/libvpx/test/superframe_test.cc @@ -16,8 +16,13 @@ namespace { +const int kTestMode = 0; +const int kSuperframeSyntax = 1; + +typedef std::tr1::tuple<libvpx_test::TestMode,int> SuperframeTestParam; + class SuperframeTest : public ::libvpx_test::EncoderTest, - public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { + public ::libvpx_test::CodecTestWithParam<SuperframeTestParam> { protected: SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL), last_sf_pts_(0) {} @@ -25,9 +30,13 @@ class SuperframeTest : public ::libvpx_test::EncoderTest, virtual void SetUp() { 
InitializeConfig(); - SetMode(GET_PARAM(1)); + const SuperframeTestParam input = GET_PARAM(1); + const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input); + const int syntax = std::tr1::get<kSuperframeSyntax>(input); + SetMode(mode); sf_count_ = 0; sf_count_max_ = INT_MAX; + is_vp10_style_superframe_ = syntax; } virtual void TearDown() { @@ -50,7 +59,8 @@ class SuperframeTest : public ::libvpx_test::EncoderTest, const uint8_t marker = buffer[pkt->data.frame.sz - 1]; const int frames = (marker & 0x7) + 1; const int mag = ((marker >> 3) & 3) + 1; - const unsigned int index_sz = 2 + mag * frames; + const unsigned int index_sz = + 2 + mag * (frames - is_vp10_style_superframe_); if ((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz && buffer[pkt->data.frame.sz - index_sz] == marker) { @@ -75,6 +85,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest, return pkt; } + int is_vp10_style_superframe_; int sf_count_; int sf_count_max_; vpx_codec_cx_pkt_t modified_pkt_; @@ -92,9 +103,11 @@ TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) { EXPECT_EQ(sf_count_, 1); } -VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values( - ::libvpx_test::kTwoPassGood)); +VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine( + ::testing::Values(::libvpx_test::kTwoPassGood), + ::testing::Values(0))); -VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values( - ::libvpx_test::kTwoPassGood)); +VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine( + ::testing::Values(::libvpx_test::kTwoPassGood), + ::testing::Values(CONFIG_MISC_FIXES))); } // namespace diff --git a/libvpx/test/test-data.mk b/libvpx/test/test-data.mk index dda1c182..4280b35f 100644 --- a/libvpx/test/test-data.mk +++ b/libvpx/test/test-data.mk @@ -18,6 +18,7 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv 
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_credits.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m @@ -687,8 +688,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5 endif # CONFIG_VP9_HIGHBITDEPTH # Invalid files for testing libvpx error checking. -LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm -LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm.res +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm diff --git a/libvpx/test/test-data.sha1 b/libvpx/test/test-data.sha1 index 3590f4e3..4e4ac623 100644 --- a/libvpx/test/test-data.sha1 +++ b/libvpx/test/test-data.sha1 @@ -6,8 +6,8 @@ b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv 456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res c123d1f9f02fb4143abb5e271916e3a3080de8f6 *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf 456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res -fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v2.webm -25751f5d3b05ff03f0719ad42cd625348eb8961e *invalid-vp90-01-v2.webm.res +fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v3.webm +5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-01-v3.webm.res d78e2fceba5ac942246503ec8366f879c4775ca5 *invalid-vp90-02-v2.webm 8e2eff4af87d2b561cce2365713269e301457ef3 *invalid-vp90-02-v2.webm.res df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm @@ -743,3 +743,4 @@ 
d06285d109ecbaef63b0cbcc44d70a129186f51c *invalid-vp90-2-03-size-224x196.webm.iv e60d859b0ef2b331b21740cf6cb83fabe469b079 *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf 0ae808dca4d3c1152a9576e14830b6faa39f1b4a *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res 9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m +5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m diff --git a/libvpx/test/test.mk b/libvpx/test/test.mk index 6bb08bed..8d662448 100644 --- a/libvpx/test/test.mk +++ b/libvpx/test/test.mk @@ -36,6 +36,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += invalid_file_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_refresh_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc @@ -110,7 +111,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc LIBVPX_TEST_SRCS-yes += idct_test.cc -LIBVPX_TEST_SRCS-yes += intrapred_test.cc LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc LIBVPX_TEST_SRCS-yes += vpx_scale_test.cc @@ -167,6 +167,10 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c +## VP10 +LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc + endif # CONFIG_SHARED include $(SRC_PATH_BARE)/test/test-data.mk diff --git a/libvpx/test/test_libvpx.cc b/libvpx/test/test_libvpx.cc index 26499174..005ea8d1 100644 --- a/libvpx/test/test_libvpx.cc +++ b/libvpx/test/test_libvpx.cc @@ -26,6 +26,7 @@ 
extern void vpx_dsp_rtcd(); extern void vpx_scale_rtcd(); } +#if ARCH_X86 || ARCH_X86_64 static void append_negative_gtest_filter(const char *str) { std::string filter = ::testing::FLAGS_gtest_filter; // Negative patterns begin with one '-' followed by a ':' separated list. @@ -33,6 +34,7 @@ static void append_negative_gtest_filter(const char *str) { filter += str; ::testing::FLAGS_gtest_filter = filter; } +#endif // ARCH_X86 || ARCH_X86_64 int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); @@ -55,7 +57,7 @@ int main(int argc, char **argv) { append_negative_gtest_filter(":AVX.*:AVX/*"); if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter(":AVX2.*:AVX2/*"); -#endif +#endif // ARCH_X86 || ARCH_X86_64 #if !CONFIG_SHARED // Shared library builds don't support whitebox tests diff --git a/libvpx/test/util.h b/libvpx/test/util.h index 3c45721f..b27bffa9 100644 --- a/libvpx/test/util.h +++ b/libvpx/test/util.h @@ -19,8 +19,7 @@ // Macros #define GET_PARAM(k) std::tr1::get< k >(GetParam()) -static double compute_psnr(const vpx_image_t *img1, - const vpx_image_t *img2) { +inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) { assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) && (img1->d_h == img2->d_h)); diff --git a/libvpx/test/video_source.h b/libvpx/test/video_source.h index 63294d14..ade323e7 100644 --- a/libvpx/test/video_source.h +++ b/libvpx/test/video_source.h @@ -11,6 +11,9 @@ #define TEST_VIDEO_SOURCE_H_ #if defined(_WIN32) +#undef NOMINMAX +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN #include <windows.h> #endif #include <cstdio> @@ -48,7 +51,7 @@ static std::string GetDataPath() { #undef TO_STRING #undef STRINGIFY -static FILE *OpenTestDataFile(const std::string& file_name) { +inline FILE *OpenTestDataFile(const std::string& file_name) { const std::string path_to_source = GetDataPath() + "/" + file_name; return fopen(path_to_source.c_str(), "rb"); } diff --git a/libvpx/test/vp10_dct_test.cc 
b/libvpx/test/vp10_dct_test.cc new file mode 100644 index 00000000..b2c301ae --- /dev/null +++ b/libvpx/test/vp10_dct_test.cc @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> +#include <stdlib.h> +#include <new> + +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/acm_random.h" +#include "test/util.h" +#include "./vpx_config.h" +#include "vpx_ports/msvc.h" + +#undef CONFIG_COEFFICIENT_RANGE_CHECKING +#define CONFIG_COEFFICIENT_RANGE_CHECKING 1 +#include "vp10/encoder/dct.c" + +using libvpx_test::ACMRandom; + +namespace { +void reference_dct_1d(const double *in, double *out, int size) { + const double PI = 3.141592653589793238462643383279502884; + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < size; ++k) { + out[k] = 0; + for (int n = 0; n < size; ++n) { + out[k] += in[n] * cos(PI * (2 * n + 1) * k / (2 * size)); + } + if (k == 0) + out[k] = out[k] * kInvSqrt2; + } +} + +typedef void (*FdctFuncRef)(const double *in, double *out, int size); +typedef void (*IdctFuncRef)(const double *in, double *out, int size); +typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out); +typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out); + +class TransTestBase { + public: + virtual ~TransTestBase() {} + + protected: + void RunFwdAccuracyCheck() { + tran_low_t *input = new tran_low_t[txfm_size_]; + tran_low_t *output = new tran_low_t[txfm_size_]; + double *ref_input = new double[txfm_size_]; + double *ref_output = new double[txfm_size_]; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int 
count_test_block = 5000; + for (int ti = 0; ti < count_test_block; ++ti) { + for (int ni = 0; ni < txfm_size_; ++ni) { + input[ni] = rnd.Rand8() - rnd.Rand8(); + ref_input[ni] = static_cast<double>(input[ni]); + } + + fwd_txfm_(input, output); + fwd_txfm_ref_(ref_input, ref_output, txfm_size_); + + for (int ni = 0; ni < txfm_size_; ++ni) { + EXPECT_LE( + abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))), + max_error_); + } + } + + delete[] input; + delete[] output; + delete[] ref_input; + delete[] ref_output; + } + + double max_error_; + int txfm_size_; + FdctFunc fwd_txfm_; + FdctFuncRef fwd_txfm_ref_; +}; + +typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam; +class Vp10FwdTxfm + : public TransTestBase, + public ::testing::TestWithParam<FdctParam> { + public: + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + fwd_txfm_ref_ = GET_PARAM(1); + txfm_size_ = GET_PARAM(2); + max_error_ = GET_PARAM(3); + } + virtual void TearDown() {} +}; + +TEST_P(Vp10FwdTxfm, RunFwdAccuracyCheck) { + RunFwdAccuracyCheck(); +} + +INSTANTIATE_TEST_CASE_P( + C, Vp10FwdTxfm, + ::testing::Values( + FdctParam(&fdct4, &reference_dct_1d, 4, 1), + FdctParam(&fdct8, &reference_dct_1d, 8, 1), + FdctParam(&fdct16, &reference_dct_1d, 16, 2))); +} // namespace diff --git a/libvpx/test/vp10_inv_txfm_test.cc b/libvpx/test/vp10_inv_txfm_test.cc new file mode 100644 index 00000000..c49081ef --- /dev/null +++ b/libvpx/test/vp10_inv_txfm_test.cc @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "./vp10_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "vp10/common/blockd.h" +#include "vp10/common/scan.h" +#include "vpx/vpx_integer.h" +#include "vp10/common/vp10_inv_txfm.h" + +using libvpx_test::ACMRandom; + +namespace { +const double PI = 3.141592653589793238462643383279502884; +const double kInvSqrt2 = 0.707106781186547524400844362104; + +void reference_idct_1d(const double *in, double *out, int size) { + for (int n = 0; n < size; ++n) { + out[n] = 0; + for (int k = 0; k < size; ++k) { + if (k == 0) + out[n] += kInvSqrt2 * in[k] * cos(PI * (2 * n + 1) * k / (2 * size)); + else + out[n] += in[k] * cos(PI * (2 * n + 1) * k / (2 * size)); + } + } +} + +typedef void (*IdctFuncRef)(const double *in, double *out, int size); +typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out); + +class TransTestBase { + public: + virtual ~TransTestBase() {} + + protected: + void RunInvAccuracyCheck() { + tran_low_t *input = new tran_low_t[txfm_size_]; + tran_low_t *output = new tran_low_t[txfm_size_]; + double *ref_input = new double[txfm_size_]; + double *ref_output = new double[txfm_size_]; + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 5000; + for (int ti = 0; ti < count_test_block; ++ti) { + for (int ni = 0; ni < txfm_size_; ++ni) { + input[ni] = rnd.Rand8() - rnd.Rand8(); + ref_input[ni] = static_cast<double>(input[ni]); + } + + fwd_txfm_(input, output); + fwd_txfm_ref_(ref_input, ref_output, txfm_size_); + + for (int ni = 0; ni < txfm_size_; ++ni) { + EXPECT_LE( + abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))), + max_error_); + } + } + + delete[] input; + delete[] output; + delete[] ref_input; + delete[] ref_output; + } + + double 
max_error_; + int txfm_size_; + IdctFunc fwd_txfm_; + IdctFuncRef fwd_txfm_ref_; +}; + +typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam; +class Vp10InvTxfm + : public TransTestBase, + public ::testing::TestWithParam<IdctParam> { + public: + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + fwd_txfm_ref_ = GET_PARAM(1); + txfm_size_ = GET_PARAM(2); + max_error_ = GET_PARAM(3); + } + virtual void TearDown() {} +}; + +TEST_P(Vp10InvTxfm, RunInvAccuracyCheck) { + RunInvAccuracyCheck(); +} + +INSTANTIATE_TEST_CASE_P( + C, Vp10InvTxfm, + ::testing::Values( + IdctParam(&vp10_idct4_c, &reference_idct_1d, 4, 1), + IdctParam(&vp10_idct8_c, &reference_idct_1d, 8, 2), + IdctParam(&vp10_idct16_c, &reference_idct_1d, 16, 4), + IdctParam(&vp10_idct32_c, &reference_idct_1d, 32, 6)) +); + +typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride); +typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride); +typedef std::tr1::tuple<FwdTxfmFunc, + InvTxfmFunc, + InvTxfmFunc, + TX_SIZE, int> PartialInvTxfmParam; +const int kMaxNumCoeffs = 1024; +class Vp10PartialIDctTest + : public ::testing::TestWithParam<PartialInvTxfmParam> { + public: + virtual ~Vp10PartialIDctTest() {} + virtual void SetUp() { + ftxfm_ = GET_PARAM(0); + full_itxfm_ = GET_PARAM(1); + partial_itxfm_ = GET_PARAM(2); + tx_size_ = GET_PARAM(3); + last_nonzero_ = GET_PARAM(4); + } + + virtual void TearDown() { libvpx_test::ClearSystemState(); } + + protected: + int last_nonzero_; + TX_SIZE tx_size_; + FwdTxfmFunc ftxfm_; + InvTxfmFunc full_itxfm_; + InvTxfmFunc partial_itxfm_; +}; + +TEST_P(Vp10PartialIDctTest, RunQuantCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int size; + switch (tx_size_) { + case TX_4X4: + size = 4; + break; + case TX_8X8: + size = 8; + break; + case TX_16X16: + size = 16; + break; + case TX_32X32: + size = 32; + break; + default: + FAIL() << "Wrong Size!"; + break; + } + DECLARE_ALIGNED(16, tran_low_t, 
test_coef_block1[kMaxNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]); + + const int count_test_block = 1000; + const int block_size = size * size; + + DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]); + + int max_error = 0; + for (int i = 0; i < count_test_block; ++i) { + // clear out destination buffer + memset(dst1, 0, sizeof(*dst1) * block_size); + memset(dst2, 0, sizeof(*dst2) * block_size); + memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size); + memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size); + + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + for (int i = 0; i < count_test_block; ++i) { + // Initialize a test block with input range [-255, 255]. + if (i == 0) { + for (int j = 0; j < block_size; ++j) + input_extreme_block[j] = 255; + } else if (i == 1) { + for (int j = 0; j < block_size; ++j) + input_extreme_block[j] = -255; + } else { + for (int j = 0; j < block_size; ++j) { + input_extreme_block[j] = rnd.Rand8() % 2 ? 
255 : -255; + } + } + + ftxfm_(input_extreme_block, output_ref_block, size); + + // quantization with maximum allowed step sizes + test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336; + for (int j = 1; j < last_nonzero_; ++j) + test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] + = (output_ref_block[j] / 1828) * 1828; + } + + ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size)); + ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size)); + + for (int j = 0; j < block_size; ++j) { + const int diff = dst1[j] - dst2[j]; + const int error = diff * diff; + if (max_error < error) + max_error = error; + } + } + + EXPECT_EQ(0, max_error) + << "Error: partial inverse transform produces different results"; +} + +TEST_P(Vp10PartialIDctTest, ResultsMatch) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + int size; + switch (tx_size_) { + case TX_4X4: + size = 4; + break; + case TX_8X8: + size = 8; + break; + case TX_16X16: + size = 16; + break; + case TX_32X32: + size = 32; + break; + default: + FAIL() << "Wrong Size!"; + break; + } + DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]); + DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]); + const int count_test_block = 1000; + const int max_coeff = 32766 / 4; + const int block_size = size * size; + int max_error = 0; + for (int i = 0; i < count_test_block; ++i) { + // clear out destination buffer + memset(dst1, 0, sizeof(*dst1) * block_size); + memset(dst2, 0, sizeof(*dst2) * block_size); + memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size); + memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size); + int max_energy_leftover = max_coeff * max_coeff; + for (int j = 0; j < last_nonzero_; ++j) { + int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) * + (rnd.Rand16() - 32768) / 65536); + max_energy_leftover 
-= coef * coef; + if (max_energy_leftover < 0) { + max_energy_leftover = 0; + coef = 0; + } + test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef; + } + + memcpy(test_coef_block2, test_coef_block1, + sizeof(*test_coef_block2) * block_size); + + ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size)); + ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size)); + + for (int j = 0; j < block_size; ++j) { + const int diff = dst1[j] - dst2[j]; + const int error = diff * diff; + if (max_error < error) + max_error = error; + } + } + + EXPECT_EQ(0, max_error) + << "Error: partial inverse transform produces different results"; +} +using std::tr1::make_tuple; + +INSTANTIATE_TEST_CASE_P( + C, Vp10PartialIDctTest, + ::testing::Values( + make_tuple(&vpx_fdct32x32_c, + &vp10_idct32x32_1024_add_c, + &vp10_idct32x32_34_add_c, + TX_32X32, 34), + make_tuple(&vpx_fdct32x32_c, + &vp10_idct32x32_1024_add_c, + &vp10_idct32x32_1_add_c, + TX_32X32, 1), + make_tuple(&vpx_fdct16x16_c, + &vp10_idct16x16_256_add_c, + &vp10_idct16x16_10_add_c, + TX_16X16, 10), + make_tuple(&vpx_fdct16x16_c, + &vp10_idct16x16_256_add_c, + &vp10_idct16x16_1_add_c, + TX_16X16, 1), + make_tuple(&vpx_fdct8x8_c, + &vp10_idct8x8_64_add_c, + &vp10_idct8x8_12_add_c, + TX_8X8, 12), + make_tuple(&vpx_fdct8x8_c, + &vp10_idct8x8_64_add_c, + &vp10_idct8x8_1_add_c, + TX_8X8, 1), + make_tuple(&vpx_fdct4x4_c, + &vp10_idct4x4_16_add_c, + &vp10_idct4x4_1_add_c, + TX_4X4, 1))); +} // namespace diff --git a/libvpx/test/vp9_arf_freq_test.cc b/libvpx/test/vp9_arf_freq_test.cc index 87ff15b6..89200d40 100644 --- a/libvpx/test/vp9_arf_freq_test.cc +++ b/libvpx/test/vp9_arf_freq_test.cc @@ -230,9 +230,23 @@ VP9_INSTANTIATE_TEST_CASE( ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors)); +#if CONFIG_VP9_HIGHBITDEPTH +# if CONFIG_VP10_ENCODER +// TODO(angiebird): 25-29 fail in high bitdepth mode. 
+INSTANTIATE_TEST_CASE_P( + DISABLED_VP10, ArfFreqTest, + ::testing::Combine( + ::testing::Values(static_cast<const libvpx_test::CodecFactory *>( + &libvpx_test::kVP10)), + ::testing::ValuesIn(kTestVectors), + ::testing::ValuesIn(kEncodeVectors), + ::testing::ValuesIn(kMinArfVectors))); +# endif // CONFIG_VP10_ENCODER +#else VP10_INSTANTIATE_TEST_CASE( ArfFreqTest, ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors)); +#endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/libvpx/test/vp9_encoder_parms_get_to_decoder.cc b/libvpx/test/vp9_encoder_parms_get_to_decoder.cc index a02070e4..3ef6022a 100644 --- a/libvpx/test/vp9_encoder_parms_get_to_decoder.cc +++ b/libvpx/test/vp9_encoder_parms_get_to_decoder.cc @@ -14,38 +14,10 @@ #include "test/encode_test_driver.h" #include "test/util.h" #include "test/y4m_video_source.h" -#include "test/yuv_video_source.h" -#include "vp9/decoder/vp9_decoder.h" - -typedef vpx_codec_stream_info_t vp9_stream_info_t; -struct vpx_codec_alg_priv { - vpx_codec_priv_t base; - vpx_codec_dec_cfg_t cfg; - vp9_stream_info_t si; - struct VP9Decoder *pbi; - int postproc_cfg_set; - vp8_postproc_cfg_t postproc_cfg; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - vpx_image_t img; - int img_avail; - int flushed; - int invert_tile_order; - int frame_parallel_decode; - - // External frame buffer info to save for VP9 common. - void *ext_priv; // Private data associated with the external frame buffers. 
- vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; -}; - -static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) { - return (vpx_codec_alg_priv_t *)ctx->priv; -} +#include "vp9/vp9_dx_iface.h" namespace { -const unsigned int kFramerate = 50; const int kCpuUsed = 2; struct EncodePerfTestVideo { @@ -66,35 +38,27 @@ struct EncodeParameters { int32_t lossless; int32_t error_resilient; int32_t frame_parallel; + vpx_color_range_t color_range; vpx_color_space_t cs; + int render_size[2]; // TODO(JBB): quantizers / bitrate }; const EncodeParameters kVP9EncodeParameterSet[] = { - {0, 0, 0, 1, 0, VPX_CS_BT_601}, - {0, 0, 0, 0, 0, VPX_CS_BT_709}, - {0, 0, 1, 0, 0, VPX_CS_BT_2020}, - {0, 2, 0, 0, 1, VPX_CS_UNKNOWN}, - // TODO(JBB): Test profiles (requires more work). + {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601}, + {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709}, + {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020}, + {0, 2, 0, 0, 1, VPX_CR_STUDIO_RANGE, VPX_CS_UNKNOWN, { 640, 480 }}, + // TODO(JBB): Test profiles (requires more work). 
}; -int is_extension_y4m(const char *filename) { - const char *dot = strrchr(filename, '.'); - if (!dot || dot == filename) - return 0; - else - return !strcmp(dot, ".y4m"); -} - class VpxEncoderParmsGetToDecoder : public ::libvpx_test::EncoderTest, - public ::libvpx_test::CodecTestWith2Params<EncodeParameters, \ + public ::libvpx_test::CodecTestWith2Params<EncodeParameters, EncodePerfTestVideo> { protected: VpxEncoderParmsGetToDecoder() - : EncoderTest(GET_PARAM(0)), - encode_parms(GET_PARAM(1)) { - } + : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {} virtual ~VpxEncoderParmsGetToDecoder() {} @@ -112,6 +76,7 @@ class VpxEncoderParmsGetToDecoder ::libvpx_test::Encoder *encoder) { if (video->frame() == 1) { encoder->Control(VP9E_SET_COLOR_SPACE, encode_parms.cs); + encoder->Control(VP9E_SET_COLOR_RANGE, encode_parms.color_range); encoder->Control(VP9E_SET_LOSSLESS, encode_parms.lossless); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, encode_parms.frame_parallel); @@ -122,37 +87,44 @@ class VpxEncoderParmsGetToDecoder encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); + if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) + encoder->Control(VP9E_SET_RENDER_SIZE, encode_parms.render_size); } } virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, - const libvpx_test::VideoSource& video, + const libvpx_test::VideoSource &video, libvpx_test::Decoder *decoder) { - vpx_codec_ctx_t* vp9_decoder = decoder->GetDecoder(); - vpx_codec_alg_priv_t* priv = - (vpx_codec_alg_priv_t*) get_alg_priv(vp9_decoder); - - VP9Decoder* pbi = priv->pbi; - VP9_COMMON* common = &pbi->common; + vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder(); + vpx_codec_alg_priv_t *const priv = + reinterpret_cast<vpx_codec_alg_priv_t *>(vp9_decoder->priv); + FrameWorkerData *const worker_data = + reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1); + VP9_COMMON 
*const common = &worker_data->pbi->common; if (encode_parms.lossless) { - EXPECT_EQ(common->base_qindex, 0); - EXPECT_EQ(common->y_dc_delta_q, 0); - EXPECT_EQ(common->uv_dc_delta_q, 0); - EXPECT_EQ(common->uv_ac_delta_q, 0); - EXPECT_EQ(common->tx_mode, ONLY_4X4); + EXPECT_EQ(0, common->base_qindex); + EXPECT_EQ(0, common->y_dc_delta_q); + EXPECT_EQ(0, common->uv_dc_delta_q); + EXPECT_EQ(0, common->uv_ac_delta_q); + EXPECT_EQ(ONLY_4X4, common->tx_mode); } - EXPECT_EQ(common->error_resilient_mode, encode_parms.error_resilient); + EXPECT_EQ(encode_parms.error_resilient, common->error_resilient_mode); if (encode_parms.error_resilient) { - EXPECT_EQ(common->frame_parallel_decoding_mode, 1); - EXPECT_EQ(common->use_prev_frame_mvs, 0); + EXPECT_EQ(1, common->frame_parallel_decoding_mode); + EXPECT_EQ(0, common->use_prev_frame_mvs); } else { - EXPECT_EQ(common->frame_parallel_decoding_mode, - encode_parms.frame_parallel); + EXPECT_EQ(encode_parms.frame_parallel, + common->frame_parallel_decoding_mode); + } + EXPECT_EQ(encode_parms.color_range, common->color_range); + EXPECT_EQ(encode_parms.cs, common->color_space); + if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) { + EXPECT_EQ(encode_parms.render_size[0], common->render_width); + EXPECT_EQ(encode_parms.render_size[1], common->render_height); } - EXPECT_EQ(common->color_space, encode_parms.cs); - EXPECT_EQ(common->log2_tile_cols, encode_parms.tile_cols); - EXPECT_EQ(common->log2_tile_rows, encode_parms.tile_rows); + EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols); + EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows); EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError(); return VPX_CODEC_OK == res_dec; @@ -164,35 +136,18 @@ class VpxEncoderParmsGetToDecoder EncodeParameters encode_parms; }; -// TODO(hkuang): This test conflicts with frame parallel decode. So disable it -// for now until fix. 
-TEST_P(VpxEncoderParmsGetToDecoder, DISABLED_BitstreamParms) { +TEST_P(VpxEncoderParmsGetToDecoder, BitstreamParms) { init_flags_ = VPX_CODEC_USE_PSNR; - libvpx_test::VideoSource *video; - if (is_extension_y4m(test_video_.name)) { - video = new libvpx_test::Y4mVideoSource(test_video_.name, - 0, test_video_.frames); - } else { - video = new libvpx_test::YUVVideoSource(test_video_.name, - VPX_IMG_FMT_I420, - test_video_.width, - test_video_.height, - kFramerate, 1, 0, - test_video_.frames); - } + libvpx_test::VideoSource *const video = + new libvpx_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames); + ASSERT_TRUE(video != NULL); ASSERT_NO_FATAL_FAILURE(RunLoop(video)); - delete(video); + delete video; } -VP9_INSTANTIATE_TEST_CASE( - VpxEncoderParmsGetToDecoder, - ::testing::ValuesIn(kVP9EncodeParameterSet), - ::testing::ValuesIn(kVP9EncodePerfTestVectors)); - -VP10_INSTANTIATE_TEST_CASE( - VpxEncoderParmsGetToDecoder, - ::testing::ValuesIn(kVP9EncodeParameterSet), - ::testing::ValuesIn(kVP9EncodePerfTestVectors)); +VP9_INSTANTIATE_TEST_CASE(VpxEncoderParmsGetToDecoder, + ::testing::ValuesIn(kVP9EncodeParameterSet), + ::testing::ValuesIn(kVP9EncodePerfTestVectors)); } // namespace diff --git a/libvpx/test/vp9_end_to_end_test.cc b/libvpx/test/vp9_end_to_end_test.cc index e100eb95..be1fa68c 100644 --- a/libvpx/test/vp9_end_to_end_test.cc +++ b/libvpx/test/vp9_end_to_end_test.cc @@ -187,9 +187,23 @@ VP9_INSTANTIATE_TEST_CASE( ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kCpuUsedVectors)); +#if CONFIG_VP9_HIGHBITDEPTH +# if CONFIG_VP10_ENCODER +// TODO(angiebird): many fail in high bitdepth mode. 
+INSTANTIATE_TEST_CASE_P( + DISABLED_VP10, EndToEndTestLarge, + ::testing::Combine( + ::testing::Values(static_cast<const libvpx_test::CodecFactory *>( + &libvpx_test::kVP10)), + ::testing::ValuesIn(kEncodingModeVectors), + ::testing::ValuesIn(kTestVectors), + ::testing::ValuesIn(kCpuUsedVectors))); +# endif // CONFIG_VP10_ENCODER +#else VP10_INSTANTIATE_TEST_CASE( EndToEndTestLarge, ::testing::ValuesIn(kEncodingModeVectors), ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kCpuUsedVectors)); +#endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/libvpx/test/vp9_error_block_test.cc b/libvpx/test/vp9_error_block_test.cc index 8c5d5a2e..77b12ea8 100644 --- a/libvpx/test/vp9_error_block_test.cc +++ b/libvpx/test/vp9_error_block_test.cc @@ -67,12 +67,22 @@ TEST_P(ErrorBlockTest, OperationCheck) { int64_t ret; int64_t ref_ssz; int64_t ref_ret; + const int msb = bit_depth_ + 8 - 1; for (int i = 0; i < kNumIterations; ++i) { int err_count = 0; block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64 for (int j = 0; j < block_size; j++) { - coeff[j] = rnd(2 << 20) - (1 << 20); - dqcoeff[j] = rnd(2 << 20) - (1 << 20); + // coeff and dqcoeff will always have at least the same sign, and this + // can be used for optimization, so generate test input precisely. + if (rnd(2)) { + // Positive number + coeff[j] = rnd(1 << msb); + dqcoeff[j] = rnd(1 << msb); + } else { + // Negative number + coeff[j] = -rnd(1 << msb); + dqcoeff[j] = -rnd(1 << msb); + } } ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_); @@ -85,7 +95,7 @@ TEST_P(ErrorBlockTest, OperationCheck) { err_count_total += err_count; } EXPECT_EQ(0, err_count_total) - << "Error: Error Block Test, C output doesn't match SSE2 output. " + << "Error: Error Block Test, C output doesn't match optimized output. 
" << "First failed at test case " << first_failure; } @@ -100,23 +110,36 @@ TEST_P(ErrorBlockTest, ExtremeValues) { int64_t ret; int64_t ref_ssz; int64_t ref_ret; - int max_val = ((1 << 20) - 1); + const int msb = bit_depth_ + 8 - 1; + int max_val = ((1 << msb) - 1); for (int i = 0; i < kNumIterations; ++i) { int err_count = 0; - int k = (i / 9) % 5; + int k = (i / 9) % 9; // Change the maximum coeff value, to test different bit boundaries - if ( k == 4 && (i % 9) == 0 ) { + if ( k == 8 && (i % 9) == 0 ) { max_val >>= 1; } block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64 for (int j = 0; j < block_size; j++) { - if (k < 4) { // Test at maximum values - coeff[j] = k % 2 ? max_val : -max_val; - dqcoeff[j] = (k >> 1) % 2 ? max_val : -max_val; + if (k < 4) { + // Test at positive maximum values + coeff[j] = k % 2 ? max_val : 0; + dqcoeff[j] = (k >> 1) % 2 ? max_val : 0; + } else if (k < 8) { + // Test at negative maximum values + coeff[j] = k % 2 ? -max_val : 0; + dqcoeff[j] = (k >> 1) % 2 ? -max_val : 0; } else { - coeff[j] = rnd(2 << 14) - (1 << 14); - dqcoeff[j] = rnd(2 << 14) - (1 << 14); + if (rnd(2)) { + // Positive number + coeff[j] = rnd(1 << 14); + dqcoeff[j] = rnd(1 << 14); + } else { + // Negative number + coeff[j] = -rnd(1 << 14); + dqcoeff[j] = -rnd(1 << 14); + } } } ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, @@ -130,13 +153,30 @@ TEST_P(ErrorBlockTest, ExtremeValues) { err_count_total += err_count; } EXPECT_EQ(0, err_count_total) - << "Error: Error Block Test, C output doesn't match SSE2 output. " + << "Error: Error Block Test, C output doesn't match optimized output. 
" << "First failed at test case " << first_failure; } using std::tr1::make_tuple; +#if CONFIG_USE_X86INC +int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff, + const tran_low_t *dqcoeff, + intptr_t block_size, + int64_t *ssz, int bps) { + assert(bps == 8); + return vp9_highbd_block_error_8bit_c(coeff, dqcoeff, block_size, ssz); +} + #if HAVE_SSE2 +int64_t wrap_vp9_highbd_block_error_8bit_sse2(const tran_low_t *coeff, + const tran_low_t *dqcoeff, + intptr_t block_size, + int64_t *ssz, int bps) { + assert(bps == 8); + return vp9_highbd_block_error_8bit_sse2(coeff, dqcoeff, block_size, ssz); +} + INSTANTIATE_TEST_CASE_P( SSE2, ErrorBlockTest, ::testing::Values( @@ -145,7 +185,27 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c, VPX_BITS_12), make_tuple(&vp9_highbd_block_error_sse2, - &vp9_highbd_block_error_c, VPX_BITS_8))); + &vp9_highbd_block_error_c, VPX_BITS_8), + make_tuple(&wrap_vp9_highbd_block_error_8bit_sse2, + &wrap_vp9_highbd_block_error_8bit_c, VPX_BITS_8))); #endif // HAVE_SSE2 + +#if HAVE_AVX +int64_t wrap_vp9_highbd_block_error_8bit_avx(const tran_low_t *coeff, + const tran_low_t *dqcoeff, + intptr_t block_size, + int64_t *ssz, int bps) { + assert(bps == 8); + return vp9_highbd_block_error_8bit_avx(coeff, dqcoeff, block_size, ssz); +} + +INSTANTIATE_TEST_CASE_P( + AVX, ErrorBlockTest, + ::testing::Values( + make_tuple(&wrap_vp9_highbd_block_error_8bit_avx, + &wrap_vp9_highbd_block_error_8bit_c, VPX_BITS_8))); +#endif // HAVE_AVX + +#endif // CONFIG_USE_X86INC #endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/libvpx/test/vp9_thread_test.cc b/libvpx/test/vp9_thread_test.cc index 233e1b1a..92e4b968 100644 --- a/libvpx/test/vp9_thread_test.cc +++ b/libvpx/test/vp9_thread_test.cc @@ -190,7 +190,7 @@ string DecodeFile(const string& filename, int num_threads) { void DecodeFiles(const FileList files[]) { for (const FileList *iter = files; iter->name != NULL; ++iter) { 
SCOPED_TRACE(iter->name); - for (int t = 2; t <= 8; ++t) { + for (int t = 1; t <= 8; ++t) { EXPECT_EQ(iter->expected_md5, DecodeFile(iter->name, t)) << "threads = " << t; } @@ -235,13 +235,13 @@ TEST(VPxWorkerThreadTest, TestSerialInterface) { EXPECT_EQ(expected_md5, DecodeFile(filename, 2)); } -TEST(VP9DecodeMultiThreadedTest, Decode) { +TEST(VP9DecodeMultiThreadedTest, NoTilesNonFrameParallel) { // no tiles or frame parallel; this exercises loop filter threading. EXPECT_EQ("b35a1b707b28e82be025d960aba039bc", DecodeFile("vp90-2-03-size-226x226.webm", 2)); } -TEST(VP9DecodeMultiThreadedTest, Decode2) { +TEST(VP9DecodeMultiThreadedTest, FrameParallel) { static const FileList files[] = { { "vp90-2-08-tile_1x2_frame_parallel.webm", "68ede6abd66bae0a2edf2eb9232241b6" }, @@ -255,8 +255,7 @@ TEST(VP9DecodeMultiThreadedTest, Decode2) { DecodeFiles(files); } -// Test tile quantity changes within one file. -TEST(VP9DecodeMultiThreadedTest, Decode3) { +TEST(VP9DecodeMultiThreadedTest, FrameParallelResize) { static const FileList files[] = { { "vp90-2-14-resize-fp-tiles-1-16.webm", "0cd5e632c326297e975f38949c31ea94" }, @@ -307,6 +306,19 @@ TEST(VP9DecodeMultiThreadedTest, Decode3) { DecodeFiles(files); } + +TEST(VP9DecodeMultiThreadedTest, NonFrameParallel) { + static const FileList files[] = { + { "vp90-2-08-tile_1x2.webm", "570b4a5d5a70d58b5359671668328a16" }, + { "vp90-2-08-tile_1x4.webm", "988d86049e884c66909d2d163a09841a" }, + { "vp90-2-08-tile_1x8.webm", "0941902a52e9092cb010905eab16364c" }, + { "vp90-2-08-tile-4x1.webm", "06505aade6647c583c8e00a2f582266f" }, + { "vp90-2-08-tile-4x4.webm", "85c2299892460d76e2c600502d52bfe2" }, + { NULL, NULL } + }; + + DecodeFiles(files); +} #endif // CONFIG_WEBM_IO INSTANTIATE_TEST_CASE_P(Synchronous, VPxWorkerThreadTest, ::testing::Bool()); diff --git a/libvpx/test/y4m_video_source.h b/libvpx/test/y4m_video_source.h index 378e75bf..03d9388d 100644 --- a/libvpx/test/y4m_video_source.h +++ b/libvpx/test/y4m_video_source.h @@ -9,6 +9,7 
@@ */ #ifndef TEST_Y4M_VIDEO_SOURCE_H_ #define TEST_Y4M_VIDEO_SOURCE_H_ +#include <algorithm> #include <string> #include "test/video_source.h" @@ -91,6 +92,18 @@ class Y4mVideoSource : public VideoSource { y4m_input_fetch_frame(&y4m_, input_file_, img_.get()); } + // Swap buffers with another y4m source. This allows reading a new frame + // while keeping the old frame around. A whole Y4mSource is required and + // not just a vpx_image_t because of how the y4m reader manipulates + // vpx_image_t internals, + void SwapBuffers(Y4mVideoSource *other) { + std::swap(other->y4m_.dst_buf, y4m_.dst_buf); + vpx_image_t *tmp; + tmp = other->img_.release(); + other->img_.reset(img_.release()); + img_.reset(tmp); + } + protected: void CloseSource() { y4m_input_close(&y4m_); diff --git a/libvpx/third_party/libwebm/README.libvpx b/libvpx/third_party/libwebm/README.libvpx index 91875e11..2989d3d8 100644 --- a/libvpx/third_party/libwebm/README.libvpx +++ b/libvpx/third_party/libwebm/README.libvpx @@ -1,7 +1,10 @@ URL: https://chromium.googlesource.com/webm/libwebm -Version: 2dec09426ab62b794464cc9971bd135b4d313e65 +Version: 476366249e1fda7710a389cd41c57db42305e0d4 License: BSD License File: LICENSE.txt Description: libwebm is used to handle WebM container I/O. + +Local Changes: +* <none> diff --git a/libvpx/third_party/libwebm/mkvmuxer.hpp b/libvpx/third_party/libwebm/mkvmuxer.hpp index 497ad4cf..03a002c9 100644 --- a/libvpx/third_party/libwebm/mkvmuxer.hpp +++ b/libvpx/third_party/libwebm/mkvmuxer.hpp @@ -528,7 +528,7 @@ class Tracks { public: // Audio and video type defined by the Matroska specs. enum { kVideo = 0x1, kAudio = 0x2 }; - // Opus, Vorbis, VP8, and VP9 codec ids defined by the Matroska specs. 
+ static const char kOpusCodecId[]; static const char kVorbisCodecId[]; static const char kVp8CodecId[]; diff --git a/libvpx/third_party/libwebm/mkvparser.cpp b/libvpx/third_party/libwebm/mkvparser.cpp index fc01be52..f2855d50 100644 --- a/libvpx/third_party/libwebm/mkvparser.cpp +++ b/libvpx/third_party/libwebm/mkvparser.cpp @@ -7,45 +7,66 @@ // be found in the AUTHORS file in the root of the source tree. #include "mkvparser.hpp" + +#if defined(_MSC_VER) && _MSC_VER < 1800 +#include <float.h> // _isnan() / _finite() +#define MSC_COMPAT +#endif + #include <cassert> +#include <climits> +#include <cmath> #include <cstring> #include <new> -#include <climits> + +#include "webmids.hpp" #ifdef _MSC_VER // Disable MSVC warnings that suggest making code non-portable. #pragma warning(disable : 4996) #endif -mkvparser::IMkvReader::~IMkvReader() {} +namespace mkvparser { + +#ifdef MSC_COMPAT +inline bool isnan(double val) { return !!_isnan(val); } +inline bool isinf(double val) { return !_finite(val); } +#else +inline bool isnan(double val) { return std::isnan(val); } +inline bool isinf(double val) { return std::isinf(val); } +#endif // MSC_COMPAT + +IMkvReader::~IMkvReader() {} -void mkvparser::GetVersion(int& major, int& minor, int& build, int& revision) { +template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements, + unsigned long long element_size) { + if (num_elements == 0 || element_size == 0) + return NULL; + + const size_t kMaxAllocSize = 0x80000000; // 2GiB + const unsigned long long num_bytes = num_elements * element_size; + if (element_size > (kMaxAllocSize / num_elements)) + return NULL; + if (num_bytes != static_cast<size_t>(num_bytes)) + return NULL; + + return new (std::nothrow) Type[static_cast<size_t>(num_bytes)]; +} + +void GetVersion(int& major, int& minor, int& build, int& revision) { major = 1; minor = 0; build = 0; revision = 30; } -long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len) { - assert(pReader); - 
assert(pos >= 0); - - int status; - - //#ifdef _DEBUG - // long long total, available; - // status = pReader->Length(&total, &available); - // assert(status >= 0); - // assert((total < 0) || (available <= total)); - // assert(pos < available); - // assert((available - pos) >= 1); //assume here max u-int len is 8 - //#endif +long long ReadUInt(IMkvReader* pReader, long long pos, long& len) { + if (!pReader || pos < 0) + return E_FILE_FORMAT_INVALID; len = 1; - unsigned char b; - - status = pReader->Read(pos, 1, &b); + int status = pReader->Read(pos, 1, &b); if (status < 0) // error or underflow return status; @@ -63,10 +84,6 @@ long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len) { ++len; } - //#ifdef _DEBUG - // assert((available - pos) >= len); - //#endif - long long result = b & (~m); ++pos; @@ -92,16 +109,76 @@ long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len) { return result; } -long long mkvparser::GetUIntLength(IMkvReader* pReader, long long pos, - long& len) { - assert(pReader); - assert(pos >= 0); +// Reads an EBML ID and returns it. +// An ID must at least 1 byte long, cannot exceed 4, and its value must be +// greater than 0. +// See known EBML values and EBMLMaxIDLength: +// http://www.matroska.org/technical/specs/index.html +// Returns the ID, or a value less than 0 to report an error while reading the +// ID. +long long ReadID(IMkvReader* pReader, long long pos, long& len) { + if (pReader == NULL || pos < 0) + return E_FILE_FORMAT_INVALID; + + // Read the first byte. The length in bytes of the ID is determined by + // finding the first set bit in the first byte of the ID. + unsigned char temp_byte = 0; + int read_status = pReader->Read(pos, 1, &temp_byte); + + if (read_status < 0) + return E_FILE_FORMAT_INVALID; + else if (read_status > 0) // No data to read. + return E_BUFFER_NOT_FULL; + + if (temp_byte == 0) // ID length > 8 bytes; invalid file. 
+ return E_FILE_FORMAT_INVALID; + + int bit_pos = 0; + const int kMaxIdLengthInBytes = 4; + const int kCheckByte = 0x80; + + // Find the first bit that's set. + bool found_bit = false; + for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) { + if ((kCheckByte >> bit_pos) & temp_byte) { + found_bit = true; + break; + } + } + + if (!found_bit) { + // The value is too large to be a valid ID. + return E_FILE_FORMAT_INVALID; + } + + // Read the remaining bytes of the ID (if any). + const int id_length = bit_pos + 1; + long long ebml_id = temp_byte; + for (int i = 1; i < id_length; ++i) { + ebml_id <<= 8; + read_status = pReader->Read(pos + i, 1, &temp_byte); + + if (read_status < 0) + return E_FILE_FORMAT_INVALID; + else if (read_status > 0) + return E_BUFFER_NOT_FULL; + + ebml_id |= temp_byte; + } + + len = id_length; + return ebml_id; +} + +long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) { + if (!pReader || pos < 0) + return E_FILE_FORMAT_INVALID; long long total, available; int status = pReader->Length(&total, &available); - assert(status >= 0); - assert((total < 0) || (available <= total)); + if (status < 0 || (total >= 0 && available > total)) + return E_FILE_FORMAT_INVALID; len = 1; @@ -112,11 +189,9 @@ long long mkvparser::GetUIntLength(IMkvReader* pReader, long long pos, status = pReader->Read(pos, 1, &b); - if (status < 0) + if (status != 0) return status; - assert(status == 0); - if (b == 0) // we can't handle u-int values larger than 8 bytes return E_FILE_FORMAT_INVALID; @@ -132,12 +207,8 @@ long long mkvparser::GetUIntLength(IMkvReader* pReader, long long pos, // TODO(vigneshv): This function assumes that unsigned values never have their // high bit set. 
-long long mkvparser::UnserializeUInt(IMkvReader* pReader, long long pos, - long long size) { - assert(pReader); - assert(pos >= 0); - - if ((size <= 0) || (size > 8)) +long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) { + if (!pReader || pos < 0 || (size <= 0) || (size > 8)) return E_FILE_FORMAT_INVALID; long long result = 0; @@ -159,12 +230,9 @@ long long mkvparser::UnserializeUInt(IMkvReader* pReader, long long pos, return result; } -long mkvparser::UnserializeFloat(IMkvReader* pReader, long long pos, - long long size_, double& result) { - assert(pReader); - assert(pos >= 0); - - if ((size_ != 4) && (size_ != 8)) +long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_, + double& result) { + if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8))) return E_FILE_FORMAT_INVALID; const long size = static_cast<long>(size_); @@ -195,8 +263,6 @@ long mkvparser::UnserializeFloat(IMkvReader* pReader, long long pos, result = f; } else { - assert(size == 8); - union { double d; unsigned long long dd; @@ -216,28 +282,25 @@ long mkvparser::UnserializeFloat(IMkvReader* pReader, long long pos, result = d; } + if (mkvparser::isinf(result) || mkvparser::isnan(result)) + return E_FILE_FORMAT_INVALID; + return 0; } -long mkvparser::UnserializeInt(IMkvReader* pReader, long long pos, - long long size, long long& result) { - assert(pReader); - assert(pos >= 0); - assert(size > 0); - assert(size <= 8); - - { - signed char b; - - const long status = pReader->Read(pos, 1, (unsigned char*)&b); +long UnserializeInt(IMkvReader* pReader, long long pos, long long size, + long long& result_ref) { + if (!pReader || pos < 0 || size < 1 || size > 8) + return E_FILE_FORMAT_INVALID; - if (status < 0) - return status; + signed char first_byte = 0; + const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte); - result = b; + if (status < 0) + return status; - ++pos; - } + unsigned long long result = first_byte; + ++pos; for (long i = 1; i 
< size; ++i) { unsigned char b; @@ -253,27 +316,28 @@ long mkvparser::UnserializeInt(IMkvReader* pReader, long long pos, ++pos; } - return 0; // success + result_ref = static_cast<long long>(result); + return 0; } -long mkvparser::UnserializeString(IMkvReader* pReader, long long pos, - long long size_, char*& str) { +long UnserializeString(IMkvReader* pReader, long long pos, long long size, + char*& str) { delete[] str; str = NULL; - if (size_ >= LONG_MAX) // we need (size+1) chars + if (size >= LONG_MAX || size < 0) return E_FILE_FORMAT_INVALID; - const long size = static_cast<long>(size_); - - str = new (std::nothrow) char[size + 1]; + // +1 for '\0' terminator + const long required_size = static_cast<long>(size) + 1; + str = SafeArrayAlloc<char>(1, required_size); if (str == NULL) - return -1; + return E_FILE_FORMAT_INVALID; unsigned char* const buf = reinterpret_cast<unsigned char*>(str); - const long status = pReader->Read(pos, size, buf); + const long status = pReader->Read(pos, static_cast<long>(size), buf); if (status) { delete[] str; @@ -282,137 +346,149 @@ long mkvparser::UnserializeString(IMkvReader* pReader, long long pos, return status; } - str[size] = '\0'; - - return 0; // success + str[required_size - 1] = '\0'; + return 0; } -long mkvparser::ParseElementHeader(IMkvReader* pReader, long long& pos, - long long stop, long long& id, - long long& size) { - if ((stop >= 0) && (pos >= stop)) +long ParseElementHeader(IMkvReader* pReader, long long& pos, + long long stop, long long& id, + long long& size) { + if (stop >= 0 && pos >= stop) return E_FILE_FORMAT_INVALID; long len; - id = ReadUInt(pReader, pos, len); + id = ReadID(pReader, pos, len); if (id < 0) return E_FILE_FORMAT_INVALID; pos += len; // consume id - if ((stop >= 0) && (pos >= stop)) + if (stop >= 0 && pos >= stop) return E_FILE_FORMAT_INVALID; size = ReadUInt(pReader, pos, len); - if (size < 0) + if (size < 0 || len < 1 || len > 8) { + // Invalid: Negative payload size, negative or 0 length 
integer, or integer + // larger than 64 bits (libwebm cannot handle them). + return E_FILE_FORMAT_INVALID; + } + + // Avoid rolling over pos when very close to LLONG_MAX. + const unsigned long long rollover_check = + static_cast<unsigned long long>(pos) + len; + if (rollover_check > LLONG_MAX) return E_FILE_FORMAT_INVALID; pos += len; // consume length of size // pos now designates payload - if ((stop >= 0) && ((pos + size) > stop)) + if (stop >= 0 && pos >= stop) return E_FILE_FORMAT_INVALID; return 0; // success } -bool mkvparser::Match(IMkvReader* pReader, long long& pos, unsigned long id_, - long long& val) { - assert(pReader); - assert(pos >= 0); +bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, + long long& val) { + if (!pReader || pos < 0) + return false; - long long total, available; + long long total = 0; + long long available = 0; const long status = pReader->Length(&total, &available); - assert(status >= 0); - assert((total < 0) || (available <= total)); - if (status < 0) + if (status < 0 || (total >= 0 && available > total)) return false; - long len; + long len = 0; - const long long id = ReadUInt(pReader, pos, len); - assert(id >= 0); - assert(len > 0); - assert(len <= 8); - assert((pos + len) <= available); + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (available - pos) > len) + return false; - if ((unsigned long)id != id_) + if (static_cast<unsigned long>(id) != expected_id) return false; pos += len; // consume id const long long size = ReadUInt(pReader, pos, len); - assert(size >= 0); - assert(size <= 8); - assert(len > 0); - assert(len <= 8); - assert((pos + len) <= available); + if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) > len) + return false; pos += len; // consume length of size of payload val = UnserializeUInt(pReader, pos, size); - assert(val >= 0); + if (val < 0) + return false; pos += size; // consume size of payload return true; } -bool mkvparser::Match(IMkvReader* 
pReader, long long& pos, unsigned long id_, - unsigned char*& buf, size_t& buflen) { - assert(pReader); - assert(pos >= 0); +bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, + unsigned char*& buf, size_t& buflen) { + if (!pReader || pos < 0) + return false; - long long total, available; + long long total = 0; + long long available = 0; long status = pReader->Length(&total, &available); - assert(status >= 0); - assert((total < 0) || (available <= total)); - if (status < 0) + if (status < 0 || (total >= 0 && available > total)) return false; - long len; - const long long id = ReadUInt(pReader, pos, len); - assert(id >= 0); - assert(len > 0); - assert(len <= 8); - assert((pos + len) <= available); + long len = 0; + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (available - pos) > len) + return false; - if ((unsigned long)id != id_) + if (static_cast<unsigned long>(id) != expected_id) return false; pos += len; // consume id - const long long size_ = ReadUInt(pReader, pos, len); - assert(size_ >= 0); - assert(len > 0); - assert(len <= 8); - assert((pos + len) <= available); + const long long size = ReadUInt(pReader, pos, len); + if (size < 0 || len <= 0 || len > 8 || (available - pos) > len) + return false; + + unsigned long long rollover_check = + static_cast<unsigned long long>(pos) + len; + if (rollover_check > LLONG_MAX) + return false; pos += len; // consume length of size of payload - assert((pos + size_) <= available); - const long buflen_ = static_cast<long>(size_); + rollover_check = static_cast<unsigned long long>(pos) + size; + if (rollover_check > LLONG_MAX) + return false; + + if ((pos + size) > available) + return false; + + if (size >= LONG_MAX) + return false; + + const long buflen_ = static_cast<long>(size); - buf = new (std::nothrow) unsigned char[buflen_]; - assert(buf); // TODO + buf = SafeArrayAlloc<unsigned char>(1, buflen_); + if (!buf) + return false; status = pReader->Read(pos, buflen_, buf); - 
assert(status == 0); // TODO + if (status != 0) + return false; buflen = buflen_; - pos += size_; // consume size of payload + pos += size; // consume size of payload return true; } -namespace mkvparser { - EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); } EBMLHeader::~EBMLHeader() { delete[] m_docType; } @@ -433,7 +509,8 @@ void EBMLHeader::Init() { } long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { - assert(pReader); + if (!pReader) + return E_FILE_FORMAT_INVALID; long long total, available; @@ -445,67 +522,45 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { pos = 0; long long end = (available >= 1024) ? 1024 : available; - for (;;) { - unsigned char b = 0; - - while (pos < end) { - status = pReader->Read(pos, 1, &b); - - if (status < 0) // error - return status; - - if (b == 0x1A) - break; - - ++pos; - } + // Scan until we find what looks like the first byte of the EBML header. + const long long kMaxScanBytes = (available >= 1024) ? 1024 : available; + const unsigned char kEbmlByte0 = 0x1A; + unsigned char scan_byte = 0; - if (b != 0x1A) { - if (pos >= 1024) - return E_FILE_FORMAT_INVALID; // don't bother looking anymore + while (pos < kMaxScanBytes) { + status = pReader->Read(pos, 1, &scan_byte); - if ((total >= 0) && ((total - available) < 5)) - return E_FILE_FORMAT_INVALID; - - return available + 5; // 5 = 4-byte ID + 1st byte of size - } - - if ((total >= 0) && ((total - pos) < 5)) - return E_FILE_FORMAT_INVALID; - - if ((available - pos) < 5) - return pos + 5; // try again later - - long len; - - const long long result = ReadUInt(pReader, pos, len); - - if (result < 0) // error - return result; + if (status < 0) // error + return status; + else if (status > 0) + return E_BUFFER_NOT_FULL; - if (result == 0x0A45DFA3) { // EBML Header ID - pos += len; // consume ID + if (scan_byte == kEbmlByte0) break; - } - ++pos; // throw away just the 0x1A byte, and try again + ++pos; } - // pos designates start of size field + 
long len = 0; + const long long ebml_id = ReadID(pReader, pos, len); - // get length of size field + // TODO(tomfinegan): Move Matroska ID constants into a common namespace. + if (len != 4 || ebml_id != mkvmuxer::kMkvEBML) + return E_FILE_FORMAT_INVALID; - long len; + // Move read pos forward to the EBML header size field. + pos += 4; + + // Read length of size field. long long result = GetUIntLength(pReader, pos, len); if (result < 0) // error - return result; - - if (result > 0) // need more data - return result; + return E_FILE_FORMAT_INVALID; + else if (result > 0) // need more data + return E_BUFFER_NOT_FULL; - assert(len > 0); - assert(len <= 8); + if (len < 1 || len > 8) + return E_FILE_FORMAT_INVALID; if ((total >= 0) && ((total - pos) < len)) return E_FILE_FORMAT_INVALID; @@ -513,8 +568,7 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { if ((available - pos) < len) return pos + len; // try again later - // get the EBML header size - + // Read the EBML header size. 
result = ReadUInt(pReader, pos, len); if (result < 0) // error @@ -542,30 +596,30 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { if (status < 0) // error return status; - if (size == 0) // weird + if (size == 0) return E_FILE_FORMAT_INVALID; - if (id == 0x0286) { // version + if (id == mkvmuxer::kMkvEBMLVersion) { m_version = UnserializeUInt(pReader, pos, size); if (m_version <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x02F7) { // read version + } else if (id == mkvmuxer::kMkvEBMLReadVersion) { m_readVersion = UnserializeUInt(pReader, pos, size); if (m_readVersion <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x02F2) { // max id length + } else if (id == mkvmuxer::kMkvEBMLMaxIDLength) { m_maxIdLength = UnserializeUInt(pReader, pos, size); if (m_maxIdLength <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x02F3) { // max size length + } else if (id == mkvmuxer::kMkvEBMLMaxSizeLength) { m_maxSizeLength = UnserializeUInt(pReader, pos, size); if (m_maxSizeLength <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x0282) { // doctype + } else if (id == mkvmuxer::kMkvDocType) { if (m_docType) return E_FILE_FORMAT_INVALID; @@ -573,12 +627,12 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { if (status) // error return status; - } else if (id == 0x0287) { // doctype version + } else if (id == mkvmuxer::kMkvDocTypeVersion) { m_docTypeVersion = UnserializeUInt(pReader, pos, size); if (m_docTypeVersion <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x0285) { // doctype read version + } else if (id == mkvmuxer::kMkvDocTypeReadVersion) { m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); if (m_docTypeReadVersion <= 0) @@ -588,7 +642,18 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { pos += size; } - assert(pos == end); + if (pos != end) + return E_FILE_FORMAT_INVALID; + + // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid. 
+ if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0) + return E_FILE_FORMAT_INVALID; + + // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid. + if (m_maxIdLength <= 0 || m_maxIdLength > 4 || + m_maxSizeLength <= 0 || m_maxSizeLength > 8) + return E_FILE_FORMAT_INVALID; + return 0; } @@ -621,8 +686,6 @@ Segment::~Segment() { while (i != j) { Cluster* const p = *i++; - assert(p); - delete p; } @@ -638,8 +701,8 @@ Segment::~Segment() { long long Segment::CreateInstance(IMkvReader* pReader, long long pos, Segment*& pSegment) { - assert(pReader); - assert(pos >= 0); + if (pReader == NULL || pos < 0) + return E_PARSE_FAILED; pSegment = NULL; @@ -691,10 +754,10 @@ long long Segment::CreateInstance(IMkvReader* pReader, long long pos, return pos + len; const long long idpos = pos; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); - if (id < 0) // error - return id; + if (id < 0) + return E_FILE_FORMAT_INVALID; pos += len; // consume ID @@ -723,7 +786,7 @@ long long Segment::CreateInstance(IMkvReader* pReader, long long pos, // Handle "unknown size" for live streaming of webm files. 
const long long unknown_size = (1LL << (7 * len)) - 1; - if (id == 0x08538067) { // Segment ID + if (id == mkvmuxer::kMkvSegment) { if (size == unknown_size) size = -1; @@ -733,12 +796,9 @@ long long Segment::CreateInstance(IMkvReader* pReader, long long pos, else if ((pos + size) > total) size = -1; - pSegment = new (std::nothrow) Segment(pReader, idpos, - // elem_size - pos, size); - - if (pSegment == 0) - return -1; // generic error + pSegment = new (std::nothrow) Segment(pReader, idpos, pos, size); + if (pSegment == NULL) + return E_PARSE_FAILED; return 0; // success } @@ -767,11 +827,15 @@ long long Segment::ParseHeaders() { if (status < 0) // error return status; - assert((total < 0) || (available <= total)); + if (total > 0 && available > total) + return E_FILE_FORMAT_INVALID; const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - assert((segment_stop < 0) || (total < 0) || (segment_stop <= total)); - assert((segment_stop < 0) || (m_pos <= segment_stop)); + + if ((segment_stop >= 0 && total >= 0 && segment_stop > total) || + (segment_stop >= 0 && m_pos > segment_stop)) { + return E_FILE_FORMAT_INVALID; + } for (;;) { if ((total >= 0) && (m_pos >= total)) @@ -783,6 +847,11 @@ long long Segment::ParseHeaders() { long long pos = m_pos; const long long element_start = pos; + // Avoid rolling over pos when very close to LLONG_MAX. + unsigned long long rollover_check = pos + 1ULL; + if (rollover_check > LLONG_MAX) + return E_FILE_FORMAT_INVALID; + if ((pos + 1) > available) return (pos + 1); @@ -792,8 +861,10 @@ long long Segment::ParseHeaders() { if (result < 0) // error return result; - if (result > 0) // underflow (weird) + if (result > 0) { + // MkvReader doesn't have enough data to satisfy this read attempt. 
return (pos + 1); + } if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; @@ -802,12 +873,12 @@ long long Segment::ParseHeaders() { return pos + len; const long long idpos = pos; - const long long id = ReadUInt(m_pReader, idpos, len); + const long long id = ReadID(m_pReader, idpos, len); - if (id < 0) // error - return id; + if (id < 0) + return E_FILE_FORMAT_INVALID; - if (id == 0x0F43B675) // Cluster ID + if (id == mkvmuxer::kMkvCluster) break; pos += len; // consume ID @@ -821,8 +892,10 @@ long long Segment::ParseHeaders() { if (result < 0) // error return result; - if (result > 0) // underflow (weird) + if (result > 0) { + // MkvReader doesn't have enough data to satisfy this read attempt. return (pos + 1); + } if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; @@ -832,11 +905,19 @@ long long Segment::ParseHeaders() { const long long size = ReadUInt(m_pReader, pos, len); - if (size < 0) // error + if (size < 0 || len < 1 || len > 8) { + // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or + // len > 8 is true instead of checking this _everywhere_. return size; + } pos += len; // consume length of size of element + // Avoid rolling over pos when very close to LLONG_MAX. 
+ rollover_check = static_cast<unsigned long long>(pos) + size; + if (rollover_check > LLONG_MAX) + return E_FILE_FORMAT_INVALID; + const long long element_size = size + pos - element_start; // Pos now points to start of payload @@ -849,7 +930,7 @@ long long Segment::ParseHeaders() { if ((pos + size) > available) return pos + size; - if (id == 0x0549A966) { // Segment Info ID + if (id == mkvmuxer::kMkvInfo) { if (m_pInfo) return E_FILE_FORMAT_INVALID; @@ -863,7 +944,7 @@ long long Segment::ParseHeaders() { if (status) return status; - } else if (id == 0x0654AE6B) { // Tracks ID + } else if (id == mkvmuxer::kMkvTracks) { if (m_pTracks) return E_FILE_FORMAT_INVALID; @@ -877,7 +958,7 @@ long long Segment::ParseHeaders() { if (status) return status; - } else if (id == 0x0C53BB6B) { // Cues ID + } else if (id == mkvmuxer::kMkvCues) { if (m_pCues == NULL) { m_pCues = new (std::nothrow) Cues(this, pos, size, element_start, element_size); @@ -885,7 +966,7 @@ long long Segment::ParseHeaders() { if (m_pCues == NULL) return -1; } - } else if (id == 0x014D9B74) { // SeekHead ID + } else if (id == mkvmuxer::kMkvSeekHead) { if (m_pSeekHead == NULL) { m_pSeekHead = new (std::nothrow) SeekHead(this, pos, size, element_start, element_size); @@ -898,7 +979,7 @@ long long Segment::ParseHeaders() { if (status) return status; } - } else if (id == 0x0043A770) { // Chapters ID + } else if (id == mkvmuxer::kMkvChapters) { if (m_pChapters == NULL) { m_pChapters = new (std::nothrow) Chapters(this, pos, size, element_start, element_size); @@ -911,7 +992,7 @@ long long Segment::ParseHeaders() { if (status) return status; } - } else if (id == 0x0254C367) { // Tags ID + } else if (id == mkvmuxer::kMkvTags) { if (m_pTags == NULL) { m_pTags = new (std::nothrow) Tags(this, pos, size, element_start, element_size); @@ -929,7 +1010,8 @@ long long Segment::ParseHeaders() { m_pos = pos + size; // consume payload } - assert((segment_stop < 0) || (m_pos <= segment_stop)); + if (segment_stop >= 0 && m_pos 
> segment_stop) + return E_FILE_FORMAT_INVALID; if (m_pInfo == NULL) // TODO: liberalize this behavior return E_FILE_FORMAT_INVALID; @@ -960,7 +1042,8 @@ long Segment::DoLoadCluster(long long& pos, long& len) { if (status < 0) // error return status; - assert((total < 0) || (avail <= total)); + if (total >= 0 && avail > total) + return E_FILE_FORMAT_INVALID; const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; @@ -988,7 +1071,7 @@ long Segment::DoLoadCluster(long long& pos, long& len) { if (result < 0) // error return static_cast<long>(result); - if (result > 0) // weird + if (result > 0) return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) @@ -998,10 +1081,10 @@ long Segment::DoLoadCluster(long long& pos, long& len) { return E_BUFFER_NOT_FULL; const long long idpos = pos; - const long long id = ReadUInt(m_pReader, idpos, len); + const long long id = ReadID(m_pReader, idpos, len); - if (id < 0) // error (or underflow) - return static_cast<long>(id); + if (id < 0) + return E_FILE_FORMAT_INVALID; pos += len; // consume ID @@ -1017,7 +1100,7 @@ long Segment::DoLoadCluster(long long& pos, long& len) { if (result < 0) // error return static_cast<long>(result); - if (result > 0) // weird + if (result > 0) return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) @@ -1035,7 +1118,8 @@ long Segment::DoLoadCluster(long long& pos, long& len) { // pos now points to start of payload - if (size == 0) { // weird + if (size == 0) { + // Missing element payload: move on. m_pos = pos; continue; } @@ -1047,24 +1131,30 @@ long Segment::DoLoadCluster(long long& pos, long& len) { return E_FILE_FORMAT_INVALID; } - if (id == 0x0C53BB6B) { // Cues ID - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; // TODO: liberalize + if (id == mkvmuxer::kMkvCues) { + if (size == unknown_size) { + // Cues element of unknown size: Not supported. 
+ return E_FILE_FORMAT_INVALID; + } if (m_pCues == NULL) { const long long element_size = (pos - idpos) + size; - m_pCues = new Cues(this, pos, size, idpos, element_size); - assert(m_pCues); // TODO + m_pCues = new (std::nothrow) Cues(this, pos, size, idpos, element_size); + if (m_pCues == NULL) + return -1; } m_pos = pos + size; // consume payload continue; } - if (id != 0x0F43B675) { // Cluster ID + if (id != mkvmuxer::kMkvCluster) { + // Besides the Segment, Libwebm allows only cluster elements of unknown + // size. Fail the parse upon encountering a non-cluster element reporting + // unknown size. if (size == unknown_size) - return E_FILE_FORMAT_INVALID; // TODO: liberalize + return E_FILE_FORMAT_INVALID; m_pos = pos + size; // consume payload continue; @@ -1080,7 +1170,10 @@ long Segment::DoLoadCluster(long long& pos, long& len) { break; } - assert(cluster_off >= 0); // have cluster + if (cluster_off < 0) { + // No cluster, die. + return E_FILE_FORMAT_INVALID; + } long long pos_; long len_; @@ -1126,14 +1219,16 @@ long Segment::DoLoadCluster(long long& pos, long& len) { const long idx = m_clusterCount; if (m_clusterPreloadCount > 0) { - assert(idx < m_clusterSize); + if (idx >= m_clusterSize) + return E_FILE_FORMAT_INVALID; Cluster* const pCluster = m_clusters[idx]; - assert(pCluster); - assert(pCluster->m_index < 0); + if (pCluster == NULL || pCluster->m_index >= 0) + return E_FILE_FORMAT_INVALID; const long long off = pCluster->GetPosition(); - assert(off >= 0); + if (off < 0) + return E_FILE_FORMAT_INVALID; if (off == cluster_off) { // preloaded already if (status == 0) // no entries found @@ -1155,7 +1250,8 @@ long Segment::DoLoadCluster(long long& pos, long& len) { --m_clusterPreloadCount; m_pos = pos; // consume payload - assert((segment_stop < 0) || (m_pos <= segment_stop)); + if (segment_stop >= 0 && m_pos > segment_stop) + return E_FILE_FORMAT_INVALID; return 0; // success } @@ -1182,19 +1278,21 @@ long Segment::DoLoadCluster(long long& pos, long& 
len) { // status > 0 means we have an entry Cluster* const pCluster = Cluster::Create(this, idx, cluster_off); - // element_size); - assert(pCluster); + if (pCluster == NULL) + return -1; - AppendCluster(pCluster); - assert(m_clusters); - assert(idx < m_clusterSize); - assert(m_clusters[idx] == pCluster); + if (!AppendCluster(pCluster)) { + delete pCluster; + return -1; + } if (cluster_size >= 0) { pos += cluster_size; m_pos = pos; - assert((segment_stop < 0) || (m_pos <= segment_stop)); + + if (segment_stop > 0 && m_pos > segment_stop) + return E_FILE_FORMAT_INVALID; return 0; } @@ -1210,8 +1308,8 @@ long Segment::DoLoadCluster(long long& pos, long& len) { } long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) { - assert(m_pos < 0); - assert(m_pUnknownSize); + if (m_pos >= 0 || m_pUnknownSize == NULL) + return E_PARSE_FAILED; const long status = m_pUnknownSize->Parse(pos, len); @@ -1221,12 +1319,11 @@ long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) { if (status == 0) // parsed a block return 2; // continue parsing - assert(status > 0); // nothing left to parse of this cluster - const long long start = m_pUnknownSize->m_element_start; - const long long size = m_pUnknownSize->GetElementSize(); - assert(size >= 0); + + if (size < 0) + return E_FILE_FORMAT_INVALID; pos = start + size; m_pos = pos; @@ -1236,24 +1333,26 @@ long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) { return 2; // continue parsing } -void Segment::AppendCluster(Cluster* pCluster) { - assert(pCluster); - assert(pCluster->m_index >= 0); +bool Segment::AppendCluster(Cluster* pCluster) { + if (pCluster == NULL || pCluster->m_index < 0) + return false; const long count = m_clusterCount + m_clusterPreloadCount; long& size = m_clusterSize; - assert(size >= count); - const long idx = pCluster->m_index; - assert(idx == m_clusterCount); + + if (size < count || idx != m_clusterCount) + return false; if (count >= size) { const long n = (size <= 0) ? 
2048 : 2 * size; - Cluster** const qq = new Cluster*[n]; - Cluster** q = qq; + Cluster** const qq = new (std::nothrow) Cluster*[n]; + if (qq == NULL) + return false; + Cluster** q = qq; Cluster** p = m_clusters; Cluster** const pp = p + count; @@ -1267,18 +1366,18 @@ void Segment::AppendCluster(Cluster* pCluster) { } if (m_clusterPreloadCount > 0) { - assert(m_clusters); - Cluster** const p = m_clusters + m_clusterCount; - assert(*p); - assert((*p)->m_index < 0); + if (*p == NULL || (*p)->m_index >= 0) + return false; Cluster** q = p + m_clusterPreloadCount; - assert(q < (m_clusters + size)); + if (q >= (m_clusters + size)) + return false; for (;;) { Cluster** const qq = q - 1; - assert((*qq)->m_index < 0); + if ((*qq)->m_index >= 0) + return false; *q = *qq; q = qq; @@ -1290,22 +1389,25 @@ void Segment::AppendCluster(Cluster* pCluster) { m_clusters[idx] = pCluster; ++m_clusterCount; + return true; } -void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { - assert(pCluster); - assert(pCluster->m_index < 0); - assert(idx >= m_clusterCount); +bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { + if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount) + return false; const long count = m_clusterCount + m_clusterPreloadCount; long& size = m_clusterSize; - assert(size >= count); + if (size < count) + return false; if (count >= size) { const long n = (size <= 0) ? 
2048 : 2 * size; - Cluster** const qq = new Cluster*[n]; + Cluster** const qq = new (std::nothrow) Cluster*[n]; + if (qq == NULL) + return false; Cluster** q = qq; Cluster** p = m_clusters; @@ -1320,17 +1422,20 @@ void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { size = n; } - assert(m_clusters); + if (m_clusters == NULL) + return false; Cluster** const p = m_clusters + idx; Cluster** q = m_clusters + count; - assert(q >= p); - assert(q < (m_clusters + size)); + if (q < p || q >= (m_clusters + size)) + return false; while (q > p) { Cluster** const qq = q - 1; - assert((*qq)->m_index < 0); + + if ((*qq)->m_index >= 0) + return false; *q = *qq; q = qq; @@ -1338,13 +1443,12 @@ void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { m_clusters[idx] = pCluster; ++m_clusterPreloadCount; + return true; } long Segment::Load() { - assert(m_clusters == NULL); - assert(m_clusterSize == 0); - assert(m_clusterCount == 0); - // assert(m_size >= 0); + if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0) + return E_PARSE_FAILED; // Outermost (level 0) segment object has been constructed, // and pos designates start of payload. 
We need to find the @@ -1358,8 +1462,8 @@ long Segment::Load() { if (header_status > 0) // underflow return E_BUFFER_NOT_FULL; - assert(m_pInfo); - assert(m_pTracks); + if (m_pInfo == NULL || m_pTracks == NULL) + return E_FILE_FORMAT_INVALID; for (;;) { const int status = LoadCluster(); @@ -1408,16 +1512,19 @@ long SeekHead::Parse() { if (status < 0) // error return status; - if (id == 0x0DBB) // SeekEntry ID + if (id == mkvmuxer::kMkvSeek) ++entry_count; - else if (id == 0x6C) // Void ID + else if (id == mkvmuxer::kMkvVoid) ++void_element_count; pos += size; // consume payload - assert(pos <= stop); + + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; m_entries = new (std::nothrow) Entry[entry_count]; @@ -1446,14 +1553,14 @@ long SeekHead::Parse() { if (status < 0) // error return status; - if (id == 0x0DBB) { // SeekEntry ID + if (id == mkvmuxer::kMkvSeek) { if (ParseEntry(pReader, pos, size, pEntry)) { Entry& e = *pEntry++; e.element_start = idpos; e.element_size = (pos + size) - idpos; } - } else if (id == 0x6C) { // Void ID + } else if (id == mkvmuxer::kMkvVoid) { VoidElement& e = *pVoidElement++; e.element_start = idpos; @@ -1461,10 +1568,12 @@ long SeekHead::Parse() { } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries); assert(count_ >= 0); @@ -1553,9 +1662,9 @@ long Segment::ParseCues(long long off, long long& pos, long& len) { const long long idpos = pos; - const long long id = ReadUInt(m_pReader, idpos, len); + const long long id = ReadID(m_pReader, idpos, len); - if (id != 0x0C53BB6B) // Cues ID + if (id != mkvmuxer::kMkvCues) return E_FILE_FORMAT_INVALID; pos += len; // consume ID @@ -1615,7 +1724,8 @@ long Segment::ParseCues(long long off, long long& pos, long& len) { m_pCues = new (std::nothrow) 
Cues(this, pos, size, element_start, element_size); - assert(m_pCues); // TODO + if (m_pCues == NULL) + return -1; return 0; // success } @@ -1632,10 +1742,11 @@ bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, // parse the container for the level-1 element ID - const long long seekIdId = ReadUInt(pReader, pos, len); - // seekIdId; + const long long seekIdId = ReadID(pReader, pos, len); + if (seekIdId < 0) + return false; - if (seekIdId != 0x13AB) // SeekID ID + if (seekIdId != mkvmuxer::kMkvSeekID) return false; if ((pos + len) > stop) @@ -1677,9 +1788,9 @@ bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, pos += seekIdSize; // consume SeekID payload - const long long seekPosId = ReadUInt(pReader, pos, len); + const long long seekPosId = ReadID(pReader, pos, len); - if (seekPosId != 0x13AC) // SeekPos ID + if (seekPosId != mkvmuxer::kMkvSeekPosition) return false; if ((pos + len) > stop) @@ -1757,8 +1868,8 @@ bool Cues::Init() const { if (m_cue_points) return true; - assert(m_count == 0); - assert(m_preload_count == 0); + if (m_count != 0 || m_preload_count != 0) + return false; IMkvReader* const pReader = m_pSegment->m_pReader; @@ -1772,7 +1883,7 @@ bool Cues::Init() const { long len; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); if (id < 0 || (pos + len) > stop) { return false; } @@ -1789,21 +1900,27 @@ bool Cues::Init() const { return false; } - if (id == 0x3B) // CuePoint ID - PreloadCuePoint(cue_points_size, idpos); + if (id == mkvmuxer::kMkvCuePoint) { + if (!PreloadCuePoint(cue_points_size, idpos)) + return false; + } pos += size; // skip payload } return true; } -void Cues::PreloadCuePoint(long& cue_points_size, long long pos) const { - assert(m_count == 0); +bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const { + if (m_count != 0) + return false; if (m_preload_count >= cue_points_size) { const long n = (cue_points_size 
<= 0) ? 2048 : 2 * cue_points_size; - CuePoint** const qq = new CuePoint*[n]; + CuePoint** const qq = new (std::nothrow) CuePoint*[n]; + if (qq == NULL) + return false; + CuePoint** q = qq; // beginning of target CuePoint** p = m_cue_points; // beginning of source @@ -1818,14 +1935,15 @@ void Cues::PreloadCuePoint(long& cue_points_size, long long pos) const { cue_points_size = n; } - CuePoint* const pCP = new CuePoint(m_preload_count, pos); + CuePoint* const pCP = new (std::nothrow) CuePoint(m_preload_count, pos); + if (pCP == NULL) + return false; + m_cue_points[m_preload_count++] = pCP; + return true; } bool Cues::LoadCuePoint() const { - // odbgstream os; - // os << "Cues::LoadCuePoint" << endl; - const long long stop = m_start + m_size; if (m_pos >= stop) @@ -1843,32 +1961,33 @@ bool Cues::LoadCuePoint() const { long len; - const long long id = ReadUInt(pReader, m_pos, len); - assert(id >= 0); // TODO - assert((m_pos + len) <= stop); + const long long id = ReadID(pReader, m_pos, len); + if (id < 0 || (m_pos + len) > stop) + return false; m_pos += len; // consume ID const long long size = ReadUInt(pReader, m_pos, len); - assert(size >= 0); - assert((m_pos + len) <= stop); + if (size < 0 || (m_pos + len) > stop) + return false; m_pos += len; // consume Size field - assert((m_pos + size) <= stop); + if ((m_pos + size) > stop) + return false; - if (id != 0x3B) { // CuePoint ID + if (id != mkvmuxer::kMkvCuePoint) { m_pos += size; // consume payload - assert(m_pos <= stop); + if (m_pos > stop) + return false; continue; } - assert(m_preload_count > 0); + if (m_preload_count < 1) + return false; CuePoint* const pCP = m_cue_points[m_count]; - assert(pCP); - assert((pCP->GetTimeCode() >= 0) || (-pCP->GetTimeCode() == idpos)); - if (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos)) + if (!pCP || (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))) return false; if (!pCP->Load(pReader)) { @@ -1879,24 +1998,18 @@ bool Cues::LoadCuePoint() const { 
--m_preload_count; m_pos += size; // consume payload - assert(m_pos <= stop); + if (m_pos > stop) + return false; return true; // yes, we loaded a cue point } - // return (m_pos < stop); return false; // no, we did not load a cue point } bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, const CuePoint::TrackPosition*& pTP) const { - assert(time_ns >= 0); - assert(pTrack); - - if (m_cue_points == NULL) - return false; - - if (m_count == 0) + if (time_ns < 0 || pTrack == NULL || m_cue_points == NULL || m_count == 0) return false; CuePoint** const ii = m_cue_points; @@ -1906,7 +2019,8 @@ bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, CuePoint** j = jj; pCP = *i; - assert(pCP); + if (pCP == NULL) + return false; if (time_ns <= pCP->GetTime(m_pSegment)) { pTP = pCP->Find(pTrack); @@ -1920,10 +2034,12 @@ bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, //[j, jj) > time_ns CuePoint** const k = i + (j - i) / 2; - assert(k < jj); + if (k >= jj) + return false; CuePoint* const pCP = *k; - assert(pCP); + if (pCP == NULL) + return false; const long long t = pCP->GetTime(m_pSegment); @@ -1932,16 +2048,17 @@ bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, else j = k; - assert(i <= j); + if (i > j) + return false; } - assert(i == j); - assert(i <= jj); - assert(i > ii); + if (i != j || i > jj || i <= ii) + return false; pCP = *--i; - assert(pCP); - assert(pCP->GetTime(m_pSegment) <= time_ns); + + if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns) + return false; // TODO: here and elsewhere, it's probably not correct to search // for the cue point with this time, and then search for a matching @@ -1956,55 +2073,50 @@ bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, } const CuePoint* Cues::GetFirst() const { - if (m_cue_points == NULL) - return NULL; - - if (m_count == 0) + if (m_cue_points == NULL || m_count == 0) return NULL; 
CuePoint* const* const pp = m_cue_points; - assert(pp); + if (pp == NULL) + return NULL; CuePoint* const pCP = pp[0]; - assert(pCP); - assert(pCP->GetTimeCode() >= 0); + if (pCP == NULL || pCP->GetTimeCode() < 0) + return NULL; return pCP; } const CuePoint* Cues::GetLast() const { - if (m_cue_points == NULL) - return NULL; - - if (m_count <= 0) + if (m_cue_points == NULL || m_count <= 0) return NULL; const long index = m_count - 1; CuePoint* const* const pp = m_cue_points; - assert(pp); + if (pp == NULL) + return NULL; CuePoint* const pCP = pp[index]; - assert(pCP); - assert(pCP->GetTimeCode() >= 0); + if (pCP == NULL || pCP->GetTimeCode() < 0) + return NULL; return pCP; } const CuePoint* Cues::GetNext(const CuePoint* pCurr) const { - if (pCurr == NULL) + if (pCurr == NULL || pCurr->GetTimeCode() < 0 || + m_cue_points == NULL || m_count < 1) { return NULL; - - assert(pCurr->GetTimeCode() >= 0); - assert(m_cue_points); - assert(m_count >= 1); + } long index = pCurr->m_index; - assert(index < m_count); + if (index >= m_count) + return NULL; CuePoint* const* const pp = m_cue_points; - assert(pp); - assert(pp[index] == pCurr); + if (pp == NULL || pp[index] != pCurr) + return NULL; ++index; @@ -2012,18 +2124,16 @@ const CuePoint* Cues::GetNext(const CuePoint* pCurr) const { return NULL; CuePoint* const pNext = pp[index]; - assert(pNext); - assert(pNext->GetTimeCode() >= 0); + + if (pNext == NULL || pNext->GetTimeCode() < 0) + return NULL; return pNext; } const BlockEntry* Cues::GetBlock(const CuePoint* pCP, const CuePoint::TrackPosition* pTP) const { - if (pCP == NULL) - return NULL; - - if (pTP == NULL) + if (pCP == NULL || pTP == NULL) return NULL; return m_pSegment->GetBlock(*pCP, *pTP); @@ -2070,11 +2180,15 @@ const BlockEntry* Segment::GetBlock(const CuePoint& cp, // assert(Cluster::HasBlockEntries(this, tp.m_pos)); Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1); - assert(pCluster); + if (pCluster == NULL) + return NULL; const ptrdiff_t idx 
= i - m_clusters; - PreloadCluster(pCluster, idx); + if (!PreloadCluster(pCluster, idx)) { + delete pCluster; + return NULL; + } assert(m_clusters); assert(m_clusterPreloadCount > 0); assert(m_clusters[idx] == pCluster); @@ -2125,12 +2239,15 @@ const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) { // assert(Cluster::HasBlockEntries(this, tp.m_pos)); Cluster* const pCluster = Cluster::Create(this, -1, requested_pos); - //-1); - assert(pCluster); + if (pCluster == NULL) + return NULL; const ptrdiff_t idx = i - m_clusters; - PreloadCluster(pCluster, idx); + if (!PreloadCluster(pCluster, idx)) { + delete pCluster; + return NULL; + } assert(m_clusters); assert(m_clusterPreloadCount > 0); assert(m_clusters[idx] == pCluster); @@ -2168,9 +2285,8 @@ bool CuePoint::Load(IMkvReader* pReader) { { long len; - const long long id = ReadUInt(pReader, pos_, len); - assert(id == 0x3B); // CuePoint ID - if (id != 0x3B) + const long long id = ReadID(pReader, pos_, len); + if (id != mkvmuxer::kMkvCuePoint) return false; pos_ += len; // consume ID @@ -2193,7 +2309,7 @@ bool CuePoint::Load(IMkvReader* pReader) { while (pos < stop) { long len; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); if ((id < 0) || (pos + len > stop)) { return false; } @@ -2210,10 +2326,10 @@ bool CuePoint::Load(IMkvReader* pReader) { return false; } - if (id == 0x33) // CueTime ID + if (id == mkvmuxer::kMkvCueTime) m_timecode = UnserializeUInt(pReader, pos, size); - else if (id == 0x37) // CueTrackPosition(s) ID + else if (id == mkvmuxer::kMkvCueTrackPositions) ++m_track_positions_count; pos += size; // consume payload @@ -2227,7 +2343,9 @@ bool CuePoint::Load(IMkvReader* pReader) { // << " timecode=" << m_timecode // << endl; - m_track_positions = new TrackPosition[m_track_positions_count]; + m_track_positions = new (std::nothrow) TrackPosition[m_track_positions_count]; + if (m_track_positions == NULL) + return false; // Now parse track 
positions @@ -2237,9 +2355,9 @@ bool CuePoint::Load(IMkvReader* pReader) { while (pos < stop) { long len; - const long long id = ReadUInt(pReader, pos, len); - assert(id >= 0); - assert((pos + len) <= stop); + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (pos + len) > stop) + return false; pos += len; // consume ID @@ -2250,7 +2368,7 @@ bool CuePoint::Load(IMkvReader* pReader) { pos += len; // consume Size field assert((pos + size) <= stop); - if (id == 0x37) { // CueTrackPosition(s) ID + if (id == mkvmuxer::kMkvCueTrackPositions) { TrackPosition& tp = *p++; if (!tp.Parse(pReader, pos, size)) { return false; @@ -2258,7 +2376,8 @@ bool CuePoint::Load(IMkvReader* pReader) { } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return false; } assert(size_t(p - m_track_positions) == m_track_positions_count); @@ -2281,7 +2400,7 @@ bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_, while (pos < stop) { long len; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); if ((id < 0) || ((pos + len) > stop)) { return false; } @@ -2298,13 +2417,11 @@ bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_, return false; } - if (id == 0x77) // CueTrack ID + if (id == mkvmuxer::kMkvCueTrack) m_track = UnserializeUInt(pReader, pos, size); - - else if (id == 0x71) // CueClusterPos ID + else if (id == mkvmuxer::kMkvCueClusterPosition) m_pos = UnserializeUInt(pReader, pos, size); - - else if (id == 0x1378) // CueBlockNumber + else if (id == mkvmuxer::kMkvCueBlockNumber) m_block = UnserializeUInt(pReader, pos, size); pos += size; // consume payload @@ -2437,9 +2554,8 @@ const Cluster* Segment::GetNext(const Cluster* pCurr) { if (result != 0) return NULL; - const long long id = ReadUInt(m_pReader, pos, len); - assert(id == 0x0F43B675); // Cluster ID - if (id != 0x0F43B675) + const long long id = ReadID(m_pReader, pos, len); + if (id != 
mkvmuxer::kMkvCluster) return NULL; pos += len; // consume ID @@ -2474,8 +2590,9 @@ const Cluster* Segment::GetNext(const Cluster* pCurr) { const long long idpos = pos; // pos of next (potential) cluster - const long long id = ReadUInt(m_pReader, idpos, len); - assert(id > 0); // TODO + const long long id = ReadID(m_pReader, idpos, len); + if (id < 0) + return NULL; pos += len; // consume ID @@ -2495,7 +2612,7 @@ const Cluster* Segment::GetNext(const Cluster* pCurr) { if (size == 0) // weird continue; - if (id == 0x0F43B675) { // Cluster ID + if (id == mkvmuxer::kMkvCluster) { const long long off_next_ = idpos - m_start; long long pos_; @@ -2553,11 +2670,15 @@ const Cluster* Segment::GetNext(const Cluster* pCurr) { assert(i == j); Cluster* const pNext = Cluster::Create(this, -1, off_next); - assert(pNext); + if (pNext == NULL) + return NULL; const ptrdiff_t idx_next = i - m_clusters; // insertion position - PreloadCluster(pNext, idx_next); + if (!PreloadCluster(pNext, idx_next)) { + delete pNext; + return NULL; + } assert(m_clusters); assert(idx_next < m_clusterSize); assert(m_clusters[idx_next] == pNext); @@ -2641,7 +2762,7 @@ long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult, const long long id = ReadUInt(m_pReader, pos, len); - if (id != 0x0F43B675) // weird: not Cluster ID + if (id != mkvmuxer::kMkvCluster) return -1; pos += len; // consume ID @@ -2687,7 +2808,8 @@ long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult, // Pos now points to start of payload pos += size; // consume payload (that is, the current cluster) - assert((segment_stop < 0) || (pos <= segment_stop)); + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; // By consuming the payload, we are assuming that the curr // cluster isn't interesting. 
That is, we don't bother checking @@ -2755,7 +2877,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { const long long idpos = pos; // absolute const long long idoff = pos - m_start; // relative - const long long id = ReadUInt(m_pReader, idpos, len); // absolute + const long long id = ReadID(m_pReader, idpos, len); // absolute if (id < 0) // error return static_cast<long>(id); @@ -2805,7 +2927,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { return E_FILE_FORMAT_INVALID; } - if (id == 0x0C53BB6B) { // Cues ID + if (id == mkvmuxer::kMkvCues) { if (size == unknown_size) return E_FILE_FORMAT_INVALID; @@ -2818,22 +2940,26 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { const long long element_size = element_stop - element_start; if (m_pCues == NULL) { - m_pCues = new Cues(this, pos, size, element_start, element_size); - assert(m_pCues); // TODO + m_pCues = new (std::nothrow) + Cues(this, pos, size, element_start, element_size); + if (m_pCues == NULL) + return false; } pos += size; // consume payload - assert((segment_stop < 0) || (pos <= segment_stop)); + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; continue; } - if (id != 0x0F43B675) { // not a Cluster ID + if (id != mkvmuxer::kMkvCluster) { // not a Cluster ID if (size == unknown_size) return E_FILE_FORMAT_INVALID; pos += size; // consume payload - assert((segment_stop < 0) || (pos <= segment_stop)); + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; continue; } @@ -2905,12 +3031,15 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { Cluster* const pNext = Cluster::Create(this, -1, // preloaded off_next); - // element_size); - assert(pNext); + if (pNext == NULL) + return -1; const ptrdiff_t idx_next = i - m_clusters; // insertion position - PreloadCluster(pNext, idx_next); + if (!PreloadCluster(pNext, idx_next)) { + delete 
pNext; + return -1; + } assert(m_clusters); assert(idx_next < m_clusterSize); assert(m_clusters[idx_next] == pNext); @@ -2953,7 +3082,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { return E_BUFFER_NOT_FULL; const long long idpos = pos; - const long long id = ReadUInt(m_pReader, idpos, len); + const long long id = ReadID(m_pReader, idpos, len); if (id < 0) // error (or underflow) return static_cast<long>(id); @@ -2962,10 +3091,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. - if (id == 0x0F43B675) // Cluster ID - break; - - if (id == 0x0C53BB6B) // Cues ID + if (id == mkvmuxer::kMkvCluster || id == mkvmuxer::kMkvCues) break; pos += len; // consume ID (of sub-element) @@ -3012,7 +3138,8 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { return E_FILE_FORMAT_INVALID; pos += size; // consume payload of sub-element - assert((segment_stop < 0) || (pos <= segment_stop)); + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; } // determine cluster size cluster_size = pos - payload_pos; @@ -3022,7 +3149,8 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { } pos += cluster_size; // consume payload - assert((segment_stop < 0) || (pos <= segment_stop)); + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; return 2; // try to find a cluster that follows next } @@ -3131,7 +3259,7 @@ long Chapters::Parse() { if (size == 0) // weird continue; - if (id == 0x05B9) { // EditionEntry ID + if (id == mkvmuxer::kMkvEditionEntry) { status = ParseEdition(pos, size); if (status < 0) // error @@ -3139,10 +3267,12 @@ long Chapters::Parse() { } pos += size; - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; return 0; } 
@@ -3242,10 +3372,10 @@ long Chapters::Edition::Parse(IMkvReader* pReader, long long pos, if (status < 0) // error return status; - if (size == 0) // weird + if (size == 0) continue; - if (id == 0x36) { // Atom ID + if (id == mkvmuxer::kMkvChapterAtom) { status = ParseAtom(pReader, pos, size); if (status < 0) // error @@ -3253,10 +3383,12 @@ long Chapters::Edition::Parse(IMkvReader* pReader, long long pos, } pos += size; - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; return 0; } @@ -3373,20 +3505,20 @@ long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { if (status < 0) // error return status; - if (size == 0) // weird + if (size == 0) // 0 length payload, skip. continue; - if (id == 0x00) { // Display ID + if (id == mkvmuxer::kMkvChapterDisplay) { status = ParseDisplay(pReader, pos, size); if (status < 0) // error return status; - } else if (id == 0x1654) { // StringUID ID + } else if (id == mkvmuxer::kMkvChapterStringUID) { status = UnserializeString(pReader, pos, size, m_string_uid); if (status < 0) // error return status; - } else if (id == 0x33C4) { // UID ID + } else if (id == mkvmuxer::kMkvChapterUID) { long long val; status = UnserializeInt(pReader, pos, size, val); @@ -3394,14 +3526,14 @@ long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { return status; m_uid = static_cast<unsigned long long>(val); - } else if (id == 0x11) { // TimeStart ID + } else if (id == mkvmuxer::kMkvChapterTimeStart) { const long long val = UnserializeUInt(pReader, pos, size); if (val < 0) // error return static_cast<long>(val); m_start_timecode = val; - } else if (id == 0x12) { // TimeEnd ID + } else if (id == mkvmuxer::kMkvChapterTimeEnd) { const long long val = UnserializeUInt(pReader, pos, size); if (val < 0) // error @@ -3411,10 +3543,12 @@ long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { 
} pos += size; - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; return 0; } @@ -3524,20 +3658,20 @@ long Chapters::Display::Parse(IMkvReader* pReader, long long pos, if (status < 0) // error return status; - if (size == 0) // weird + if (size == 0) // No payload. continue; - if (id == 0x05) { // ChapterString ID + if (id == mkvmuxer::kMkvChapString) { status = UnserializeString(pReader, pos, size, m_string); if (status) return status; - } else if (id == 0x037C) { // ChapterLanguage ID + } else if (id == mkvmuxer::kMkvChapLanguage) { status = UnserializeString(pReader, pos, size, m_language); if (status) return status; - } else if (id == 0x037E) { // ChapterCountry ID + } else if (id == mkvmuxer::kMkvChapCountry) { status = UnserializeString(pReader, pos, size, m_country); if (status) @@ -3545,10 +3679,12 @@ long Chapters::Display::Parse(IMkvReader* pReader, long long pos, } pos += size; - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; return 0; } @@ -3588,7 +3724,7 @@ long Tags::Parse() { if (size == 0) // 0 length tag, read another continue; - if (id == 0x3373) { // Tag ID + if (id == mkvmuxer::kMkvTag) { status = ParseTag(pos, size); if (status < 0) @@ -3596,14 +3732,12 @@ long Tags::Parse() { } pos += size; - assert(pos <= stop); if (pos > stop) - return -1; + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); if (pos != stop) - return -1; + return E_FILE_FORMAT_INVALID; return 0; } @@ -3706,7 +3840,7 @@ long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) { if (size == 0) // 0 length tag, read another continue; - if (id == 0x27C8) { // SimpleTag ID + if (id == mkvmuxer::kMkvSimpleTag) { status = ParseSimpleTag(pReader, pos, size); if (status < 0) @@ -3714,14 +3848,12 @@ long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long 
size) { } pos += size; - assert(pos <= stop); if (pos > stop) - return -1; + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); if (pos != stop) - return -1; + return E_FILE_FORMAT_INVALID; return 0; } @@ -3799,12 +3931,12 @@ long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos, if (size == 0) // weird continue; - if (id == 0x5A3) { // TagName ID + if (id == mkvmuxer::kMkvTagName) { status = UnserializeString(pReader, pos, size, m_tag_name); if (status) return status; - } else if (id == 0x487) { // TagString ID + } else if (id == mkvmuxer::kMkvTagString) { status = UnserializeString(pReader, pos, size, m_tag_string); if (status) @@ -3812,14 +3944,12 @@ long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos, } pos += size; - assert(pos <= stop); if (pos > stop) - return -1; + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); if (pos != stop) - return -1; + return E_FILE_FORMAT_INVALID; return 0; } @@ -3866,12 +3996,12 @@ long SegmentInfo::Parse() { if (status < 0) // error return status; - if (id == 0x0AD7B1) { // Timecode Scale + if (id == mkvmuxer::kMkvTimecodeScale) { m_timecodeScale = UnserializeUInt(pReader, pos, size); if (m_timecodeScale <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x0489) { // Segment duration + } else if (id == mkvmuxer::kMkvDuration) { const long status = UnserializeFloat(pReader, pos, size, m_duration); if (status < 0) @@ -3879,19 +4009,19 @@ long SegmentInfo::Parse() { if (m_duration < 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x0D80) { // MuxingApp + } else if (id == mkvmuxer::kMkvMuxingApp) { const long status = UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8); if (status) return status; - } else if (id == 0x1741) { // WritingApp + } else if (id == mkvmuxer::kMkvWritingApp) { const long status = UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8); if (status) return status; - } else if (id == 0x3BA9) { // Title + } else if (id == mkvmuxer::kMkvTitle) { const long status = 
UnserializeString(pReader, pos, size, m_pTitleAsUTF8); if (status) @@ -3899,10 +4029,17 @@ long SegmentInfo::Parse() { } pos += size; - assert(pos <= stop); + + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + const double rollover_check = m_duration * m_timecodeScale; + if (rollover_check > LLONG_MAX) + return E_FILE_FORMAT_INVALID; + + if (pos != stop) + return E_FILE_FORMAT_INVALID; return 0; } @@ -4039,15 +4176,15 @@ long ContentEncoding::ParseContentEncAESSettingsEntry( if (status < 0) // error return status; - if (id == 0x7E8) { - // AESSettingsCipherMode + if (id == mkvmuxer::kMkvAESSettingsCipherMode) { aes->cipher_mode = UnserializeUInt(pReader, pos, size); if (aes->cipher_mode != 1) return E_FILE_FORMAT_INVALID; } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } return 0; @@ -4070,14 +4207,15 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, if (status < 0) // error return status; - if (id == 0x1034) // ContentCompression ID + if (id == mkvmuxer::kMkvContentCompression) ++compression_count; - if (id == 0x1035) // ContentEncryption ID + if (id == mkvmuxer::kMkvContentEncryption) ++encryption_count; pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } if (compression_count <= 0 && encryption_count <= 0) @@ -4108,19 +4246,15 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, if (status < 0) // error return status; - if (id == 0x1031) { - // ContentEncodingOrder + if (id == mkvmuxer::kMkvContentEncodingOrder) { encoding_order_ = UnserializeUInt(pReader, pos, size); - } else if (id == 0x1032) { - // ContentEncodingScope + } else if (id == mkvmuxer::kMkvContentEncodingScope) { encoding_scope_ = UnserializeUInt(pReader, pos, size); if (encoding_scope_ < 1) return -1; - } else if (id == 0x1033) { - // ContentEncodingType + } else if (id == 
mkvmuxer::kMkvContentEncodingType) { encoding_type_ = UnserializeUInt(pReader, pos, size); - } else if (id == 0x1034) { - // ContentCompression ID + } else if (id == mkvmuxer::kMkvContentCompression) { ContentCompression* const compression = new (std::nothrow) ContentCompression(); if (!compression) @@ -4132,8 +4266,7 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, return status; } *compression_entries_end_++ = compression; - } else if (id == 0x1035) { - // ContentEncryption ID + } else if (id == mkvmuxer::kMkvContentEncryption) { ContentEncryption* const encryption = new (std::nothrow) ContentEncryption(); if (!encryption) @@ -4148,10 +4281,12 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; return 0; } @@ -4172,21 +4307,18 @@ long ContentEncoding::ParseCompressionEntry(long long start, long long size, if (status < 0) // error return status; - if (id == 0x254) { - // ContentCompAlgo + if (id == mkvmuxer::kMkvContentCompAlgo) { long long algo = UnserializeUInt(pReader, pos, size); if (algo < 0) return E_FILE_FORMAT_INVALID; compression->algo = algo; valid = true; - } else if (id == 0x255) { - // ContentCompSettings + } else if (id == mkvmuxer::kMkvContentCompSettings) { if (size <= 0) return E_FILE_FORMAT_INVALID; const size_t buflen = static_cast<size_t>(size); - typedef unsigned char* buf_t; - const buf_t buf = new (std::nothrow) unsigned char[buflen]; + unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); if (buf == NULL) return -1; @@ -4202,7 +4334,8 @@ long ContentEncoding::ParseCompressionEntry(long long start, long long size, } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } // ContentCompAlgo is mandatory @@ -4227,13 +4360,11 @@ long 
ContentEncoding::ParseEncryptionEntry(long long start, long long size, if (status < 0) // error return status; - if (id == 0x7E1) { - // ContentEncAlgo + if (id == mkvmuxer::kMkvContentEncAlgo) { encryption->algo = UnserializeUInt(pReader, pos, size); if (encryption->algo != 5) return E_FILE_FORMAT_INVALID; - } else if (id == 0x7E2) { - // ContentEncKeyID + } else if (id == mkvmuxer::kMkvContentEncKeyID) { delete[] encryption->key_id; encryption->key_id = NULL; encryption->key_id_len = 0; @@ -4242,8 +4373,7 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, return E_FILE_FORMAT_INVALID; const size_t buflen = static_cast<size_t>(size); - typedef unsigned char* buf_t; - const buf_t buf = new (std::nothrow) unsigned char[buflen]; + unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); if (buf == NULL) return -1; @@ -4256,8 +4386,7 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, encryption->key_id = buf; encryption->key_id_len = buflen; - } else if (id == 0x7E3) { - // ContentSignature + } else if (id == mkvmuxer::kMkvContentSignature) { delete[] encryption->signature; encryption->signature = NULL; encryption->signature_len = 0; @@ -4266,8 +4395,7 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, return E_FILE_FORMAT_INVALID; const size_t buflen = static_cast<size_t>(size); - typedef unsigned char* buf_t; - const buf_t buf = new (std::nothrow) unsigned char[buflen]; + unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); if (buf == NULL) return -1; @@ -4280,8 +4408,7 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, encryption->signature = buf; encryption->signature_len = buflen; - } else if (id == 0x7E4) { - // ContentSigKeyID + } else if (id == mkvmuxer::kMkvContentSigKeyID) { delete[] encryption->sig_key_id; encryption->sig_key_id = NULL; encryption->sig_key_id_len = 0; @@ -4290,8 +4417,7 @@ long 
ContentEncoding::ParseEncryptionEntry(long long start, long long size, return E_FILE_FORMAT_INVALID; const size_t buflen = static_cast<size_t>(size); - typedef unsigned char* buf_t; - const buf_t buf = new (std::nothrow) unsigned char[buflen]; + unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); if (buf == NULL) return -1; @@ -4304,14 +4430,11 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, encryption->sig_key_id = buf; encryption->sig_key_id_len = buflen; - } else if (id == 0x7E5) { - // ContentSigAlgo + } else if (id == mkvmuxer::kMkvContentSigAlgo) { encryption->sig_algo = UnserializeUInt(pReader, pos, size); - } else if (id == 0x7E6) { - // ContentSigHashAlgo + } else if (id == mkvmuxer::kMkvContentSigHashAlgo) { encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size); - } else if (id == 0x7E7) { - // ContentEncAESSettings + } else if (id == mkvmuxer::kMkvContentEncAESSettings) { const long status = ParseContentEncAESSettingsEntry( pos, size, pReader, &encryption->aes_settings); if (status) @@ -4319,7 +4442,8 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size, } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } return 0; @@ -4418,7 +4542,7 @@ int Track::Info::CopyStr(char* Info::*str, Info& dst_) const { const size_t len = strlen(src); - dst = new (std::nothrow) char[len + 1]; + dst = SafeArrayAlloc<char>(1, len + 1); if (dst == NULL) return -1; @@ -4469,7 +4593,7 @@ int Track::Info::Copy(Info& dst) const { if (dst.codecPrivateSize != 0) return -1; - dst.codecPrivate = new (std::nothrow) unsigned char[codecPrivateSize]; + dst.codecPrivate = SafeArrayAlloc<unsigned char>(1, codecPrivateSize); if (dst.codecPrivate == NULL) return -1; @@ -4797,11 +4921,12 @@ long Track::ParseContentEncodingsEntry(long long start, long long size) { return status; // pos now designates start of element - if (id == 0x2240) // ContentEncoding ID + if 
(id == mkvmuxer::kMkvContentEncoding) ++count; pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } if (count <= 0) @@ -4821,7 +4946,7 @@ long Track::ParseContentEncodingsEntry(long long start, long long size) { return status; // pos now designates start of element - if (id == 0x2240) { // ContentEncoding ID + if (id == mkvmuxer::kMkvContentEncoding) { ContentEncoding* const content_encoding = new (std::nothrow) ContentEncoding(); if (!content_encoding) @@ -4837,10 +4962,12 @@ long Track::ParseContentEncodingsEntry(long long start, long long size) { } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; return 0; } @@ -4892,37 +5019,37 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info, if (status < 0) // error return status; - if (id == 0x30) { // pixel width + if (id == mkvmuxer::kMkvPixelWidth) { width = UnserializeUInt(pReader, pos, size); if (width <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x3A) { // pixel height + } else if (id == mkvmuxer::kMkvPixelHeight) { height = UnserializeUInt(pReader, pos, size); if (height <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x14B0) { // display width + } else if (id == mkvmuxer::kMkvDisplayWidth) { display_width = UnserializeUInt(pReader, pos, size); if (display_width <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x14BA) { // display height + } else if (id == mkvmuxer::kMkvDisplayHeight) { display_height = UnserializeUInt(pReader, pos, size); if (display_height <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x14B2) { // display unit + } else if (id == mkvmuxer::kMkvDisplayUnit) { display_unit = UnserializeUInt(pReader, pos, size); if (display_unit < 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x13B8) { // stereo mode + } else if (id == mkvmuxer::kMkvStereoMode) { stereo_mode = 
UnserializeUInt(pReader, pos, size); if (stereo_mode < 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x0383E3) { // frame rate + } else if (id == mkvmuxer::kMkvFrameRate) { const long status = UnserializeFloat(pReader, pos, size, rate); if (status < 0) @@ -4933,10 +5060,12 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info, } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; VideoTrack* const pTrack = new (std::nothrow) VideoTrack(pSegment, element_start, element_size); @@ -5110,7 +5239,7 @@ long AudioTrack::Parse(Segment* pSegment, const Info& info, if (status < 0) // error return status; - if (id == 0x35) { // Sample Rate + if (id == mkvmuxer::kMkvSamplingFrequency) { status = UnserializeFloat(pReader, pos, size, rate); if (status < 0) @@ -5118,12 +5247,12 @@ long AudioTrack::Parse(Segment* pSegment, const Info& info, if (rate <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x1F) { // Channel Count + } else if (id == mkvmuxer::kMkvChannels) { channels = UnserializeUInt(pReader, pos, size); if (channels <= 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x2264) { // Bit Depth + } else if (id == mkvmuxer::kMkvBitDepth) { bit_depth = UnserializeUInt(pReader, pos, size); if (bit_depth <= 0) @@ -5131,10 +5260,12 @@ long AudioTrack::Parse(Segment* pSegment, const Info& info, } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; AudioTrack* const pTrack = new (std::nothrow) AudioTrack(pSegment, element_start, element_size); @@ -5194,14 +5325,16 @@ long Tracks::Parse() { if (size == 0) // weird continue; - if (id == 0x2E) // TrackEntry ID + if (id == mkvmuxer::kMkvTrackEntry) ++count; pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; 
} - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; if (count <= 0) return 0; // success @@ -5234,13 +5367,12 @@ long Tracks::Parse() { const long long element_size = payload_stop - element_start; - if (id == 0x2E) { // TrackEntry ID + if (id == mkvmuxer::kMkvTrackEntry) { Track*& pTrack = *m_trackEntriesEnd; pTrack = NULL; const long status = ParseTrackEntry(pos, payload_size, element_start, element_size, pTrack); - if (status) return status; @@ -5249,10 +5381,12 @@ long Tracks::Parse() { } pos = payload_stop; - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; return 0; // success } @@ -5309,16 +5443,16 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size, const long long start = pos; - if (id == 0x60) { // VideoSettings ID + if (id == mkvmuxer::kMkvVideo) { v.start = start; v.size = size; - } else if (id == 0x61) { // AudioSettings ID + } else if (id == mkvmuxer::kMkvAudio) { a.start = start; a.size = size; - } else if (id == 0x2D80) { // ContentEncodings ID + } else if (id == mkvmuxer::kMkvContentEncodings) { e.start = start; e.size = size; - } else if (id == 0x33C5) { // Track UID + } else if (id == mkvmuxer::kMkvTrackUID) { if (size > 8) return E_FILE_FORMAT_INVALID; @@ -5340,49 +5474,49 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size, ++pos_; } - } else if (id == 0x57) { // Track Number + } else if (id == mkvmuxer::kMkvTrackNumber) { const long long num = UnserializeUInt(pReader, pos, size); if ((num <= 0) || (num > 127)) return E_FILE_FORMAT_INVALID; info.number = static_cast<long>(num); - } else if (id == 0x03) { // Track Type + } else if (id == mkvmuxer::kMkvTrackType) { const long long type = UnserializeUInt(pReader, pos, size); if ((type <= 0) || (type > 254)) return E_FILE_FORMAT_INVALID; info.type = static_cast<long>(type); - } else if (id == 0x136E) { // Track Name + } else if (id 
== mkvmuxer::kMkvName) { const long status = UnserializeString(pReader, pos, size, info.nameAsUTF8); if (status) return status; - } else if (id == 0x02B59C) { // Track Language + } else if (id == mkvmuxer::kMkvLanguage) { const long status = UnserializeString(pReader, pos, size, info.language); if (status) return status; - } else if (id == 0x03E383) { // Default Duration + } else if (id == mkvmuxer::kMkvDefaultDuration) { const long long duration = UnserializeUInt(pReader, pos, size); if (duration < 0) return E_FILE_FORMAT_INVALID; info.defaultDuration = static_cast<unsigned long long>(duration); - } else if (id == 0x06) { // CodecID + } else if (id == mkvmuxer::kMkvCodecID) { const long status = UnserializeString(pReader, pos, size, info.codecId); if (status) return status; - } else if (id == 0x1C) { // lacing + } else if (id == mkvmuxer::kMkvFlagLacing) { lacing = UnserializeUInt(pReader, pos, size); if ((lacing < 0) || (lacing > 1)) return E_FILE_FORMAT_INVALID; - } else if (id == 0x23A2) { // Codec Private + } else if (id == mkvmuxer::kMkvCodecPrivate) { delete[] info.codecPrivate; info.codecPrivate = NULL; info.codecPrivateSize = 0; @@ -5390,9 +5524,7 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size, const size_t buflen = static_cast<size_t>(size); if (buflen) { - typedef unsigned char* buf_t; - - const buf_t buf = new (std::nothrow) unsigned char[buflen]; + unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); if (buf == NULL) return -1; @@ -5407,23 +5539,25 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size, info.codecPrivate = buf; info.codecPrivateSize = buflen; } - } else if (id == 0x058688) { // Codec Name + } else if (id == mkvmuxer::kMkvCodecName) { const long status = UnserializeString(pReader, pos, size, info.codecNameAsUTF8); if (status) return status; - } else if (id == 0x16AA) { // Codec Delay + } else if (id == mkvmuxer::kMkvCodecDelay) { info.codecDelay = UnserializeUInt(pReader, pos, 
size); - } else if (id == 0x16BB) { // Seek Pre Roll + } else if (id == mkvmuxer::kMkvSeekPreRoll) { info.seekPreRoll = UnserializeUInt(pReader, pos, size); } pos += size; // consume payload - assert(pos <= track_stop); + if (pos > track_stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == track_stop); + if (pos != track_stop) + return E_FILE_FORMAT_INVALID; if (info.number <= 0) // not specified return E_FILE_FORMAT_INVALID; @@ -5552,97 +5686,87 @@ const Track* Tracks::GetTrackByIndex(unsigned long idx) const { } long Cluster::Load(long long& pos, long& len) const { - assert(m_pSegment); - assert(m_pos >= m_element_start); + if (m_pSegment == NULL) + return E_PARSE_FAILED; if (m_timecode >= 0) // at least partially loaded return 0; - assert(m_pos == m_element_start); - assert(m_element_size < 0); + if (m_pos != m_element_start || m_element_size >= 0) + return E_PARSE_FAILED; IMkvReader* const pReader = m_pSegment->m_pReader; - long long total, avail; - const int status = pReader->Length(&total, &avail); if (status < 0) // error return status; - assert((total < 0) || (avail <= total)); - assert((total < 0) || (m_pos <= total)); // TODO: verify this + if (total >= 0 && (avail > total || m_pos > total)) + return E_FILE_FORMAT_INVALID; pos = m_pos; long long cluster_size = -1; - { - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } - if (result < 0) // error or underflow - return static_cast<long>(result); + long long result = GetUIntLength(pReader, pos, len); - if (result > 0) // underflow (weird) - return E_BUFFER_NOT_FULL; + if (result < 0) // error or underflow + return static_cast<long>(result); - // if ((pos + len) > segment_stop) - // return E_FILE_FORMAT_INVALID; + if (result > 0) + return E_BUFFER_NOT_FULL; - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; + if ((pos + len) > avail) + return 
E_BUFFER_NOT_FULL; - const long long id_ = ReadUInt(pReader, pos, len); + const long long id_ = ReadID(pReader, pos, len); - if (id_ < 0) // error - return static_cast<long>(id_); + if (id_ < 0) // error + return static_cast<long>(id_); - if (id_ != 0x0F43B675) // Cluster ID - return E_FILE_FORMAT_INVALID; + if (id_ != mkvmuxer::kMkvCluster) + return E_FILE_FORMAT_INVALID; - pos += len; // consume id + pos += len; // consume id - // read cluster size + // read cluster size - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } - result = GetUIntLength(pReader, pos, len); + result = GetUIntLength(pReader, pos, len); - if (result < 0) // error - return static_cast<long>(result); + if (result < 0) // error + return static_cast<long>(result); - if (result > 0) // weird - return E_BUFFER_NOT_FULL; + if (result > 0) + return E_BUFFER_NOT_FULL; - // if ((pos + len) > segment_stop) - // return E_FILE_FORMAT_INVALID; + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; + const long long size = ReadUInt(pReader, pos, len); - const long long size = ReadUInt(pReader, pos, len); + if (size < 0) // error + return static_cast<long>(cluster_size); - if (size < 0) // error - return static_cast<long>(cluster_size); + if (size == 0) + return E_FILE_FORMAT_INVALID; - if (size == 0) - return E_FILE_FORMAT_INVALID; // TODO: verify this + pos += len; // consume length of size of element - pos += len; // consume length of size of element + const long long unknown_size = (1LL << (7 * len)) - 1; - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size != unknown_size) - cluster_size = size; - } + if (size != unknown_size) + cluster_size = size; // pos points to start of payload long long timecode = -1; @@ -5667,7 +5791,7 @@ long Cluster::Load(long long& pos, long& len) const { if (result < 0) // error return static_cast<long>(result); - 
if (result > 0) // weird + if (result > 0) return E_BUFFER_NOT_FULL; if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) @@ -5676,7 +5800,7 @@ long Cluster::Load(long long& pos, long& len) const { if ((pos + len) > avail) return E_BUFFER_NOT_FULL; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); if (id < 0) // error return static_cast<long>(id); @@ -5688,10 +5812,10 @@ long Cluster::Load(long long& pos, long& len) const { // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. - if (id == 0x0F43B675) // Cluster ID + if (id == mkvmuxer::kMkvCluster) break; - if (id == 0x0C53BB6B) // Cues ID + if (id == mkvmuxer::kMkvCues) break; pos += len; // consume ID field @@ -5708,7 +5832,7 @@ long Cluster::Load(long long& pos, long& len) const { if (result < 0) // error return static_cast<long>(result); - if (result > 0) // weird + if (result > 0) return E_BUFFER_NOT_FULL; if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) @@ -5734,13 +5858,13 @@ long Cluster::Load(long long& pos, long& len) const { // pos now points to start of payload - if (size == 0) // weird + if (size == 0) continue; if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) return E_FILE_FORMAT_INVALID; - if (id == 0x67) { // TimeCode ID + if (id == mkvmuxer::kMkvTimecode) { len = static_cast<long>(size); if ((pos + size) > avail) @@ -5755,19 +5879,21 @@ long Cluster::Load(long long& pos, long& len) const { if (bBlock) break; - } else if (id == 0x20) { // BlockGroup ID + } else if (id == mkvmuxer::kMkvBlockGroup) { bBlock = true; break; - } else if (id == 0x23) { // SimpleBlock ID + } else if (id == mkvmuxer::kMkvSimpleBlock) { bBlock = true; break; } pos += size; // consume payload - assert((cluster_stop < 0) || (pos <= cluster_stop)); + if (cluster_stop >= 0 && pos > cluster_stop) + return E_FILE_FORMAT_INVALID; } - assert((cluster_stop < 0) || (pos <= cluster_stop)); + if (cluster_stop >= 0 && pos > 
cluster_stop) + return E_FILE_FORMAT_INVALID; if (timecode < 0) // no timecode found return E_FILE_FORMAT_INVALID; @@ -5790,10 +5916,8 @@ long Cluster::Parse(long long& pos, long& len) const { if (status < 0) return status; - assert(m_pos >= m_element_start); - assert(m_timecode >= 0); - // assert(m_size > 0); - // assert(m_element_size > m_size); + if (m_pos < m_element_start || m_timecode < 0) + return E_PARSE_FAILED; const long long cluster_stop = (m_element_size < 0) ? -1 : m_element_start + m_element_size; @@ -5810,7 +5934,8 @@ long Cluster::Parse(long long& pos, long& len) const { if (status < 0) // error return status; - assert((total < 0) || (avail <= total)); + if (total >= 0 && avail > total) + return E_FILE_FORMAT_INVALID; pos = m_pos; @@ -5837,7 +5962,7 @@ long Cluster::Parse(long long& pos, long& len) const { if (result < 0) // error return static_cast<long>(result); - if (result > 0) // weird + if (result > 0) return E_BUFFER_NOT_FULL; if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) @@ -5846,19 +5971,16 @@ long Cluster::Parse(long long& pos, long& len) const { if ((pos + len) > avail) return E_BUFFER_NOT_FULL; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); - if (id < 0) // error - return static_cast<long>(id); - - if (id == 0) // weird + if (id < 0) return E_FILE_FORMAT_INVALID; // This is the distinguished set of ID's we use to determine // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. 
- if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) { // Cluster or Cues ID + if ((id == mkvmuxer::kMkvCluster) || (id == mkvmuxer::kMkvCues)) { if (m_element_size < 0) m_element_size = pos - m_element_start; @@ -5879,7 +6001,7 @@ long Cluster::Parse(long long& pos, long& len) const { if (result < 0) // error return static_cast<long>(result); - if (result > 0) // weird + if (result > 0) return E_BUFFER_NOT_FULL; if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) @@ -5905,7 +6027,7 @@ long Cluster::Parse(long long& pos, long& len) const { // pos now points to start of payload - if (size == 0) // weird + if (size == 0) continue; // const long long block_start = pos; @@ -5913,8 +6035,10 @@ long Cluster::Parse(long long& pos, long& len) const { if (cluster_stop >= 0) { if (block_stop > cluster_stop) { - if ((id == 0x20) || (id == 0x23)) + if (id == mkvmuxer::kMkvBlockGroup || + id == mkvmuxer::kMkvSimpleBlock) { return E_FILE_FORMAT_INVALID; + } pos = cluster_stop; break; @@ -5930,42 +6054,48 @@ long Cluster::Parse(long long& pos, long& len) const { Cluster* const this_ = const_cast<Cluster*>(this); - if (id == 0x20) // BlockGroup + if (id == mkvmuxer::kMkvBlockGroup) return this_->ParseBlockGroup(size, pos, len); - if (id == 0x23) // SimpleBlock + if (id == mkvmuxer::kMkvSimpleBlock) return this_->ParseSimpleBlock(size, pos, len); pos += size; // consume payload - assert((cluster_stop < 0) || (pos <= cluster_stop)); + if (cluster_stop >= 0 && pos > cluster_stop) + return E_FILE_FORMAT_INVALID; } - assert(m_element_size > 0); + if (m_element_size < 1) + return E_FILE_FORMAT_INVALID; m_pos = pos; - assert((cluster_stop < 0) || (m_pos <= cluster_stop)); + if (cluster_stop >= 0 && m_pos > cluster_stop) + return E_FILE_FORMAT_INVALID; if (m_entries_count > 0) { const long idx = m_entries_count - 1; const BlockEntry* const pLast = m_entries[idx]; - assert(pLast); + if (pLast == NULL) + return E_PARSE_FAILED; const Block* const pBlock = pLast->GetBlock(); - assert(pBlock); 
+ if (pBlock == NULL) + return E_PARSE_FAILED; const long long start = pBlock->m_start; if ((total >= 0) && (start > total)) - return -1; // defend against trucated stream + return E_PARSE_FAILED; // defend against trucated stream const long long size = pBlock->m_size; const long long stop = start + size; - assert((cluster_stop < 0) || (stop <= cluster_stop)); + if (cluster_stop >= 0 && stop > cluster_stop) + return E_FILE_FORMAT_INVALID; if ((total >= 0) && (stop > total)) - return -1; // defend against trucated stream + return E_PARSE_FAILED; // defend against trucated stream } return 1; // no more entries @@ -6058,7 +6188,7 @@ long Cluster::ParseSimpleBlock(long long block_size, long long& pos, return E_BUFFER_NOT_FULL; } - status = CreateBlock(0x23, // simple block id + status = CreateBlock(mkvmuxer::kMkvSimpleBlock, block_start, block_size, 0); // DiscardPadding @@ -6118,12 +6248,12 @@ long Cluster::ParseBlockGroup(long long payload_size, long long& pos, if ((pos + len) > avail) return E_BUFFER_NOT_FULL; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); if (id < 0) // error return static_cast<long>(id); - if (id == 0) // not a value ID + if (id == 0) // not a valid ID return E_FILE_FORMAT_INVALID; pos += len; // consume ID field @@ -6169,14 +6299,14 @@ long Cluster::ParseBlockGroup(long long payload_size, long long& pos, if (size == unknown_size) return E_FILE_FORMAT_INVALID; - if (id == 0x35A2) { // DiscardPadding + if (id == mkvmuxer::kMkvDiscardPadding) { status = UnserializeInt(pReader, pos, size, discard_padding); if (status < 0) // error return status; } - if (id != 0x21) { // sub-part of BlockGroup is not a Block + if (id != mkvmuxer::kMkvBlock) { pos += size; // consume sub-part of block group if (pos > payload_stop) @@ -6262,12 +6392,14 @@ long Cluster::ParseBlockGroup(long long payload_size, long long& pos, } pos = block_stop; // consume block-part of block group - assert(pos <= payload_stop); + if 
(pos > payload_stop) + return E_FILE_FORMAT_INVALID; } - assert(pos == payload_stop); + if (pos != payload_stop) + return E_FILE_FORMAT_INVALID; - status = CreateBlock(0x20, // BlockGroup ID + status = CreateBlock(mkvmuxer::kMkvBlockGroup, payload_start, payload_size, discard_padding); if (status != 0) return status; @@ -6310,17 +6442,14 @@ long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const { return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed } -Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) -// long long element_size) -{ - assert(pSegment); - assert(off >= 0); +Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) { + if (!pSegment || off < 0) + return NULL; const long long element_start = pSegment->m_start + off; - Cluster* const pCluster = new Cluster(pSegment, idx, element_start); - // element_size); - assert(pCluster); + Cluster* const pCluster = + new (std::nothrow) Cluster(pSegment, idx, element_start); return pCluster; } @@ -6431,13 +6560,13 @@ long Cluster::HasBlockEntries( if ((pos + len) > avail) return E_BUFFER_NOT_FULL; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); if (id < 0) // error return static_cast<long>(id); - if (id != 0x0F43B675) // weird: not cluster ID - return -1; // generic error + if (id != mkvmuxer::kMkvCluster) + return E_PARSE_FAILED; pos += len; // consume Cluster ID field @@ -6515,7 +6644,7 @@ long Cluster::HasBlockEntries( if ((pos + len) > avail) return E_BUFFER_NOT_FULL; - const long long id = ReadUInt(pReader, pos, len); + const long long id = ReadID(pReader, pos, len); if (id < 0) // error return static_cast<long>(id); @@ -6524,10 +6653,10 @@ long Cluster::HasBlockEntries( // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. 
- if (id == 0x0F43B675) // Cluster ID + if (id == mkvmuxer::kMkvCluster) return 0; // no entries found - if (id == 0x0C53BB6B) // Cues ID + if (id == mkvmuxer::kMkvCues) return 0; // no entries found pos += len; // consume id field @@ -6579,14 +6708,15 @@ long Cluster::HasBlockEntries( if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) return E_FILE_FORMAT_INVALID; - if (id == 0x20) // BlockGroup ID + if (id == mkvmuxer::kMkvBlockGroup) return 1; // have at least one entry - if (id == 0x23) // SimpleBlock ID + if (id == mkvmuxer::kMkvSimpleBlock) return 1; // have at least one entry pos += size; // consume payload - assert((cluster_stop < 0) || (pos <= cluster_stop)); + if (cluster_stop >= 0 && pos > cluster_stop) + return E_FILE_FORMAT_INVALID; } } @@ -6656,14 +6786,17 @@ long long Cluster::GetLastTime() const { long Cluster::CreateBlock(long long id, long long pos, // absolute pos of payload long long size, long long discard_padding) { - assert((id == 0x20) || (id == 0x23)); // BlockGroup or SimpleBlock + if (id != mkvmuxer::kMkvBlockGroup && id != mkvmuxer::kMkvSimpleBlock) + return E_PARSE_FAILED; if (m_entries_count < 0) { // haven't parsed anything yet assert(m_entries == NULL); assert(m_entries_size == 0); m_entries_size = 1024; - m_entries = new BlockEntry*[m_entries_size]; + m_entries = new (std::nothrow) BlockEntry*[m_entries_size]; + if (m_entries == NULL) + return -1; m_entries_count = 0; } else { @@ -6674,8 +6807,9 @@ long Cluster::CreateBlock(long long id, if (m_entries_count >= m_entries_size) { const long entries_size = 2 * m_entries_size; - BlockEntry** const entries = new BlockEntry*[entries_size]; - assert(entries); + BlockEntry** const entries = new (std::nothrow) BlockEntry*[entries_size]; + if (entries == NULL) + return -1; BlockEntry** src = m_entries; BlockEntry** const src_end = src + m_entries_count; @@ -6692,9 +6826,9 @@ long Cluster::CreateBlock(long long id, } } - if (id == 0x20) // BlockGroup ID + if (id == 
mkvmuxer::kMkvBlockGroup) return CreateBlockGroup(pos, size, discard_padding); - else // SimpleBlock ID + else return CreateSimpleBlock(pos, size); } @@ -6725,9 +6859,9 @@ long Cluster::CreateBlockGroup(long long start_offset, long long size, while (pos < stop) { long len; - const long long id = ReadUInt(pReader, pos, len); - assert(id >= 0); // TODO - assert((pos + len) <= stop); + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (pos + len) > stop) + return E_FILE_FORMAT_INVALID; pos += len; // consume ID @@ -6737,12 +6871,12 @@ long Cluster::CreateBlockGroup(long long start_offset, long long size, pos += len; // consume size - if (id == 0x21) { // Block ID + if (id == mkvmuxer::kMkvBlock) { if (bpos < 0) { // Block ID bpos = pos; bsize = size; } - } else if (id == 0x1B) { // Duration ID + } else if (id == mkvmuxer::kMkvBlockDuration) { if (size > 8) return E_FILE_FORMAT_INVALID; @@ -6750,7 +6884,7 @@ long Cluster::CreateBlockGroup(long long start_offset, long long size, if (duration < 0) return E_FILE_FORMAT_INVALID; - } else if (id == 0x7B) { // ReferenceBlock + } else if (id == mkvmuxer::kMkvReferenceBlock) { if (size > 8 || size <= 0) return E_FILE_FORMAT_INVALID; const long size_ = static_cast<long>(size); @@ -6764,17 +6898,19 @@ long Cluster::CreateBlockGroup(long long start_offset, long long size, if (time <= 0) // see note above prev = time; - else // weird + else next = time; } pos += size; // consume payload - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; } if (bpos < 0) return E_FILE_FORMAT_INVALID; - assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; assert(bsize >= 0); const long idx = m_entries_count; @@ -7213,7 +7349,9 @@ long Block::Parse(const Cluster* pCluster) { return E_FILE_FORMAT_INVALID; m_frame_count = 1; - m_frames = new Frame[m_frame_count]; + m_frames = new (std::nothrow) Frame[m_frame_count]; + if (m_frames == NULL) + return -1; Frame& f = m_frames[0]; f.pos = pos; @@ 
-7239,18 +7377,23 @@ long Block::Parse(const Cluster* pCluster) { return E_FILE_FORMAT_INVALID; ++pos; // consume frame count - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; m_frame_count = int(biased_count) + 1; - m_frames = new Frame[m_frame_count]; - assert(m_frames); + m_frames = new (std::nothrow) Frame[m_frame_count]; + if (m_frames == NULL) + return -1; + + if (!m_frames) + return E_FILE_FORMAT_INVALID; if (lacing == 1) { // Xiph Frame* pf = m_frames; Frame* const pf_end = pf + m_frame_count; - long size = 0; + long long size = 0; int frame_count = m_frame_count; while (frame_count > 1) { @@ -7277,6 +7420,8 @@ long Block::Parse(const Cluster* pCluster) { Frame& f = *pf++; assert(pf < pf_end); + if (pf >= pf_end) + return E_FILE_FORMAT_INVALID; f.pos = 0; // patch later @@ -7289,8 +7434,8 @@ long Block::Parse(const Cluster* pCluster) { --frame_count; } - assert(pf < pf_end); - assert(pos <= stop); + if (pf >= pf_end || pos > stop) + return E_FILE_FORMAT_INVALID; { Frame& f = *pf++; @@ -7318,11 +7463,17 @@ long Block::Parse(const Cluster* pCluster) { Frame& f = *pf++; assert((pos + f.len) <= stop); + if ((pos + f.len) > stop) + return E_FILE_FORMAT_INVALID; + f.pos = pos; pos += f.len; } assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; + } else if (lacing == 2) { // fixed-size lacing if (pos >= stop) return E_FILE_FORMAT_INVALID; @@ -7342,6 +7493,8 @@ long Block::Parse(const Cluster* pCluster) { while (pf != pf_end) { assert((pos + frame_size) <= stop); + if ((pos + frame_size) > stop) + return E_FILE_FORMAT_INVALID; Frame& f = *pf++; @@ -7352,13 +7505,16 @@ long Block::Parse(const Cluster* pCluster) { } assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; + } else { assert(lacing == 3); // EBML lacing if (pos >= stop) return E_FILE_FORMAT_INVALID; - long size = 0; + long long size = 0; int frame_count = m_frame_count; long long frame_size = ReadUInt(pReader, pos, len); @@ -7396,6 +7552,9 @@ 
long Block::Parse(const Cluster* pCluster) { return E_FILE_FORMAT_INVALID; assert(pf < pf_end); + if (pf >= pf_end) + return E_FILE_FORMAT_INVALID; + const Frame& prev = *pf++; assert(prev.len == frame_size); @@ -7403,6 +7562,8 @@ long Block::Parse(const Cluster* pCluster) { return E_FILE_FORMAT_INVALID; assert(pf < pf_end); + if (pf >= pf_end) + return E_FILE_FORMAT_INVALID; Frame& curr = *pf; @@ -7417,7 +7578,8 @@ long Block::Parse(const Cluster* pCluster) { return E_FILE_FORMAT_INVALID; pos += len; // consume length of (delta) size - assert(pos <= stop); + if (pos > stop) + return E_FILE_FORMAT_INVALID; const int exp = 7 * len - 1; const long long bias = (1LL << exp) - 1LL; @@ -7439,18 +7601,20 @@ long Block::Parse(const Cluster* pCluster) { // parse last frame if (frame_count > 0) { - assert(pos <= stop); - assert(pf < pf_end); + if (pos > stop || pf >= pf_end) + return E_FILE_FORMAT_INVALID; const Frame& prev = *pf++; assert(prev.len == frame_size); if (prev.len != frame_size) return E_FILE_FORMAT_INVALID; - assert(pf < pf_end); + if (pf >= pf_end) + return E_FILE_FORMAT_INVALID; Frame& curr = *pf++; - assert(pf == pf_end); + if (pf != pf_end) + return E_FILE_FORMAT_INVALID; curr.pos = 0; // patch later @@ -7471,6 +7635,8 @@ long Block::Parse(const Cluster* pCluster) { while (pf != pf_end) { Frame& f = *pf++; assert((pos + f.len) <= stop); + if ((pos + f.len) > stop) + return E_FILE_FORMAT_INVALID; f.pos = pos; pos += f.len; diff --git a/libvpx/third_party/libwebm/mkvparser.hpp b/libvpx/third_party/libwebm/mkvparser.hpp index aa0b4326..75ef69d7 100644 --- a/libvpx/third_party/libwebm/mkvparser.hpp +++ b/libvpx/third_party/libwebm/mkvparser.hpp @@ -9,12 +9,13 @@ #ifndef MKVPARSER_HPP #define MKVPARSER_HPP -#include <cstdlib> -#include <cstdio> #include <cstddef> +#include <cstdio> +#include <cstdlib> namespace mkvparser { +const int E_PARSE_FAILED = -1; const int E_FILE_FORMAT_INVALID = -2; const int E_BUFFER_NOT_FULL = -3; @@ -27,8 +28,11 @@ class IMkvReader { 
virtual ~IMkvReader(); }; +template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements, + unsigned long long element_size); long long GetUIntLength(IMkvReader*, long long, long&); long long ReadUInt(IMkvReader*, long long, long&); +long long ReadID(IMkvReader* pReader, long long pos, long& len); long long UnserializeUInt(IMkvReader*, long long pos, long long size); long UnserializeFloat(IMkvReader*, long long pos, long long size, double&); @@ -833,7 +837,7 @@ class Cues { private: bool Init() const; - void PreloadCuePoint(long&, long long) const; + bool PreloadCuePoint(long&, long long) const; mutable CuePoint** m_cue_points; mutable long m_count; @@ -999,8 +1003,8 @@ class Segment { long DoLoadClusterUnknownSize(long long&, long&); long DoParseNext(const Cluster*&, long long&, long&); - void AppendCluster(Cluster*); - void PreloadCluster(Cluster*, ptrdiff_t); + bool AppendCluster(Cluster*); + bool PreloadCluster(Cluster*, ptrdiff_t); // void ParseSeekHead(long long pos, long long size); // void ParseSeekEntry(long long pos, long long size); diff --git a/libvpx/third_party/libwebm/webmids.hpp b/libvpx/third_party/libwebm/webmids.hpp index 6874e44e..ad4ab573 100644 --- a/libvpx/third_party/libwebm/webmids.hpp +++ b/libvpx/third_party/libwebm/webmids.hpp @@ -41,6 +41,7 @@ enum MkvId { kMkvTimecodeScale = 0x2AD7B1, kMkvDuration = 0x4489, kMkvDateUTC = 0x4461, + kMkvTitle = 0x7BA9, kMkvMuxingApp = 0x4D80, kMkvWritingApp = 0x5741, // Cluster @@ -107,9 +108,16 @@ enum MkvId { kMkvContentEncodingOrder = 0x5031, kMkvContentEncodingScope = 0x5032, kMkvContentEncodingType = 0x5033, + kMkvContentCompression = 0x5034, + kMkvContentCompAlgo = 0x4254, + kMkvContentCompSettings = 0x4255, kMkvContentEncryption = 0x5035, kMkvContentEncAlgo = 0x47E1, kMkvContentEncKeyID = 0x47E2, + kMkvContentSignature = 0x47E3, + kMkvContentSigKeyID = 0x47E4, + kMkvContentSigAlgo = 0x47E5, + kMkvContentSigHashAlgo = 0x47E6, kMkvContentEncAESSettings = 0x47E7, 
kMkvAESSettingsCipherMode = 0x47E8, kMkvAESSettingsCipherInitData = 0x47E9, diff --git a/libvpx/third_party/x86inc/README.libvpx b/libvpx/third_party/x86inc/README.libvpx index fe5b0761..e91e305a 100644 --- a/libvpx/third_party/x86inc/README.libvpx +++ b/libvpx/third_party/x86inc/README.libvpx @@ -20,3 +20,5 @@ Copy PIC 'GLOBAL' macros from x86_abi_support.asm Use .text instead of .rodata on macho to avoid broken tables in PIC mode. Use .text with no alignment for aout Only use 'hidden' visibility with Chromium +Move '%use smartalign' for nasm out of 'INIT_CPUFLAGS' and before + 'ALIGNMODE'. diff --git a/libvpx/third_party/x86inc/x86inc.asm b/libvpx/third_party/x86inc/x86inc.asm index 77a58f29..be59de31 100644 --- a/libvpx/third_party/x86inc/x86inc.asm +++ b/libvpx/third_party/x86inc/x86inc.asm @@ -876,6 +876,10 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x)) %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x)) +%ifdef __NASM_VER__ + %use smartalign +%endif + ; Takes an arbitrary number of cpuflags from the above list. ; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu. ; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co. @@ -912,7 +916,6 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %endif %ifdef __NASM_VER__ - %use smartalign ALIGNMODE k7 %elif ARCH_X86_64 || cpuflag(sse2) CPU amdnop diff --git a/libvpx/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/libvpx/vp8/common/arm/armv6/intra4x4_predict_v6.asm deleted file mode 100644 index c5ec824b..00000000 --- a/libvpx/vp8/common/arm/armv6/intra4x4_predict_v6.asm +++ /dev/null @@ -1,611 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_intra4x4_predict_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - -;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, -; B_PREDICTION_MODE left_stride, int b_mode, -; unsigned char *dst, int dst_stride, -; unsigned char top_left) - -; r0: *Above -; r1: *yleft -; r2: left_stride -; r3: b_mode -; sp + #40: dst -; sp + #44: dst_stride -; sp + #48: top_left -|vp8_intra4x4_predict_armv6| PROC - push {r4-r12, lr} - - cmp r3, #10 - addlt pc, pc, r3, lsl #2 ; position independent switch - pop {r4-r12, pc} ; default - b b_dc_pred - b b_tm_pred - b b_ve_pred - b b_he_pred - b b_ld_pred - b b_rd_pred - b b_vr_pred - b b_vl_pred - b b_hd_pred - b b_hu_pred - -b_dc_pred - ; load values - ldr r8, [r0] ; Above - ldrb r4, [r1], r2 ; Left[0] - mov r9, #0 - ldrb r5, [r1], r2 ; Left[1] - ldrb r6, [r1], r2 ; Left[2] - usad8 r12, r8, r9 - ldrb r7, [r1] ; Left[3] - - ; calculate dc - add r4, r4, r5 - add r4, r4, r6 - add r4, r4, r7 - add r4, r4, r12 - add r4, r4, #4 - ldr r0, [sp, #44] ; dst_stride - mov r12, r4, asr #3 ; (expected_dc + 4) >> 3 - - add r12, r12, r12, lsl #8 - ldr r3, [sp, #40] ; dst - add r12, r12, r12, lsl #16 - - ; store values - str r12, [r3], r0 - str r12, [r3], r0 - str r12, [r3], r0 - str r12, [r3] - - pop {r4-r12, pc} - -b_tm_pred - ldr r8, [r0] ; Above - ldrb r9, [sp, #48] ; top_left - ldrb r4, [r1], r2 ; Left[0] - ldrb r5, [r1], r2 ; Left[1] - ldrb r6, [r1], r2 ; Left[2] - ldrb r7, [r1] ; Left[3] - ldr r0, [sp, #44] ; dst_stride - ldr r3, [sp, #40] ; dst - - add r9, r9, r9, lsl #16 ; [tl|tl] - uxtb16 r10, r8 ; a[2|0] - uxtb16 r11, r8, ror #8 ; a[3|1] - ssub16 r10, r10, r9 ; a[2|0] - [tl|tl] - ssub16 r11, r11, r9 ; a[3|1] - [tl|tl] - - add r4, r4, r4, lsl #16 ; l[0|0] - add r5, r5, r5, lsl #16 ; 
l[1|1] - add r6, r6, r6, lsl #16 ; l[2|2] - add r7, r7, r7, lsl #16 ; l[3|3] - - sadd16 r1, r4, r10 ; l[0|0] + a[2|0] - [tl|tl] - sadd16 r2, r4, r11 ; l[0|0] + a[3|1] - [tl|tl] - usat16 r1, #8, r1 - usat16 r2, #8, r2 - - sadd16 r4, r5, r10 ; l[1|1] + a[2|0] - [tl|tl] - sadd16 r5, r5, r11 ; l[1|1] + a[3|1] - [tl|tl] - - add r12, r1, r2, lsl #8 ; [3|2|1|0] - str r12, [r3], r0 - - usat16 r4, #8, r4 - usat16 r5, #8, r5 - - sadd16 r1, r6, r10 ; l[2|2] + a[2|0] - [tl|tl] - sadd16 r2, r6, r11 ; l[2|2] + a[3|1] - [tl|tl] - - add r12, r4, r5, lsl #8 ; [3|2|1|0] - str r12, [r3], r0 - - usat16 r1, #8, r1 - usat16 r2, #8, r2 - - sadd16 r4, r7, r10 ; l[3|3] + a[2|0] - [tl|tl] - sadd16 r5, r7, r11 ; l[3|3] + a[3|1] - [tl|tl] - - add r12, r1, r2, lsl #8 ; [3|2|1|0] - - usat16 r4, #8, r4 - usat16 r5, #8, r5 - - str r12, [r3], r0 - - add r12, r4, r5, lsl #8 ; [3|2|1|0] - str r12, [r3] - - pop {r4-r12, pc} - -b_ve_pred - ldr r8, [r0] ; a[3|2|1|0] - ldr r11, c00FF00FF - ldrb r9, [sp, #48] ; top_left - ldrb r10, [r0, #4] ; a[4] - - ldr r0, c00020002 - - uxtb16 r4, r8 ; a[2|0] - uxtb16 r5, r8, ror #8 ; a[3|1] - ldr r2, [sp, #44] ; dst_stride - pkhbt r9, r9, r5, lsl #16 ; a[1|-1] - - add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ] - uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ] - ldr r3, [sp, #40] ; dst - uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2] - - add r0, r0, r10, lsl #16 ;[a[4]+2 | 2] - add r0, r0, r4, asr #16 ;[a[4]+2 | a[2]+2] - add r0, r0, r5, lsl #1 ;[a[4]+2*a[3]+2 | a[2]+2*a[1]+2] - uadd16 r4, r4, r0 ;[a[4]+2*a[3]+a[2]+2|a[2]+2*a[1]+a[0]+2] - - and r9, r11, r9, asr #2 - and r4, r11, r4, asr #2 - add r9, r9, r4, lsl #8 - - ; store values - str r9, [r3], r2 - str r9, [r3], r2 - str r9, [r3], r2 - str r9, [r3] - - pop {r4-r12, pc} - - -b_he_pred - ldrb r4, [r1], r2 ; Left[0] - ldrb r8, [sp, #48] ; top_left - ldrb r5, [r1], r2 ; Left[1] - ldrb r6, [r1], r2 ; Left[2] - ldrb r7, [r1] ; Left[3] - - add r8, r8, r4 ; tl + l[0] - add r9, r4, r5 ; l[0] + 
l[1] - add r10, r5, r6 ; l[1] + l[2] - add r11, r6, r7 ; l[2] + l[3] - - mov r0, #2<<14 - - add r8, r8, r9 ; tl + 2*l[0] + l[1] - add r4, r9, r10 ; l[0] + 2*l[1] + l[2] - add r5, r10, r11 ; l[1] + 2*l[2] + l[3] - add r6, r11, r7, lsl #1 ; l[2] + 2*l[3] + l[3] - - - add r8, r0, r8, lsl #14 ; (tl + 2*l[0] + l[1])>>2 in top half - add r9, r0, r4, lsl #14 ; (l[0] + 2*l[1] + l[2])>>2 in top half - add r10,r0, r5, lsl #14 ; (l[1] + 2*l[2] + l[3])>>2 in top half - add r11,r0, r6, lsl #14 ; (l[2] + 2*l[3] + l[3])>>2 in top half - - pkhtb r8, r8, r8, asr #16 ; l[-|0|-|0] - pkhtb r9, r9, r9, asr #16 ; l[-|1|-|1] - pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2] - pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3] - - ldr r0, [sp, #44] ; dst_stride - ldr r3, [sp, #40] ; dst - - add r8, r8, r8, lsl #8 ; l[0|0|0|0] - add r9, r9, r9, lsl #8 ; l[1|1|1|1] - add r10, r10, r10, lsl #8 ; l[2|2|2|2] - add r11, r11, r11, lsl #8 ; l[3|3|3|3] - - ; store values - str r8, [r3], r0 - str r9, [r3], r0 - str r10, [r3], r0 - str r11, [r3] - - pop {r4-r12, pc} - -b_ld_pred - ldr r4, [r0] ; Above[0-3] - ldr r12, c00020002 - ldr r5, [r0, #4] ; Above[4-7] - ldr lr, c00FF00FF - - uxtb16 r6, r4 ; a[2|0] - uxtb16 r7, r4, ror #8 ; a[3|1] - uxtb16 r8, r5 ; a[6|4] - uxtb16 r9, r5, ror #8 ; a[7|5] - pkhtb r10, r6, r8 ; a[2|4] - pkhtb r11, r7, r9 ; a[3|5] - - add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1] - add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2] - uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2] - - add r5, r7, r10, ror #15 ; [a3+2*a4 | a1+2*a2] - add r5, r5, r11, ror #16 ; [a3+2*a4+a5 | a1+2*a2+a3] - uxtab16 r5, r5, r12 ; [a3+2*a4+a5+2 | a1+2*a2+a3+2] - - pkhtb r7, r9, r8, asr #16 - add r6, r8, r9, lsl #1 ; [a6+2*a7 | a4+2*a5] - uadd16 r6, r6, r7 ; [a6+2*a7+a7 | a4+2*a5+a6] - uxtab16 r6, r6, r12 ; [a6+2*a7+a7+2 | a4+2*a5+a6+2] - - uxth r7, r9 ; [ a5] - add r7, r7, r8, asr #15 ; [ a5+2*a6] - add r7, r7, r9, asr #16 ; [ a5+2*a6+a7] - uxtah r7, r7, r12 ; [ a5+2*a6+a7+2] - - ldr r0, [sp, #44] ; dst_stride - 
ldr r3, [sp, #40] ; dst - - ; scale down - and r4, lr, r4, asr #2 - and r5, lr, r5, asr #2 - and r6, lr, r6, asr #2 - mov r7, r7, asr #2 - - add r8, r4, r5, lsl #8 ; [3|2|1|0] - str r8, [r3], r0 - - mov r9, r8, lsr #8 - add r9, r9, r6, lsl #24 ; [4|3|2|1] - str r9, [r3], r0 - - mov r10, r9, lsr #8 - add r10, r10, r7, lsl #24 ; [5|4|3|2] - str r10, [r3], r0 - - mov r6, r6, lsr #16 - mov r11, r10, lsr #8 - add r11, r11, r6, lsl #24 ; [6|5|4|3] - str r11, [r3] - - pop {r4-r12, pc} - -b_rd_pred - ldrb r7, [r1], r2 ; l[0] = pp[3] - ldr lr, [r0] ; Above = pp[8|7|6|5] - ldrb r8, [sp, #48] ; tl = pp[4] - ldrb r6, [r1], r2 ; l[1] = pp[2] - ldrb r5, [r1], r2 ; l[2] = pp[1] - ldrb r4, [r1], r2 ; l[3] = pp[0] - - - uxtb16 r9, lr ; p[7|5] - uxtb16 r10, lr, ror #8 ; p[8|6] - add r4, r4, r6, lsl #16 ; p[2|0] - add r5, r5, r7, lsl #16 ; p[3|1] - add r6, r6, r8, lsl #16 ; p[4|2] - pkhbt r7, r7, r9, lsl #16 ; p[5|3] - pkhbt r8, r8, r10, lsl #16 ; p[6|4] - - ldr r12, c00020002 - ldr lr, c00FF00FF - - add r4, r4, r5, lsl #1 ; [p2+2*p3 | p0+2*p1] - add r4, r4, r6 ; [p2+2*p3+p4 | p0+2*p1+p2] - uxtab16 r4, r4, r12 ; [p2+2*p3+p4+2 | p0+2*p1+p2+2] - - add r5, r5, r6, lsl #1 ; [p3+2*p4 | p1+2*p2] - add r5, r5, r7 ; [p3+2*p4+p5 | p1+2*p2+p3] - uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2] - - add r6, r7, r8, lsl #1 ; [p5+2*p6 | p3+2*p4] - add r6, r6, r9 ; [p5+2*p6+p7 | p3+2*p4+p5] - uxtab16 r6, r6, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2] - - add r7, r8, r9, lsl #1 ; [p6+2*p7 | p4+2*p5] - add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] - uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] - - ldr r0, [sp, #44] ; dst_stride - ldr r3, [sp, #40] ; dst - - ; scale down - and r7, lr, r7, asr #2 - and r6, lr, r6, asr #2 - and r5, lr, r5, asr #2 - and r4, lr, r4, asr #2 - - add r8, r6, r7, lsl #8 ; [6|5|4|3] - str r8, [r3], r0 - - mov r9, r8, lsl #8 ; [5|4|3|-] - uxtab r9, r9, r4, ror #16 ; [5|4|3|2] - str r9, [r3], r0 - - mov r10, r9, lsl #8 ; [4|3|2|-] - uxtab r10, r10, r5 ; [4|3|2|1] - str r10, 
[r3], r0 - - mov r11, r10, lsl #8 ; [3|2|1|-] - uxtab r11, r11, r4 ; [3|2|1|0] - str r11, [r3] - - pop {r4-r12, pc} - -b_vr_pred - ldrb r7, [r1], r2 ; l[0] = pp[3] - ldr lr, [r0] ; Above = pp[8|7|6|5] - ldrb r8, [sp, #48] ; tl = pp[4] - ldrb r6, [r1], r2 ; l[1] = pp[2] - ldrb r5, [r1], r2 ; l[2] = pp[1] - ldrb r4, [r1] ; l[3] = pp[0] - - add r5, r5, r7, lsl #16 ; p[3|1] - add r6, r6, r8, lsl #16 ; p[4|2] - uxtb16 r9, lr ; p[7|5] - uxtb16 r10, lr, ror #8 ; p[8|6] - pkhbt r7, r7, r9, lsl #16 ; p[5|3] - pkhbt r8, r8, r10, lsl #16 ; p[6|4] - - ldr r4, c00010001 - ldr r12, c00020002 - ldr lr, c00FF00FF - - add r5, r5, r6, lsl #1 ; [p3+2*p4 | p1+2*p2] - add r5, r5, r7 ; [p3+2*p4+p5 | p1+2*p2+p3] - uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2] - - add r6, r6, r7, lsl #1 ; [p4+2*p5 | p2+2*p3] - add r6, r6, r8 ; [p4+2*p5+p6 | p2+2*p3+p4] - uxtab16 r6, r6, r12 ; [p4+2*p5+p6+2 | p2+2*p3+p4+2] - - uadd16 r11, r8, r9 ; [p6+p7 | p4+p5] - uhadd16 r11, r11, r4 ; [(p6+p7+1)>>1 | (p4+p5+1)>>1] - ; [F|E] - - add r7, r7, r8, lsl #1 ; [p5+2*p6 | p3+2*p4] - add r7, r7, r9 ; [p5+2*p6+p7 | p3+2*p4+p5] - uxtab16 r7, r7, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2] - - uadd16 r2, r9, r10 ; [p7+p8 | p5+p6] - uhadd16 r2, r2, r4 ; [(p7+p8+1)>>1 | (p5+p6+1)>>1] - ; [J|I] - - add r8, r8, r9, lsl #1 ; [p6+2*p7 | p4+2*p5] - add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6] - uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2] - - ldr r0, [sp, #44] ; dst_stride - ldr r3, [sp, #40] ; dst - - ; scale down - and r5, lr, r5, asr #2 ; [B|A] - and r6, lr, r6, asr #2 ; [D|C] - and r7, lr, r7, asr #2 ; [H|G] - and r8, lr, r8, asr #2 ; [L|K] - - add r12, r11, r2, lsl #8 ; [J|F|I|E] - str r12, [r3], r0 - - add r12, r7, r8, lsl #8 ; [L|H|K|G] - str r12, [r3], r0 - - pkhbt r2, r6, r2, lsl #16 ; [-|I|-|C] - add r2, r2, r11, lsl #8 ; [F|I|E|C] - - pkhtb r12, r6, r5 ; [-|D|-|A] - pkhtb r10, r7, r5, asr #16 ; [-|H|-|B] - str r2, [r3], r0 - add r12, r12, r10, lsl #8 ; [H|D|B|A] - str r12, [r3] - - pop {r4-r12, pc} - 
-b_vl_pred - ldr r4, [r0] ; [3|2|1|0] = Above[0-3] - ldr r12, c00020002 - ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7] - ldr lr, c00FF00FF - ldr r2, c00010001 - - mov r0, r4, lsr #16 ; [-|-|3|2] - add r0, r0, r5, lsl #16 ; [5|4|3|2] - uxtb16 r6, r4 ; [2|0] - uxtb16 r7, r4, ror #8 ; [3|1] - uxtb16 r8, r0 ; [4|2] - uxtb16 r9, r0, ror #8 ; [5|3] - uxtb16 r10, r5 ; [6|4] - uxtb16 r11, r5, ror #8 ; [7|5] - - uadd16 r4, r6, r7 ; [p2+p3 | p0+p1] - uhadd16 r4, r4, r2 ; [(p2+p3+1)>>1 | (p0+p1+1)>>1] - ; [B|A] - - add r5, r6, r7, lsl #1 ; [p2+2*p3 | p0+2*p1] - add r5, r5, r8 ; [p2+2*p3+p4 | p0+2*p1+p2] - uxtab16 r5, r5, r12 ; [p2+2*p3+p4+2 | p0+2*p1+p2+2] - - uadd16 r6, r7, r8 ; [p3+p4 | p1+p2] - uhadd16 r6, r6, r2 ; [(p3+p4+1)>>1 | (p1+p2+1)>>1] - ; [F|E] - - add r7, r7, r8, lsl #1 ; [p3+2*p4 | p1+2*p2] - add r7, r7, r9 ; [p3+2*p4+p5 | p1+2*p2+p3] - uxtab16 r7, r7, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2] - - add r8, r8, r9, lsl #1 ; [p4+2*p5 | p2+2*p3] - add r8, r8, r10 ; [p4+2*p5+p6 | p2+2*p3+p4] - uxtab16 r8, r8, r12 ; [p4+2*p5+p6+2 | p2+2*p3+p4+2] - - add r9, r9, r10, lsl #1 ; [p5+2*p6 | p3+2*p4] - add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5] - uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2] - - ldr r0, [sp, #44] ; dst_stride - ldr r3, [sp, #40] ; dst - - ; scale down - and r5, lr, r5, asr #2 ; [D|C] - and r7, lr, r7, asr #2 ; [H|G] - and r8, lr, r8, asr #2 ; [I|D] - and r9, lr, r9, asr #2 ; [J|H] - - add r10, r4, r6, lsl #8 ; [F|B|E|A] - str r10, [r3], r0 - - add r5, r5, r7, lsl #8 ; [H|C|G|D] - str r5, [r3], r0 - - pkhtb r12, r8, r4, asr #16 ; [-|I|-|B] - pkhtb r10, r9, r8 ; [-|J|-|D] - - add r12, r6, r12, lsl #8 ; [I|F|B|E] - str r12, [r3], r0 - - add r10, r7, r10, lsl #8 ; [J|H|D|G] - str r10, [r3] - - pop {r4-r12, pc} - -b_hd_pred - ldrb r7, [r1], r2 ; l[0] = pp[3] - ldr lr, [r0] ; Above = pp[8|7|6|5] - ldrb r8, [sp, #48] ; tl = pp[4] - ldrb r6, [r1], r2 ; l[1] = pp[2] - ldrb r5, [r1], r2 ; l[2] = pp[1] - ldrb r4, [r1] ; l[3] = pp[0] - - uxtb16 r9, lr ; p[7|5] - uxtb16 
r10, lr, ror #8 ; p[8|6] - - add r4, r4, r5, lsl #16 ; p[1|0] - add r5, r5, r6, lsl #16 ; p[2|1] - add r6, r6, r7, lsl #16 ; p[3|2] - add r7, r7, r8, lsl #16 ; p[4|3] - - ldr r12, c00020002 - ldr lr, c00FF00FF - ldr r2, c00010001 - - pkhtb r8, r7, r9 ; p[4|5] - pkhtb r1, r9, r10 ; p[7|6] - pkhbt r10, r8, r10, lsl #16 ; p[6|5] - - uadd16 r11, r4, r5 ; [p1+p2 | p0+p1] - uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1] - ; [B|A] - - add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1] - add r4, r4, r6 ; [p1+2*p2+p3 | p0+2*p1+p2] - uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2] - - uadd16 r0, r6, r7 ; [p3+p4 | p2+p3] - uhadd16 r0, r0, r2 ; [(p3+p4+1)>>1 | (p2+p3+1)>>1] - ; [F|E] - - add r5, r6, r7, lsl #1 ; [p3+2*p4 | p2+2*p3] - add r5, r5, r8, ror #16 ; [p3+2*p4+p5 | p2+2*p3+p4] - uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p2+2*p3+p4+2] - - add r6, r12, r8, ror #16 ; [p5+2 | p4+2] - add r6, r6, r10, lsl #1 ; [p5+2+2*p6 | p4+2+2*p5] - uxtab16 r6, r6, r1 ; [p5+2+2*p6+p7 | p4+2+2*p5+p6] - - ; scale down - and r4, lr, r4, asr #2 ; [D|C] - and r5, lr, r5, asr #2 ; [H|G] - and r6, lr, r6, asr #2 ; [J|I] - - ldr lr, [sp, #44] ; dst_stride - ldr r3, [sp, #40] ; dst - - pkhtb r2, r0, r6 ; [-|F|-|I] - pkhtb r12, r6, r5, asr #16 ; [-|J|-|H] - add r12, r12, r2, lsl #8 ; [F|J|I|H] - add r2, r0, r5, lsl #8 ; [H|F|G|E] - mov r12, r12, ror #24 ; [J|I|H|F] - str r12, [r3], lr - - mov r7, r11, asr #16 ; [-|-|-|B] - str r2, [r3], lr - add r7, r7, r0, lsl #16 ; [-|E|-|B] - add r7, r7, r4, asr #8 ; [-|E|D|B] - add r7, r7, r5, lsl #24 ; [G|E|D|B] - str r7, [r3], lr - - add r5, r11, r4, lsl #8 ; [D|B|C|A] - str r5, [r3] - - pop {r4-r12, pc} - - - -b_hu_pred - ldrb r4, [r1], r2 ; Left[0] - ldr r12, c00020002 - ldrb r5, [r1], r2 ; Left[1] - ldr lr, c00FF00FF - ldrb r6, [r1], r2 ; Left[2] - ldr r2, c00010001 - ldrb r7, [r1] ; Left[3] - - add r4, r4, r5, lsl #16 ; [1|0] - add r5, r5, r6, lsl #16 ; [2|1] - add r9, r6, r7, lsl #16 ; [3|2] - - uadd16 r8, r4, r5 ; [p1+p2 | p0+p1] - uhadd16 r8, r8, r2 ; 
[(p1+p2+1)>>1 | (p0+p1+1)>>1] - ; [B|A] - - add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1] - add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2] - uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2] - ldr r2, [sp, #44] ; dst_stride - ldr r3, [sp, #40] ; dst - and r4, lr, r4, asr #2 ; [D|C] - - add r10, r6, r7 ; [p2+p3] - add r11, r10, r7, lsl #1 ; [p2+3*p3] - add r10, r10, #1 - add r11, r11, #2 - mov r10, r10, asr #1 ; [E] - mov r11, r11, asr #2 ; [F] - - add r9, r7, r9, asr #8 ; [-|-|G|G] - add r0, r8, r4, lsl #8 ; [D|B|C|A] - add r7, r9, r9, lsl #16 ; [G|G|G|G] - - str r0, [r3], r2 - - mov r1, r8, asr #16 ; [-|-|-|B] - add r1, r1, r4, asr #8 ; [-|-|D|B] - add r1, r1, r10, lsl #16 ; [-|E|D|B] - add r1, r1, r11, lsl #24 ; [F|E|D|B] - str r1, [r3], r2 - - add r10, r11, lsl #8 ; [-|-|F|E] - add r10, r10, r9, lsl #16 ; [G|G|F|E] - str r10, [r3], r2 - - str r7, [r3] - - pop {r4-r12, pc} - - ENDP - -; constants -c00010001 - DCD 0x00010001 -c00020002 - DCD 0x00020002 -c00FF00FF - DCD 0x00FF00FF - - END diff --git a/libvpx/vp8/common/arm/neon/reconintra_neon.c b/libvpx/vp8/common/arm/neon/reconintra_neon.c deleted file mode 100644 index af52cd5e..00000000 --- a/libvpx/vp8/common/arm/neon/reconintra_neon.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "vp8/common/blockd.h" - -void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) { - const int mode = x->mode_info_context->mbmi.mode; - int i; - - switch (mode) { - case DC_PRED: - { - int shift = x->up_available + x->left_available; - uint8x16_t v_expected_dc = vdupq_n_u8(128); - - if (shift) { - unsigned int average = 0; - int expected_dc; - if (x->up_available) { - const uint8x16_t v_above = vld1q_u8(yabove_row); - const uint16x8_t a = vpaddlq_u8(v_above); - const uint32x4_t b = vpaddlq_u16(a); - const uint64x2_t c = vpaddlq_u32(b); - const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), - vreinterpret_u32_u64(vget_high_u64(c))); - average = vget_lane_u32(d, 0); - } - if (x->left_available) { - for (i = 0; i < 16; ++i) { - average += yleft[0]; - yleft += left_stride; - } - } - shift += 3; - expected_dc = (average + (1 << (shift - 1))) >> shift; - v_expected_dc = vmovq_n_u8((uint8_t)expected_dc); - } - for (i = 0; i < 16; ++i) { - vst1q_u8(ypred_ptr, v_expected_dc); - ypred_ptr += y_stride; - } - } - break; - case V_PRED: - { - const uint8x16_t v_above = vld1q_u8(yabove_row); - for (i = 0; i < 16; ++i) { - vst1q_u8(ypred_ptr, v_above); - ypred_ptr += y_stride; - } - } - break; - case H_PRED: - { - for (i = 0; i < 16; ++i) { - const uint8x16_t v_yleft = vmovq_n_u8((uint8_t)yleft[0]); - yleft += left_stride; - vst1q_u8(ypred_ptr, v_yleft); - ypred_ptr += y_stride; - } - } - break; - case TM_PRED: - { - const uint16x8_t v_ytop_left = vmovq_n_u16((int16_t)yabove_row[-1]); - const uint8x16_t v_above = vld1q_u8(yabove_row); - for (i = 0; i < 16; ++i) { - const uint8x8_t v_yleft = vmov_n_u8((int8_t)yleft[0]); - const uint16x8_t a_lo = vaddl_u8(vget_low_u8(v_above), v_yleft); - const uint16x8_t a_hi = vaddl_u8(vget_high_u8(v_above), v_yleft); - const int16x8_t b_lo = 
vsubq_s16(vreinterpretq_s16_u16(a_lo), - vreinterpretq_s16_u16(v_ytop_left)); - const int16x8_t b_hi = vsubq_s16(vreinterpretq_s16_u16(a_hi), - vreinterpretq_s16_u16(v_ytop_left)); - const uint8x8_t pred_lo = vqmovun_s16(b_lo); - const uint8x8_t pred_hi = vqmovun_s16(b_hi); - - vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi)); - ypred_ptr += y_stride; - yleft += left_stride; - } - } - break; - } -} - -void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride) { - const int mode = x->mode_info_context->mbmi.uv_mode; - int i; - - switch (mode) { - case DC_PRED: - { - int shift = x->up_available + x->left_available; - uint8x8_t v_expected_udc = vdup_n_u8(128); - uint8x8_t v_expected_vdc = vdup_n_u8(128); - - if (shift) { - unsigned int average_u = 0; - unsigned int average_v = 0; - int expected_udc; - int expected_vdc; - if (x->up_available) { - const uint8x8_t v_uabove = vld1_u8(uabove_row); - const uint8x8_t v_vabove = vld1_u8(vabove_row); - const uint16x8_t a = vpaddlq_u8(vcombine_u8(v_uabove, v_vabove)); - const uint32x4_t b = vpaddlq_u16(a); - const uint64x2_t c = vpaddlq_u32(b); - average_u = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 0); - average_v = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 2); - } - if (x->left_available) { - for (i = 0; i < 8; ++i) { - average_u += uleft[0]; - uleft += left_stride; - average_v += vleft[0]; - vleft += left_stride; - } - } - shift += 2; - expected_udc = (average_u + (1 << (shift - 1))) >> shift; - expected_vdc = (average_v + (1 << (shift - 1))) >> shift; - v_expected_udc = vmov_n_u8((uint8_t)expected_udc); - v_expected_vdc = vmov_n_u8((uint8_t)expected_vdc); - } - for (i = 0; i < 8; ++i) { - vst1_u8(upred_ptr, v_expected_udc); - upred_ptr += pred_stride; - vst1_u8(vpred_ptr, v_expected_vdc); - vpred_ptr += pred_stride; 
- } - } - break; - case V_PRED: - { - const uint8x8_t v_uabove = vld1_u8(uabove_row); - const uint8x8_t v_vabove = vld1_u8(vabove_row); - for (i = 0; i < 8; ++i) { - vst1_u8(upred_ptr, v_uabove); - upred_ptr += pred_stride; - vst1_u8(vpred_ptr, v_vabove); - vpred_ptr += pred_stride; - } - } - break; - case H_PRED: - { - for (i = 0; i < 8; ++i) { - const uint8x8_t v_uleft = vmov_n_u8((uint8_t)uleft[0]); - const uint8x8_t v_vleft = vmov_n_u8((uint8_t)vleft[0]); - uleft += left_stride; - vleft += left_stride; - vst1_u8(upred_ptr, v_uleft); - upred_ptr += pred_stride; - vst1_u8(vpred_ptr, v_vleft); - vpred_ptr += pred_stride; - } - } - break; - case TM_PRED: - { - const uint16x8_t v_utop_left = vmovq_n_u16((int16_t)uabove_row[-1]); - const uint16x8_t v_vtop_left = vmovq_n_u16((int16_t)vabove_row[-1]); - const uint8x8_t v_uabove = vld1_u8(uabove_row); - const uint8x8_t v_vabove = vld1_u8(vabove_row); - for (i = 0; i < 8; ++i) { - const uint8x8_t v_uleft = vmov_n_u8((int8_t)uleft[0]); - const uint8x8_t v_vleft = vmov_n_u8((int8_t)vleft[0]); - const uint16x8_t a_u = vaddl_u8(v_uabove, v_uleft); - const uint16x8_t a_v = vaddl_u8(v_vabove, v_vleft); - const int16x8_t b_u = vsubq_s16(vreinterpretq_s16_u16(a_u), - vreinterpretq_s16_u16(v_utop_left)); - const int16x8_t b_v = vsubq_s16(vreinterpretq_s16_u16(a_v), - vreinterpretq_s16_u16(v_vtop_left)); - const uint8x8_t pred_u = vqmovun_s16(b_u); - const uint8x8_t pred_v = vqmovun_s16(b_v); - - vst1_u8(upred_ptr, pred_u); - vst1_u8(vpred_ptr, pred_v); - upred_ptr += pred_stride; - vpred_ptr += pred_stride; - uleft += left_stride; - vleft += left_stride; - } - } - break; - } -} diff --git a/libvpx/vp8/common/common.h b/libvpx/vp8/common/common.h index ba3d9f54..e58a9cc2 100644 --- a/libvpx/vp8/common/common.h +++ b/libvpx/vp8/common/common.h @@ -22,9 +22,6 @@ extern "C" { #endif -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) -#define MAX(x, y) (((x) > (y)) ? 
(x) : (y)) - /* Only need this for fixed-size arrays, for structs just assign. */ #define vp8_copy( Dest, Src) { \ diff --git a/libvpx/vp8/common/findnearmv.h b/libvpx/vp8/common/findnearmv.h index 3c8c0506..155847ca 100644 --- a/libvpx/vp8/common/findnearmv.h +++ b/libvpx/vp8/common/findnearmv.h @@ -12,6 +12,7 @@ #ifndef VP8_COMMON_FINDNEARMV_H_ #define VP8_COMMON_FINDNEARMV_H_ +#include "./vpx_config.h" #include "mv.h" #include "blockd.h" #include "modecont.h" @@ -22,8 +23,8 @@ extern "C" { #endif -static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, - const int *ref_frame_sign_bias) +static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe, + int_mv *mvp, const int *ref_frame_sign_bias) { if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) { @@ -34,7 +35,7 @@ static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, #define LEFT_TOP_MARGIN (16 << 3) #define RIGHT_BOTTOM_MARGIN (16 << 3) -static void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) +static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) { if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; @@ -47,8 +48,9 @@ static void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; } -static void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, int mb_to_right_edge, - int mb_to_top_edge, int mb_to_bottom_edge) +static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, + int mb_to_right_edge, int mb_to_top_edge, + int mb_to_bottom_edge) { mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ? mb_to_left_edge : mv->as_mv.col; @@ -59,9 +61,10 @@ static void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, int mb_to_right_edge, mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ? 
mb_to_bottom_edge : mv->as_mv.row; } -static unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge, - int mb_to_right_edge, int mb_to_top_edge, - int mb_to_bottom_edge) +static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge, + int mb_to_right_edge, + int mb_to_top_edge, + int mb_to_bottom_edge) { unsigned int need_to_clamp; need_to_clamp = (mv->as_mv.col < mb_to_left_edge); @@ -101,7 +104,7 @@ vp8_prob *vp8_mv_ref_probs( extern const unsigned char vp8_mbsplit_offset[4][16]; -static int left_block_mv(const MODE_INFO *cur_mb, int b) +static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b) { if (!(b & 3)) { @@ -116,7 +119,7 @@ static int left_block_mv(const MODE_INFO *cur_mb, int b) return (cur_mb->bmi + b - 1)->mv.as_int; } -static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) +static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { if (!(b >> 2)) { @@ -130,7 +133,7 @@ static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) return (cur_mb->bmi + (b - 4))->mv.as_int; } -static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) +static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { if (!(b & 3)) { @@ -156,7 +159,8 @@ static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) return (cur_mb->bmi + b - 1)->as_mode; } -static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride) +static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, + int mi_stride) { if (!(b >> 2)) { diff --git a/libvpx/vp8/common/invtrans.h b/libvpx/vp8/common/invtrans.h index affe57e3..9cfea8d5 100644 --- a/libvpx/vp8/common/invtrans.h +++ b/libvpx/vp8/common/invtrans.h @@ -12,7 +12,7 @@ #ifndef VP8_COMMON_INVTRANS_H_ #define VP8_COMMON_INVTRANS_H_ -#include "vpx_config.h" +#include "./vpx_config.h" #include "vp8_rtcd.h" #include "blockd.h" #include "onyxc_int.h" @@ -37,7 +37,7 @@ static 
void eob_adjust(char *eobs, short *diff) } } -static void vp8_inverse_transform_mby(MACROBLOCKD *xd) +static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd) { short *DQC = xd->dequant_y1; diff --git a/libvpx/vp8/common/mips/msa/reconintra_msa.c b/libvpx/vp8/common/mips/msa/reconintra_msa.c deleted file mode 100644 index 57f705d2..00000000 --- a/libvpx/vp8/common/mips/msa/reconintra_msa.c +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vp8_rtcd.h" -#include "vp8/common/blockd.h" -#include "vp8/common/mips/msa/vp8_macros_msa.h" - -static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst, - int32_t dst_stride) -{ - uint64_t out = LD(src); - - SD4(out, out, out, out, dst, dst_stride); - dst += (4 * dst_stride); - SD4(out, out, out, out, dst, dst_stride); -} - -static void intra_predict_vert_16x16_msa(uint8_t *src, uint8_t *dst, - int32_t dst_stride) -{ - v16u8 out = LD_UB(src); - - ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); - dst += (8 * dst_stride); - ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); -} - -static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride) -{ - uint64_t out0, out1, out2, out3, out4, out5, out6, out7; - - out0 = src[0 * src_stride] * 0x0101010101010101ull; - out1 = src[1 * src_stride] * 0x0101010101010101ull; - out2 = src[2 * src_stride] * 0x0101010101010101ull; - out3 = src[3 * src_stride] * 0x0101010101010101ull; - out4 = src[4 * src_stride] * 0x0101010101010101ull; - out5 = src[5 * src_stride] * 0x0101010101010101ull; - out6 = src[6 * 
src_stride] * 0x0101010101010101ull; - out7 = src[7 * src_stride] * 0x0101010101010101ull; - - SD4(out0, out1, out2, out3, dst, dst_stride); - dst += (4 * dst_stride); - SD4(out4, out5, out6, out7, dst, dst_stride); -} - -static void intra_predict_horiz_16x16_msa(uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride) -{ - uint32_t row; - uint8_t inp0, inp1, inp2, inp3; - v16u8 src0, src1, src2, src3; - - for (row = 4; row--;) - { - inp0 = src[0]; - src += src_stride; - inp1 = src[0]; - src += src_stride; - inp2 = src[0]; - src += src_stride; - inp3 = src[0]; - src += src_stride; - - src0 = (v16u8)__msa_fill_b(inp0); - src1 = (v16u8)__msa_fill_b(inp1); - src2 = (v16u8)__msa_fill_b(inp2); - src3 = (v16u8)__msa_fill_b(inp3); - - ST_UB4(src0, src1, src2, src3, dst, dst_stride); - dst += (4 * dst_stride); - } -} - -static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left, - int32_t src_stride_left, - uint8_t *dst, int32_t dst_stride, - uint8_t is_above, uint8_t is_left) -{ - uint32_t row, addition = 0; - uint64_t out; - v16u8 src_above, store; - v8u16 sum_above; - v4u32 sum_top; - v2u64 sum; - - if (is_left && is_above) - { - src_above = LD_UB(src_top); - - sum_above = __msa_hadd_u_h(src_above, src_above); - sum_top = __msa_hadd_u_w(sum_above, sum_above); - sum = __msa_hadd_u_d(sum_top, sum_top); - addition = __msa_copy_u_w((v4i32)sum, 0); - - for (row = 0; row < 8; ++row) - { - addition += src_left[row * src_stride_left]; - } - - addition = (addition + 8) >> 4; - store = (v16u8)__msa_fill_b(addition); - } - else if (is_left) - { - for (row = 0; row < 8; ++row) - { - addition += src_left[row * src_stride_left]; - } - - addition = (addition + 4) >> 3; - store = (v16u8)__msa_fill_b(addition); - } - else if (is_above) - { - src_above = LD_UB(src_top); - - sum_above = __msa_hadd_u_h(src_above, src_above); - sum_top = __msa_hadd_u_w(sum_above, sum_above); - sum = __msa_hadd_u_d(sum_top, sum_top); - sum = (v2u64)__msa_srari_d((v2i64)sum, 3); - 
store = (v16u8)__msa_splati_b((v16i8)sum, 0); - } - else - { - store = (v16u8)__msa_ldi_b(128); - } - - out = __msa_copy_u_d((v2i64)store, 0); - - SD4(out, out, out, out, dst, dst_stride); - dst += (4 * dst_stride); - SD4(out, out, out, out, dst, dst_stride); -} - -static void intra_predict_dc_16x16_msa(uint8_t *src_top, uint8_t *src_left, - int32_t src_stride_left, - uint8_t *dst, int32_t dst_stride, - uint8_t is_above, uint8_t is_left) -{ - uint32_t row; - uint32_t addition = 0; - v16u8 src_above, out; - v8u16 sum_above; - v4u32 sum_top; - v2u64 sum; - - if (is_left && is_above) - { - src_above = LD_UB(src_top); - - sum_above = __msa_hadd_u_h(src_above, src_above); - sum_top = __msa_hadd_u_w(sum_above, sum_above); - sum = __msa_hadd_u_d(sum_top, sum_top); - sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum); - sum = __msa_hadd_u_d(sum_top, sum_top); - addition = __msa_copy_u_w((v4i32)sum, 0); - - for (row = 0; row < 16; ++row) - { - addition += src_left[row * src_stride_left]; - } - - addition = (addition + 16) >> 5; - out = (v16u8)__msa_fill_b(addition); - } - else if (is_left) - { - for (row = 0; row < 16; ++row) - { - addition += src_left[row * src_stride_left]; - } - - addition = (addition + 8) >> 4; - out = (v16u8)__msa_fill_b(addition); - } - else if (is_above) - { - src_above = LD_UB(src_top); - - sum_above = __msa_hadd_u_h(src_above, src_above); - sum_top = __msa_hadd_u_w(sum_above, sum_above); - sum = __msa_hadd_u_d(sum_top, sum_top); - sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum); - sum = __msa_hadd_u_d(sum_top, sum_top); - sum = (v2u64)__msa_srari_d((v2i64)sum, 4); - out = (v16u8)__msa_splati_b((v16i8)sum, 0); - } - else - { - out = (v16u8)__msa_ldi_b(128); - } - - ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); - dst += (8 * dst_stride); - ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); -} - -void vp8_build_intra_predictors_mby_s_msa(struct macroblockd *x, - unsigned char *yabove_row, - unsigned char 
*yleft, - int left_stride, - unsigned char *ypred_ptr, - int y_stride) -{ - uint32_t row, col; - uint8_t ytop_left = yabove_row[-1]; - - switch (x->mode_info_context->mbmi.mode) - { - case DC_PRED: - intra_predict_dc_16x16_msa(yabove_row, yleft, left_stride, - ypred_ptr, y_stride, - x->up_available, x->left_available); - break; - - case V_PRED: - intra_predict_vert_16x16_msa(yabove_row, ypred_ptr, y_stride); - break; - - case H_PRED: - intra_predict_horiz_16x16_msa(yleft, left_stride, ypred_ptr, - y_stride); - break; - - case TM_PRED: - for (row = 0; row < 16; ++row) - { - for (col = 0; col < 16; ++col) - { - int pred = yleft[row * left_stride] + yabove_row[col] - - ytop_left; - - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - - ypred_ptr[col] = pred; - } - - ypred_ptr += y_stride; - } - break; - - case B_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: - break; - } -} - -void vp8_build_intra_predictors_mbuv_s_msa(struct macroblockd *x, - unsigned char *uabove_row, - unsigned char *vabove_row, - unsigned char *uleft, - unsigned char *vleft, - int left_stride, - unsigned char *upred_ptr, - unsigned char *vpred_ptr, - int pred_stride) -{ - uint32_t row, col; - uint8_t utop_left = uabove_row[-1]; - uint8_t vtop_left = vabove_row[-1]; - - switch (x->mode_info_context->mbmi.uv_mode) - { - case DC_PRED: - intra_predict_dc_8x8_msa(uabove_row, uleft, left_stride, - upred_ptr, pred_stride, - x->up_available, x->left_available); - intra_predict_dc_8x8_msa(vabove_row, vleft, left_stride, - vpred_ptr, pred_stride, - x->up_available, x->left_available); - break; - - case V_PRED: - intra_predict_vert_8x8_msa(uabove_row, upred_ptr, pred_stride); - intra_predict_vert_8x8_msa(vabove_row, vpred_ptr, pred_stride); - break; - - case H_PRED: - intra_predict_horiz_8x8_msa(uleft, left_stride, upred_ptr, - pred_stride); - intra_predict_horiz_8x8_msa(vleft, left_stride, vpred_ptr, - pred_stride); - break; - - case 
TM_PRED: - for (row = 0; row < 8; ++row) - { - for (col = 0; col < 8; ++col) - { - int predu = uleft[row * left_stride] + uabove_row[col] - - utop_left; - int predv = vleft[row * left_stride] + vabove_row[col] - - vtop_left; - - if (predu < 0) - predu = 0; - - if (predu > 255) - predu = 255; - - if (predv < 0) - predv = 0; - - if (predv > 255) - predv = 255; - - upred_ptr[col] = predu; - vpred_ptr[col] = predv; - } - - upred_ptr += pred_stride; - vpred_ptr += pred_stride; - } - break; - - case B_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: - break; - } -} diff --git a/libvpx/vp8/common/onyx.h b/libvpx/vp8/common/onyx.h index f39b675c..febe8150 100644 --- a/libvpx/vp8/common/onyx.h +++ b/libvpx/vp8/common/onyx.h @@ -65,7 +65,7 @@ extern "C" #include <assert.h> - static void Scale2Ratio(int mode, int *hr, int *hs) + static INLINE void Scale2Ratio(int mode, int *hr, int *hs) { switch (mode) { diff --git a/libvpx/vp8/common/postproc.c b/libvpx/vp8/common/postproc.c index a4e6ae17..322b6138 100644 --- a/libvpx/vp8/common/postproc.c +++ b/libvpx/vp8/common/postproc.c @@ -675,6 +675,7 @@ void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v, } } +#if CONFIG_POSTPROC_VISUALIZER static void constrain_line (int x_0, int *x_1, int y_0, int *y_1, int width, int height) { int dx; @@ -717,6 +718,7 @@ static void constrain_line (int x_0, int *x_1, int y_0, int *y_1, int width, int *x_1 = ((0-y_0)*dx)/dy + x_0; } } +#endif // CONFIG_POSTPROC_VISUALIZER #if CONFIG_POSTPROC int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags) diff --git a/libvpx/vp8/common/reconintra.c b/libvpx/vp8/common/reconintra.c index 0a6c51b3..356655da 100644 --- a/libvpx/vp8/common/reconintra.c +++ b/libvpx/vp8/common/reconintra.c @@ -9,272 +9,109 @@ */ -#include "vpx_config.h" -#include "vp8_rtcd.h" +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "./vp8_rtcd.h" #include 
"vpx_mem/vpx_mem.h" +#include "vpx_ports/vpx_once.h" #include "blockd.h" +#include "vp8/common/reconintra.h" +#include "vp8/common/reconintra4x4.h" -void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) +enum { + SIZE_16, + SIZE_8, + NUM_SIZES, +}; + +typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left); + +static intra_pred_fn pred[4][NUM_SIZES]; +static intra_pred_fn dc_pred[2][2][NUM_SIZES]; + +static void vp8_init_intra_predictors_internal(void) { - unsigned char yleft_col[16]; - unsigned char ytop_left = yabove_row[-1]; - int r, c, i; +#define INIT_SIZE(sz) \ + pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \ + pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \ + pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \ + \ + dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \ + dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \ + dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \ + dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz + + INIT_SIZE(16); + INIT_SIZE(8); + vp8_init_intra4x4_predictors_internal(); +} + +void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, + unsigned char * yabove_row, + unsigned char * yleft, + int left_stride, + unsigned char * ypred_ptr, + int y_stride) +{ + MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode; + DECLARE_ALIGNED(16, uint8_t, yleft_col[16]); + int i; + intra_pred_fn fn; for (i = 0; i < 16; i++) { yleft_col[i] = yleft[i* left_stride]; } - /* for Y */ - switch (x->mode_info_context->mbmi.mode) - { - case DC_PRED: - { - int expected_dc; - int shift; - int average = 0; - - - if (x->up_available || x->left_available) - { - if (x->up_available) - { - for (i = 0; i < 16; i++) - { - average += yabove_row[i]; - } - } - - if (x->left_available) - { - - for (i = 0; i < 16; i++) - { - average 
+= yleft_col[i]; - } - - } - - - - shift = 3 + x->up_available + x->left_available; - expected_dc = (average + (1 << (shift - 1))) >> shift; - } - else - { - expected_dc = 128; - } - - /*memset(ypred_ptr, expected_dc, 256);*/ - for (r = 0; r < 16; r++) - { - memset(ypred_ptr, expected_dc, 16); - ypred_ptr += y_stride; - } - } - break; - case V_PRED: + if (mode == DC_PRED) { - - for (r = 0; r < 16; r++) - { - - ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0]; - ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1]; - ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2]; - ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3]; - ypred_ptr += y_stride; - } + fn = dc_pred[x->left_available][x->up_available][SIZE_16]; } - break; - case H_PRED: + else { - - for (r = 0; r < 16; r++) - { - - memset(ypred_ptr, yleft_col[r], 16); - ypred_ptr += y_stride; - } - + fn = pred[mode][SIZE_16]; } - break; - case TM_PRED: - { - - for (r = 0; r < 16; r++) - { - for (c = 0; c < 16; c++) - { - int pred = yleft_col[r] + yabove_row[ c] - ytop_left; - - if (pred < 0) - pred = 0; - if (pred > 255) - pred = 255; - - ypred_ptr[c] = pred; - } - - ypred_ptr += y_stride; - } - - } - break; - case B_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: - break; - } + fn(ypred_ptr, y_stride, yabove_row, yleft_col); } -void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride) +void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x, + unsigned char * uabove_row, + unsigned char * vabove_row, + unsigned char * uleft, + unsigned char * vleft, + int left_stride, + unsigned char * upred_ptr, + unsigned char * vpred_ptr, + int pred_stride) { + MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode; unsigned char uleft_col[8]; - unsigned char utop_left = 
uabove_row[-1]; unsigned char vleft_col[8]; - unsigned char vtop_left = vabove_row[-1]; - - int i, j; + int i; + intra_pred_fn fn; for (i = 0; i < 8; i++) { - uleft_col[i] = uleft [i* left_stride]; - vleft_col[i] = vleft [i* left_stride]; + uleft_col[i] = uleft[i * left_stride]; + vleft_col[i] = vleft[i * left_stride]; } - switch (x->mode_info_context->mbmi.uv_mode) - { - case DC_PRED: - { - int expected_udc; - int expected_vdc; - int shift; - int Uaverage = 0; - int Vaverage = 0; - - if (x->up_available) - { - for (i = 0; i < 8; i++) - { - Uaverage += uabove_row[i]; - Vaverage += vabove_row[i]; - } - } - - if (x->left_available) - { - for (i = 0; i < 8; i++) - { - Uaverage += uleft_col[i]; - Vaverage += vleft_col[i]; - } - } - - if (!x->up_available && !x->left_available) - { - expected_udc = 128; - expected_vdc = 128; - } - else - { - shift = 2 + x->up_available + x->left_available; - expected_udc = (Uaverage + (1 << (shift - 1))) >> shift; - expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift; - } - - - /*memset(upred_ptr,expected_udc,64);*/ - /*memset(vpred_ptr,expected_vdc,64);*/ - for (i = 0; i < 8; i++) - { - memset(upred_ptr, expected_udc, 8); - memset(vpred_ptr, expected_vdc, 8); - upred_ptr += pred_stride; - vpred_ptr += pred_stride; - } - } - break; - case V_PRED: + if (uvmode == DC_PRED) { - for (i = 0; i < 8; i++) - { - memcpy(upred_ptr, uabove_row, 8); - memcpy(vpred_ptr, vabove_row, 8); - upred_ptr += pred_stride; - vpred_ptr += pred_stride; - } - + fn = dc_pred[x->left_available][x->up_available][SIZE_8]; } - break; - case H_PRED: + else { - for (i = 0; i < 8; i++) - { - memset(upred_ptr, uleft_col[i], 8); - memset(vpred_ptr, vleft_col[i], 8); - upred_ptr += pred_stride; - vpred_ptr += pred_stride; - } + fn = pred[uvmode][SIZE_8]; } - break; - case TM_PRED: - { - for (i = 0; i < 8; i++) - { - for (j = 0; j < 8; j++) - { - int predu = uleft_col[i] + uabove_row[j] - utop_left; - int predv = vleft_col[i] + vabove_row[j] - vtop_left; - - if (predu < 
0) - predu = 0; - - if (predu > 255) - predu = 255; - - if (predv < 0) - predv = 0; - - if (predv > 255) - predv = 255; - - upred_ptr[j] = predu; - vpred_ptr[j] = predv; - } - - upred_ptr += pred_stride; - vpred_ptr += pred_stride; - } + fn(upred_ptr, pred_stride, uabove_row, uleft_col); + fn(vpred_ptr, pred_stride, vabove_row, vleft_col); +} - } - break; - case B_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: - break; - } +void vp8_init_intra_predictors(void) +{ + once(vp8_init_intra_predictors_internal); } diff --git a/libvpx/vp8/common/reconintra.h b/libvpx/vp8/common/reconintra.h new file mode 100644 index 00000000..b6225a66 --- /dev/null +++ b/libvpx/vp8/common/reconintra.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef VP8_COMMON_RECONINTRA_H_ +#define VP8_COMMON_RECONINTRA_H_ + +#include "vp8/common/blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, + unsigned char *yabove_row, + unsigned char *yleft, + int left_stride, + unsigned char *ypred_ptr, + int y_stride); + +void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x, + unsigned char * uabove_row, + unsigned char * vabove_row, + unsigned char * uleft, + unsigned char * vleft, + int left_stride, + unsigned char * upred_ptr, + unsigned char * vpred_ptr, + int pred_stride); + +void vp8_init_intra_predictors(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_RECONINTRA_H_ diff --git a/libvpx/vp8/common/reconintra4x4.c b/libvpx/vp8/common/reconintra4x4.c index 3d4f2c40..35ad891e 100644 --- a/libvpx/vp8/common/reconintra4x4.c +++ b/libvpx/vp8/common/reconintra4x4.c @@ -8,290 +8,47 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <string.h> #include "vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vp8_rtcd.h" #include "blockd.h" -void vp8_intra4x4_predict_c(unsigned char *Above, - unsigned char *yleft, int left_stride, - int _b_mode, - unsigned char *dst, int dst_stride, - unsigned char top_left) +typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left); + +static intra_pred_fn pred[10]; + +void vp8_init_intra4x4_predictors_internal(void) +{ + pred[B_DC_PRED] = vpx_dc_predictor_4x4; + pred[B_TM_PRED] = vpx_tm_predictor_4x4; + pred[B_VE_PRED] = vpx_ve_predictor_4x4; + pred[B_HE_PRED] = vpx_he_predictor_4x4; + pred[B_LD_PRED] = vpx_d45e_predictor_4x4; + pred[B_RD_PRED] = vpx_d135_predictor_4x4; + pred[B_VR_PRED] = vpx_d117_predictor_4x4; + pred[B_VL_PRED] = vpx_d63f_predictor_4x4; + pred[B_HD_PRED] = vpx_d153_predictor_4x4; + pred[B_HU_PRED] = vpx_d207_predictor_4x4; +} + +void vp8_intra4x4_predict(unsigned char *above, + unsigned char *yleft, int 
left_stride, + B_PREDICTION_MODE b_mode, + unsigned char *dst, int dst_stride, + unsigned char top_left) { - int i, r, c; - B_PREDICTION_MODE b_mode = (B_PREDICTION_MODE)_b_mode; unsigned char Left[4]; + unsigned char Aboveb[12], *Above = Aboveb + 4; + Left[0] = yleft[0]; Left[1] = yleft[left_stride]; Left[2] = yleft[2 * left_stride]; Left[3] = yleft[3 * left_stride]; + memcpy(Above, above, 8); + Above[-1] = top_left; - switch (b_mode) - { - case B_DC_PRED: - { - int expected_dc = 0; - - for (i = 0; i < 4; i++) - { - expected_dc += Above[i]; - expected_dc += Left[i]; - } - - expected_dc = (expected_dc + 4) >> 3; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - dst[c] = expected_dc; - } - - dst += dst_stride; - } - } - break; - case B_TM_PRED: - { - /* prediction similar to true_motion prediction */ - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - int pred = Above[c] - top_left + Left[r]; - - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - - dst[c] = pred; - } - - dst += dst_stride; - } - } - break; - - case B_VE_PRED: - { - - unsigned int ap[4]; - ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2; - ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2; - ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2; - ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - - dst[c] = ap[c]; - } - - dst += dst_stride; - } - - } - break; - - - case B_HE_PRED: - { - - unsigned int lp[4]; - lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2; - lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2; - lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2; - lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - dst[c] = lp[r]; - } - - dst += dst_stride; - } - } - break; - case B_LD_PRED: - { - unsigned char *ptr = Above; - dst[0 * dst_stride + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; - dst[0 * dst_stride + 1] = 
- dst[1 * dst_stride + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; - dst[0 * dst_stride + 2] = - dst[1 * dst_stride + 1] = - dst[2 * dst_stride + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2; - dst[0 * dst_stride + 3] = - dst[1 * dst_stride + 2] = - dst[2 * dst_stride + 1] = - dst[3 * dst_stride + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2; - dst[1 * dst_stride + 3] = - dst[2 * dst_stride + 2] = - dst[3 * dst_stride + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2; - dst[2 * dst_stride + 3] = - dst[3 * dst_stride + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2; - dst[3 * dst_stride + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2; - - } - break; - case B_RD_PRED: - { - - unsigned char pp[9]; - - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; - pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; - - dst[3 * dst_stride + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - dst[3 * dst_stride + 1] = - dst[2 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - dst[3 * dst_stride + 2] = - dst[2 * dst_stride + 1] = - dst[1 * dst_stride + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - dst[3 * dst_stride + 3] = - dst[2 * dst_stride + 2] = - dst[1 * dst_stride + 1] = - dst[0 * dst_stride + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - dst[2 * dst_stride + 3] = - dst[1 * dst_stride + 2] = - dst[0 * dst_stride + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - dst[1 * dst_stride + 3] = - dst[0 * dst_stride + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - dst[0 * dst_stride + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; - - } - break; - case B_VR_PRED: - { - - unsigned char pp[9]; - - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; - pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; - - - dst[3 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - dst[2 * dst_stride + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - dst[3 * dst_stride + 1] = - 
dst[1 * dst_stride + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - dst[2 * dst_stride + 1] = - dst[0 * dst_stride + 0] = (pp[4] + pp[5] + 1) >> 1; - dst[3 * dst_stride + 2] = - dst[1 * dst_stride + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - dst[2 * dst_stride + 2] = - dst[0 * dst_stride + 1] = (pp[5] + pp[6] + 1) >> 1; - dst[3 * dst_stride + 3] = - dst[1 * dst_stride + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - dst[2 * dst_stride + 3] = - dst[0 * dst_stride + 2] = (pp[6] + pp[7] + 1) >> 1; - dst[1 * dst_stride + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; - dst[0 * dst_stride + 3] = (pp[7] + pp[8] + 1) >> 1; - - } - break; - case B_VL_PRED: - { - - unsigned char *pp = Above; - - dst[0 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1; - dst[1 * dst_stride + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - dst[2 * dst_stride + 0] = - dst[0 * dst_stride + 1] = (pp[1] + pp[2] + 1) >> 1; - dst[1 * dst_stride + 1] = - dst[3 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - dst[2 * dst_stride + 1] = - dst[0 * dst_stride + 2] = (pp[2] + pp[3] + 1) >> 1; - dst[3 * dst_stride + 1] = - dst[1 * dst_stride + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - dst[0 * dst_stride + 3] = - dst[2 * dst_stride + 2] = (pp[3] + pp[4] + 1) >> 1; - dst[1 * dst_stride + 3] = - dst[3 * dst_stride + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - dst[2 * dst_stride + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - dst[3 * dst_stride + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - } - break; - - case B_HD_PRED: - { - unsigned char pp[9]; - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; - pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; - - - dst[3 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1; - dst[3 * dst_stride + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - dst[2 * dst_stride + 0] = - dst[3 * dst_stride + 2] = (pp[1] + pp[2] + 1) >> 1; - dst[2 * dst_stride + 1] = - dst[3 * dst_stride + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) 
>> 2; - dst[2 * dst_stride + 2] = - dst[1 * dst_stride + 0] = (pp[2] + pp[3] + 1) >> 1; - dst[2 * dst_stride + 3] = - dst[1 * dst_stride + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - dst[1 * dst_stride + 2] = - dst[0 * dst_stride + 0] = (pp[3] + pp[4] + 1) >> 1; - dst[1 * dst_stride + 3] = - dst[0 * dst_stride + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - dst[0 * dst_stride + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - dst[0 * dst_stride + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - } - break; - - - case B_HU_PRED: - { - unsigned char *pp = Left; - dst[0 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1; - dst[0 * dst_stride + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - dst[0 * dst_stride + 2] = - dst[1 * dst_stride + 0] = (pp[1] + pp[2] + 1) >> 1; - dst[0 * dst_stride + 3] = - dst[1 * dst_stride + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - dst[1 * dst_stride + 2] = - dst[2 * dst_stride + 0] = (pp[2] + pp[3] + 1) >> 1; - dst[1 * dst_stride + 3] = - dst[2 * dst_stride + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2; - dst[2 * dst_stride + 2] = - dst[2 * dst_stride + 3] = - dst[3 * dst_stride + 0] = - dst[3 * dst_stride + 1] = - dst[3 * dst_stride + 2] = - dst[3 * dst_stride + 3] = pp[3]; - } - break; - - default: - break; - - } + pred[b_mode](dst, dst_stride, Above, Left); } diff --git a/libvpx/vp8/common/reconintra4x4.h b/libvpx/vp8/common/reconintra4x4.h index ed59c9ed..869841ee 100644 --- a/libvpx/vp8/common/reconintra4x4.h +++ b/libvpx/vp8/common/reconintra4x4.h @@ -18,7 +18,7 @@ extern "C" { #endif static void intra_prediction_down_copy(MACROBLOCKD *xd, - unsigned char *above_right_src) + unsigned char *above_right_src) { int dst_stride = xd->dst.y_stride; unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16; @@ -33,6 +33,14 @@ static void intra_prediction_down_copy(MACROBLOCKD *xd, *dst_ptr2 = *src_ptr; } +void vp8_intra4x4_predict(unsigned char *Above, + unsigned char *yleft, int left_stride, + B_PREDICTION_MODE b_mode, + unsigned char *dst, int 
dst_stride, + unsigned char top_left); + +void vp8_init_intra4x4_predictors_internal(void); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp8/common/rtcd_defs.pl b/libvpx/vp8/common/rtcd_defs.pl index 7924ae75..6799c278 100644 --- a/libvpx/vp8/common/rtcd_defs.pl +++ b/libvpx/vp8/common/rtcd_defs.pl @@ -152,16 +152,6 @@ specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/; $vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6; $vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2; -add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"; -specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3 neon msa/; - -add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"; -specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3 neon msa/; - -add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"; -specialize qw/vp8_intra4x4_predict media/; -$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6; - # # Postproc # diff --git a/libvpx/vp8/common/setupintrarecon.h b/libvpx/vp8/common/setupintrarecon.h index 608f4a9a..1857c4e2 100644 --- a/libvpx/vp8/common/setupintrarecon.h +++ b/libvpx/vp8/common/setupintrarecon.h @@ -11,6 +11,7 @@ #ifndef VP8_COMMON_SETUPINTRARECON_H_ #define VP8_COMMON_SETUPINTRARECON_H_ +#include "./vpx_config.h" #include "vpx_scale/yv12config.h" #ifdef __cplusplus @@ -19,12 +20,11 @@ extern "C" { extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); -static -void setup_intra_recon_left(unsigned char *y_buffer, - unsigned char 
*u_buffer, - unsigned char *v_buffer, - int y_stride, - int uv_stride) +static INLINE void setup_intra_recon_left(unsigned char *y_buffer, + unsigned char *u_buffer, + unsigned char *v_buffer, + int y_stride, + int uv_stride) { int i; diff --git a/libvpx/vp8/common/x86/recon_sse2.asm b/libvpx/vp8/common/x86/recon_sse2.asm index 7141f832..cb89537f 100644 --- a/libvpx/vp8/common/x86/recon_sse2.asm +++ b/libvpx/vp8/common/x86/recon_sse2.asm @@ -114,1002 +114,3 @@ sym(vp8_copy_mem16x16_sse2): UNSHADOW_ARGS pop rbp ret - - -;void vp8_intra_pred_uv_dc_mmx2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride, -; ) -global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE -sym(vp8_intra_pred_uv_dc_mmx2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - ; from top - mov rdi, arg(2) ;above; - mov rsi, arg(3) ;left; - movsxd rax, dword ptr arg(4) ;left_stride; - pxor mm0, mm0 - movq mm1, [rdi] - lea rdi, [rax*3] - psadbw mm1, mm0 - ; from left - movzx ecx, byte [rsi] - movzx edx, byte [rsi+rax*1] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - - movzx edx, byte [rsi+rdi] - lea rsi, [rsi+rax*4] - add ecx, edx - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - - ; add up - pextrw edx, mm1, 0x0 - lea edx, [edx+ecx+8] - sar edx, 4 - movd mm1, edx - movsxd rcx, dword ptr arg(1) ;dst_stride - pshufw mm1, mm1, 0x0 - mov rdi, arg(0) ;dst; - packuswb mm1, mm1 - - ; write out - lea rax, [rcx*3] - lea rdx, [rdi+rcx*4] - - movq [rdi ], mm1 - movq [rdi+rcx ], mm1 - movq [rdi+rcx*2], mm1 - movq [rdi+rax ], mm1 - movq [rdx ], mm1 - movq [rdx+rcx ], mm1 - movq [rdx+rcx*2], mm1 - movq [rdx+rax ], mm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_uv_dctop_mmx2( -; unsigned char *dst, -; int dst_stride -; unsigned char 
*above, -; unsigned char *left, -; int left_stride, -; ) -global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE -sym(vp8_intra_pred_uv_dctop_mmx2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;arg(3), arg(4) not used - - ; from top - mov rsi, arg(2) ;above; - pxor mm0, mm0 - movq mm1, [rsi] - psadbw mm1, mm0 - - ; add up - paddw mm1, [GLOBAL(dc_4)] - psraw mm1, 3 - pshufw mm1, mm1, 0x0 - packuswb mm1, mm1 - - ; write out - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - lea rax, [rcx*3] - - movq [rdi ], mm1 - movq [rdi+rcx ], mm1 - movq [rdi+rcx*2], mm1 - movq [rdi+rax ], mm1 - lea rdi, [rdi+rcx*4] - movq [rdi ], mm1 - movq [rdi+rcx ], mm1 - movq [rdi+rcx*2], mm1 - movq [rdi+rax ], mm1 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_uv_dcleft_mmx2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride, -; ) -global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE -sym(vp8_intra_pred_uv_dcleft_mmx2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - ;arg(2) not used - - ; from left - mov rsi, arg(3) ;left; - movsxd rax, dword ptr arg(4) ;left_stride; - lea rdi, [rax*3] - movzx ecx, byte [rsi] - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - lea edx, [ecx+edx+4] - - ; add up - shr edx, 3 - movd mm1, edx - pshufw mm1, mm1, 0x0 - packuswb mm1, mm1 - - ; write out - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - lea rax, [rcx*3] - - movq [rdi ], mm1 - movq [rdi+rcx ], mm1 - movq [rdi+rcx*2], mm1 - movq [rdi+rax ], mm1 - lea rdi, [rdi+rcx*4] - movq [rdi ], mm1 - movq 
[rdi+rcx ], mm1 - movq [rdi+rcx*2], mm1 - movq [rdi+rax ], mm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_uv_dc128_mmx( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride, -; ) -global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE -sym(vp8_intra_pred_uv_dc128_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - ; end prolog - - ;arg(2), arg(3), arg(4) not used - - ; write out - movq mm1, [GLOBAL(dc_128)] - mov rax, arg(0) ;dst; - movsxd rdx, dword ptr arg(1) ;dst_stride - lea rcx, [rdx*3] - - movq [rax ], mm1 - movq [rax+rdx ], mm1 - movq [rax+rdx*2], mm1 - movq [rax+rcx ], mm1 - lea rax, [rax+rdx*4] - movq [rax ], mm1 - movq [rax+rdx ], mm1 - movq [rax+rdx*2], mm1 - movq [rax+rcx ], mm1 - - ; begin epilog - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_uv_tm_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride, -; ) -%macro vp8_intra_pred_uv_tm 1 -global sym(vp8_intra_pred_uv_tm_%1) PRIVATE -sym(vp8_intra_pred_uv_tm_%1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - push rbx - ; end prolog - - ; read top row - mov edx, 4 - mov rsi, arg(2) ;above - movsxd rax, dword ptr arg(4) ;left_stride; - pxor xmm0, xmm0 -%ifidn %1, ssse3 - movdqa xmm2, [GLOBAL(dc_1024)] -%endif - movq xmm1, [rsi] - punpcklbw xmm1, xmm0 - - ; set up left ptrs ans subtract topleft - movd xmm3, [rsi-1] - mov rsi, arg(3) ;left; -%ifidn %1, sse2 - punpcklbw xmm3, xmm0 - pshuflw xmm3, xmm3, 0x0 - punpcklqdq xmm3, xmm3 -%else - pshufb xmm3, xmm2 -%endif - psubw xmm1, xmm3 - - ; set up dest ptrs - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - -.vp8_intra_pred_uv_tm_%1_loop: - mov bl, [rsi] - movd xmm3, ebx - - mov bl, [rsi+rax] - movd xmm5, ebx -%ifidn %1, sse2 - punpcklbw xmm3, xmm0 - punpcklbw xmm5, xmm0 - pshuflw xmm3, xmm3, 0x0 - 
pshuflw xmm5, xmm5, 0x0 - punpcklqdq xmm3, xmm3 - punpcklqdq xmm5, xmm5 -%else - pshufb xmm3, xmm2 - pshufb xmm5, xmm2 -%endif - paddw xmm3, xmm1 - paddw xmm5, xmm1 - packuswb xmm3, xmm5 - movq [rdi ], xmm3 - movhps[rdi+rcx], xmm3 - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rcx*2] - dec edx - jnz .vp8_intra_pred_uv_tm_%1_loop - - ; begin epilog - pop rbx - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret -%endmacro - -vp8_intra_pred_uv_tm sse2 -vp8_intra_pred_uv_tm ssse3 - -;void vp8_intra_pred_uv_ve_mmx( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride, -; ) -global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE -sym(vp8_intra_pred_uv_ve_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - ; end prolog - - ; arg(3), arg(4) not used - - ; read from top - mov rax, arg(2) ;src; - - movq mm1, [rax] - - ; write out - mov rax, arg(0) ;dst; - movsxd rdx, dword ptr arg(1) ;dst_stride - lea rcx, [rdx*3] - - movq [rax ], mm1 - movq [rax+rdx ], mm1 - movq [rax+rdx*2], mm1 - movq [rax+rcx ], mm1 - lea rax, [rax+rdx*4] - movq [rax ], mm1 - movq [rax+rdx ], mm1 - movq [rax+rdx*2], mm1 - movq [rax+rcx ], mm1 - - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_uv_ho_mmx2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -%macro vp8_intra_pred_uv_ho 1 -global sym(vp8_intra_pred_uv_ho_%1) PRIVATE -sym(vp8_intra_pred_uv_ho_%1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx -%ifidn %1, ssse3 - GET_GOT rbx -%endif - ; end prolog - - ;arg(2) not used - - ; read from left and write out -%ifidn %1, mmx2 - mov edx, 4 -%endif - mov rsi, arg(3) ;left - movsxd rax, dword ptr arg(4) ;left_stride; - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride -%ifidn %1, ssse3 - lea rdx, [rcx*3] - movdqa xmm2, [GLOBAL(dc_00001111)] -%endif - -%ifidn %1, mmx2 -.vp8_intra_pred_uv_ho_%1_loop: - mov bl, 
[rsi] - movd mm0, ebx - - mov bl, [rsi+rax] - movd mm1, ebx - - punpcklbw mm0, mm0 - punpcklbw mm1, mm1 - pshufw mm0, mm0, 0x0 - pshufw mm1, mm1, 0x0 - movq [rdi ], mm0 - movq [rdi+rcx], mm1 - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rcx*2] - dec edx - jnz .vp8_intra_pred_uv_ho_%1_loop -%else - mov bl, [rsi] - movd xmm0, ebx - - mov bl, [rsi+rax] - movd xmm3, ebx - - mov bl, [rsi+rax*2] - movd xmm1, ebx - - lea rbx, [rax*3] - mov bl, [rsi+rbx] - movd xmm4, ebx - - punpcklbw xmm0, xmm3 - punpcklbw xmm1, xmm4 - pshufb xmm0, xmm2 - pshufb xmm1, xmm2 - movq [rdi ], xmm0 - movhps [rdi+rcx], xmm0 - movq [rdi+rcx*2], xmm1 - movhps [rdi+rdx], xmm1 - lea rsi, [rsi+rax*4] - lea rdi, [rdi+rcx*4] - - mov bl, [rsi] - movd xmm0, ebx - - mov bl, [rsi+rax] - movd xmm3, ebx - - mov bl, [rsi+rax*2] - movd xmm1, ebx - - lea rbx, [rax*3] - mov bl, [rsi+rbx] - movd xmm4, ebx - - punpcklbw xmm0, xmm3 - punpcklbw xmm1, xmm4 - pshufb xmm0, xmm2 - pshufb xmm1, xmm2 - movq [rdi ], xmm0 - movhps [rdi+rcx], xmm0 - movq [rdi+rcx*2], xmm1 - movhps [rdi+rdx], xmm1 -%endif - - ; begin epilog -%ifidn %1, ssse3 - RESTORE_GOT -%endif - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret -%endmacro - -vp8_intra_pred_uv_ho mmx2 -vp8_intra_pred_uv_ho ssse3 - -;void vp8_intra_pred_y_dc_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_dc_sse2) PRIVATE -sym(vp8_intra_pred_y_dc_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - ; from top - mov rdi, arg(2) ;above - mov rsi, arg(3) ;left - movsxd rax, dword ptr arg(4) ;left_stride; - - pxor xmm0, xmm0 - movdqa xmm1, [rdi] - psadbw xmm1, xmm0 - movq xmm2, xmm1 - punpckhqdq xmm1, xmm1 - paddw xmm1, xmm2 - - ; from left - lea rdi, [rax*3] - - movzx ecx, byte [rsi] - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, 
[rsi+rax*4] - - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - - ; add up - pextrw edx, xmm1, 0x0 - lea edx, [edx+ecx+16] - sar edx, 5 - movd xmm1, edx - ; FIXME use pshufb for ssse3 version - pshuflw xmm1, xmm1, 0x0 - punpcklqdq xmm1, xmm1 - packuswb xmm1, xmm1 - - ; write out - mov rsi, 2 - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - lea rax, [rcx*3] - -.label - movdqa [rdi ], xmm1 - movdqa [rdi+rcx ], xmm1 - movdqa [rdi+rcx*2], xmm1 - movdqa [rdi+rax ], xmm1 - lea rdi, [rdi+rcx*4] - movdqa [rdi ], xmm1 - movdqa [rdi+rcx ], xmm1 - movdqa [rdi+rcx*2], xmm1 - movdqa [rdi+rax ], xmm1 - lea rdi, [rdi+rcx*4] - dec rsi - jnz .label - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_dctop_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE -sym(vp8_intra_pred_y_dctop_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - GET_GOT rbx - ; end prolog - - ;arg(3), arg(4) not used - - ; from top - mov rcx, arg(2) ;above; - pxor xmm0, xmm0 - movdqa xmm1, [rcx] - psadbw xmm1, xmm0 - movdqa xmm2, xmm1 - punpckhqdq xmm1, xmm1 - paddw xmm1, xmm2 - - ; add up - paddw xmm1, [GLOBAL(dc_8)] - psraw xmm1, 4 - ; FIXME use pshufb for ssse3 version - pshuflw xmm1, xmm1, 0x0 - punpcklqdq xmm1, xmm1 - packuswb xmm1, xmm1 - - ; write out - mov rsi, 2 - mov rdx, arg(0) ;dst; - movsxd rcx, 
dword ptr arg(1) ;dst_stride - lea rax, [rcx*3] - -.label - movdqa [rdx ], xmm1 - movdqa [rdx+rcx ], xmm1 - movdqa [rdx+rcx*2], xmm1 - movdqa [rdx+rax ], xmm1 - lea rdx, [rdx+rcx*4] - movdqa [rdx ], xmm1 - movdqa [rdx+rcx ], xmm1 - movdqa [rdx+rcx*2], xmm1 - movdqa [rdx+rax ], xmm1 - lea rdx, [rdx+rcx*4] - dec rsi - jnz .label - - ; begin epilog - RESTORE_GOT - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_dcleft_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE -sym(vp8_intra_pred_y_dcleft_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - ;arg(2) not used - - ; from left - mov rsi, arg(3) ;left; - movsxd rax, dword ptr arg(4) ;left_stride; - - lea rdi, [rax*3] - movzx ecx, byte [rsi] - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - lea edx, [ecx+edx+8] - - ; add up - shr edx, 4 - movd xmm1, edx - ; FIXME use pshufb for ssse3 version - pshuflw xmm1, xmm1, 0x0 - punpcklqdq xmm1, xmm1 - packuswb xmm1, xmm1 - - ; write out - mov rsi, 2 - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - lea rax, [rcx*3] - -.label - movdqa [rdi ], xmm1 - movdqa [rdi+rcx ], xmm1 - movdqa [rdi+rcx*2], xmm1 - movdqa [rdi+rax ], xmm1 - lea rdi, 
[rdi+rcx*4] - movdqa [rdi ], xmm1 - movdqa [rdi+rcx ], xmm1 - movdqa [rdi+rcx*2], xmm1 - movdqa [rdi+rax ], xmm1 - lea rdi, [rdi+rcx*4] - dec rsi - jnz .label - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_dc128_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE -sym(vp8_intra_pred_y_dc128_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - GET_GOT rbx - ; end prolog - - ;arg(2), arg(3), arg(4) not used - - ; write out - mov rsi, 2 - movdqa xmm1, [GLOBAL(dc_128)] - mov rax, arg(0) ;dst; - movsxd rdx, dword ptr arg(1) ;dst_stride - lea rcx, [rdx*3] - -.label - movdqa [rax ], xmm1 - movdqa [rax+rdx ], xmm1 - movdqa [rax+rdx*2], xmm1 - movdqa [rax+rcx ], xmm1 - lea rax, [rax+rdx*4] - movdqa [rax ], xmm1 - movdqa [rax+rdx ], xmm1 - movdqa [rax+rdx*2], xmm1 - movdqa [rax+rcx ], xmm1 - lea rax, [rax+rdx*4] - dec rsi - jnz .label - - ; begin epilog - RESTORE_GOT - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_tm_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -%macro vp8_intra_pred_y_tm 1 -global sym(vp8_intra_pred_y_tm_%1) PRIVATE -sym(vp8_intra_pred_y_tm_%1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rbx - GET_GOT rbx - ; end prolog - - ; read top row - mov edx, 8 - mov rsi, arg(2) ;above - movsxd rax, dword ptr arg(4) ;left_stride; - pxor xmm0, xmm0 -%ifidn %1, ssse3 - movdqa xmm3, [GLOBAL(dc_1024)] -%endif - movdqa xmm1, [rsi] - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm0 - punpckhbw xmm2, xmm0 - - ; set up left ptrs ans subtract topleft - movd xmm4, [rsi-1] - mov rsi, arg(3) ;left -%ifidn %1, sse2 - punpcklbw xmm4, xmm0 - pshuflw xmm4, xmm4, 0x0 - punpcklqdq xmm4, xmm4 -%else - pshufb xmm4, xmm3 -%endif - psubw xmm1, xmm4 - psubw xmm2, xmm4 - - ; 
set up dest ptrs - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride -vp8_intra_pred_y_tm_%1_loop: - mov bl, [rsi] - movd xmm4, ebx - - mov bl, [rsi+rax] - movd xmm5, ebx -%ifidn %1, sse2 - punpcklbw xmm4, xmm0 - punpcklbw xmm5, xmm0 - pshuflw xmm4, xmm4, 0x0 - pshuflw xmm5, xmm5, 0x0 - punpcklqdq xmm4, xmm4 - punpcklqdq xmm5, xmm5 -%else - pshufb xmm4, xmm3 - pshufb xmm5, xmm3 -%endif - movdqa xmm6, xmm4 - movdqa xmm7, xmm5 - paddw xmm4, xmm1 - paddw xmm6, xmm2 - paddw xmm5, xmm1 - paddw xmm7, xmm2 - packuswb xmm4, xmm6 - packuswb xmm5, xmm7 - movdqa [rdi ], xmm4 - movdqa [rdi+rcx], xmm5 - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rcx*2] - dec edx - jnz vp8_intra_pred_y_tm_%1_loop - - ; begin epilog - RESTORE_GOT - pop rbx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -%endmacro - -vp8_intra_pred_y_tm sse2 -vp8_intra_pred_y_tm ssse3 - -;void vp8_intra_pred_y_ve_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_ve_sse2) PRIVATE -sym(vp8_intra_pred_y_ve_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - ; end prolog - - ;arg(3), arg(4) not used - - mov rax, arg(2) ;above; - mov rsi, 2 - movsxd rdx, dword ptr arg(1) ;dst_stride - - ; read from top - movdqa xmm1, [rax] - - ; write out - mov rax, arg(0) ;dst; - lea rcx, [rdx*3] - -.label - movdqa [rax ], xmm1 - movdqa [rax+rdx ], xmm1 - movdqa [rax+rdx*2], xmm1 - movdqa [rax+rcx ], xmm1 - lea rax, [rax+rdx*4] - movdqa [rax ], xmm1 - movdqa [rax+rdx ], xmm1 - movdqa [rax+rdx*2], xmm1 - movdqa [rax+rcx ], xmm1 - lea rax, [rax+rdx*4] - dec rsi - jnz .label - - ; begin epilog - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_ho_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride, -; ) -global sym(vp8_intra_pred_y_ho_sse2) PRIVATE -sym(vp8_intra_pred_y_ho_sse2): - push rbp - mov rbp, rsp - 
SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - - ;arg(2) not used - - ; read from left and write out - mov edx, 8 - mov rsi, arg(3) ;left; - movsxd rax, dword ptr arg(4) ;left_stride; - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - -vp8_intra_pred_y_ho_sse2_loop: - mov bl, [rsi] - movd xmm0, ebx - mov bl, [rsi+rax] - movd xmm1, ebx - - ; FIXME use pshufb for ssse3 version - punpcklbw xmm0, xmm0 - punpcklbw xmm1, xmm1 - pshuflw xmm0, xmm0, 0x0 - pshuflw xmm1, xmm1, 0x0 - punpcklqdq xmm0, xmm0 - punpcklqdq xmm1, xmm1 - movdqa [rdi ], xmm0 - movdqa [rdi+rcx], xmm1 - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rcx*2] - dec edx - jnz vp8_intra_pred_y_ho_sse2_loop - - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -dc_128: - times 16 db 128 -dc_4: - times 4 dw 4 -align 16 -dc_8: - times 8 dw 8 -align 16 -dc_1024: - times 8 dw 0x400 -align 16 -dc_00001111: - times 8 db 0 - times 8 db 1 diff --git a/libvpx/vp8/common/x86/recon_wrapper_sse2.c b/libvpx/vp8/common/x86/recon_wrapper_sse2.c deleted file mode 100644 index 65f4251a..00000000 --- a/libvpx/vp8/common/x86/recon_wrapper_sse2.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/blockd.h" - -#define build_intra_predictors_mbuv_prototype(sym) \ - void sym(unsigned char *dst, int dst_stride, \ - const unsigned char *above, \ - const unsigned char *left, int left_stride) -typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t)); - -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2); -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dctop_mmx2); -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2); -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx); -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2); -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_ssse3); -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx); -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2); -extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3); - -static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char *dst_u, - unsigned char *dst_v, - int dst_stride, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - build_intra_predictors_mbuv_fn_t tm_func, - build_intra_predictors_mbuv_fn_t ho_func) -{ - int mode = x->mode_info_context->mbmi.uv_mode; - build_intra_predictors_mbuv_fn_t fn; - - switch (mode) { - case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break; - case H_PRED: fn = ho_func; break; - case TM_PRED: fn = tm_func; break; - case DC_PRED: - if (x->up_available) { - if (x->left_available) { - fn = vp8_intra_pred_uv_dc_mmx2; break; - } else { - fn = vp8_intra_pred_uv_dctop_mmx2; break; - } - } else if (x->left_available) { - fn = vp8_intra_pred_uv_dcleft_mmx2; break; - } else { - fn = vp8_intra_pred_uv_dc128_mmx; break; - } - break; - default: return; - } - - 
fn(dst_u, dst_stride, uabove_row, uleft, left_stride); - fn(dst_v, dst_stride, vabove_row, vleft, left_stride); -} - -void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride) -{ - vp8_build_intra_predictors_mbuv_x86(x, - uabove_row, vabove_row, - upred_ptr, - vpred_ptr, pred_stride, - uleft, - vleft, - left_stride, - vp8_intra_pred_uv_tm_sse2, - vp8_intra_pred_uv_ho_mmx2); -} - -void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride) -{ - vp8_build_intra_predictors_mbuv_x86(x, - uabove_row, vabove_row, - upred_ptr, - vpred_ptr, pred_stride, - uleft, - vleft, - left_stride, - vp8_intra_pred_uv_tm_ssse3, - vp8_intra_pred_uv_ho_ssse3); -} - -#define build_intra_predictors_mby_prototype(sym) \ - void sym(unsigned char *dst, int dst_stride, \ - const unsigned char *above, \ - const unsigned char *left, int left_stride) -typedef build_intra_predictors_mby_prototype((*build_intra_predictors_mby_fn_t)); - -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dctop_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dcleft_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc128_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ho_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ve_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_ssse3); - -static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x, - unsigned char 
* yabove_row, - unsigned char *dst_y, - int dst_stride, - unsigned char * yleft, - int left_stride, - build_intra_predictors_mby_fn_t tm_func) -{ - int mode = x->mode_info_context->mbmi.mode; - build_intra_predictors_mbuv_fn_t fn; - - switch (mode) { - case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break; - case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break; - case TM_PRED: fn = tm_func; break; - case DC_PRED: - if (x->up_available) { - if (x->left_available) { - fn = vp8_intra_pred_y_dc_sse2; break; - } else { - fn = vp8_intra_pred_y_dctop_sse2; break; - } - } else if (x->left_available) { - fn = vp8_intra_pred_y_dcleft_sse2; break; - } else { - fn = vp8_intra_pred_y_dc128_sse2; break; - } - break; - default: return; - } - - fn(dst_y, dst_stride, yabove_row, yleft, left_stride); - return; -} - -void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) -{ - vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr, - y_stride, yleft, left_stride, - vp8_intra_pred_y_tm_sse2); -} - -void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) -{ - vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr, - y_stride, yleft, left_stride, - vp8_intra_pred_y_tm_ssse3); - -} diff --git a/libvpx/vp8/decoder/dboolhuff.c b/libvpx/vp8/decoder/dboolhuff.c index b874d4c4..8a7e3320 100644 --- a/libvpx/vp8/decoder/dboolhuff.c +++ b/libvpx/vp8/decoder/dboolhuff.c @@ -11,6 +11,7 @@ #include "dboolhuff.h" #include "vp8/common/common.h" +#include "vpx_dsp/vpx_dsp_common.h" int vp8dx_start_decode(BOOL_DECODER *br, const unsigned char *source, @@ -48,7 +49,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br) unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1]; if (br->decrypt_cb) { - size_t n = MIN(sizeof(decrypted), bytes_left); + size_t n = 
VPXMIN(sizeof(decrypted), bytes_left); br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n); bufptr = decrypted; } diff --git a/libvpx/vp8/decoder/dboolhuff.h b/libvpx/vp8/decoder/dboolhuff.h index 51c5adc2..cc9eaaf4 100644 --- a/libvpx/vp8/decoder/dboolhuff.h +++ b/libvpx/vp8/decoder/dboolhuff.h @@ -15,7 +15,7 @@ #include <stddef.h> #include <limits.h> -#include "vpx_config.h" +#include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/vp8dx.h" #include "vpx/vpx_integer.h" @@ -95,7 +95,7 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { return bit; } -static int vp8_decode_value(BOOL_DECODER *br, int bits) +static INLINE int vp8_decode_value(BOOL_DECODER *br, int bits) { int z = 0; int bit; @@ -108,7 +108,7 @@ static int vp8_decode_value(BOOL_DECODER *br, int bits) return z; } -static int vp8dx_bool_error(BOOL_DECODER *br) +static INLINE int vp8dx_bool_error(BOOL_DECODER *br) { /* Check if we have reached the end of the buffer. * diff --git a/libvpx/vp8/decoder/decodeframe.c b/libvpx/vp8/decoder/decodeframe.c index 56e167db..f0d76037 100644 --- a/libvpx/vp8/decoder/decodeframe.c +++ b/libvpx/vp8/decoder/decodeframe.c @@ -23,6 +23,7 @@ #include "vp8/common/entropymode.h" #include "vp8/common/quant_common.h" #include "vpx_scale/vpx_scale.h" +#include "vp8/common/reconintra.h" #include "vp8/common/setupintrarecon.h" #include "decodemv.h" @@ -34,6 +35,7 @@ #include "vp8/common/threading.h" #include "decoderthreading.h" #include "dboolhuff.h" +#include "vpx_dsp/vpx_dsp_common.h" #include <assert.h> #include <stdio.h> @@ -1021,7 +1023,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) const unsigned char *clear = data; if (pbi->decrypt_cb) { - int n = (int)MIN(sizeof(clear_buffer), data_end - data); + int n = (int)VPXMIN(sizeof(clear_buffer), data_end - data); pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n); clear = clear_buffer; } diff --git a/libvpx/vp8/decoder/error_concealment.c b/libvpx/vp8/decoder/error_concealment.c index 
bb6d443c..0b846a08 100644 --- a/libvpx/vp8/decoder/error_concealment.c +++ b/libvpx/vp8/decoder/error_concealment.c @@ -16,6 +16,7 @@ #include "vpx_mem/vpx_mem.h" #include "vp8/common/findnearmv.h" #include "vp8/common/common.h" +#include "vpx_dsp/vpx_dsp_common.h" #define FLOOR(x,q) ((x) & -(1 << (q))) @@ -93,13 +94,13 @@ static void assign_overlap(OVERLAP_NODE* overlaps, */ static int block_overlap(int b1_row, int b1_col, int b2_row, int b2_col) { - const int int_top = MAX(b1_row, b2_row); // top - const int int_left = MAX(b1_col, b2_col); // left + const int int_top = VPXMAX(b1_row, b2_row); // top + const int int_left = VPXMAX(b1_col, b2_col); // left /* Since each block is 4x4 pixels, adding 4 (Q3) to the left/top edge * gives us the right/bottom edge. */ - const int int_right = MIN(b1_col + (4<<3), b2_col + (4<<3)); // right - const int int_bottom = MIN(b1_row + (4<<3), b2_row + (4<<3)); // bottom + const int int_right = VPXMIN(b1_col + (4<<3), b2_col + (4<<3)); // right + const int int_bottom = VPXMIN(b1_row + (4<<3), b2_row + (4<<3)); // bottom return (int_bottom - int_top) * (int_right - int_left); } @@ -124,7 +125,7 @@ static void calculate_overlaps_mb(B_OVERLAP *b_overlaps, union b_mode_info *bmi, /* If the block partly overlaps any previous MB, these coordinates * can be < 0. We don't want to access blocks in previous MBs. 
*/ - const int blk_idx = MAX(rel_ol_blk_row,0) * 4 + MAX(rel_ol_blk_col,0); + const int blk_idx = VPXMAX(rel_ol_blk_row,0) * 4 + VPXMAX(rel_ol_blk_col,0); /* Upper left overlapping block */ B_OVERLAP *b_ol_ul = &(b_overlaps[blk_idx]); @@ -132,8 +133,8 @@ static void calculate_overlaps_mb(B_OVERLAP *b_overlaps, union b_mode_info *bmi, * which the motion compensated block overlaps */ /* Avoid calculating overlaps for blocks in later MBs */ - int end_row = MIN(4 + mb_row * 4 - first_blk_row, 2); - int end_col = MIN(4 + mb_col * 4 - first_blk_col, 2); + int end_row = VPXMIN(4 + mb_row * 4 - first_blk_row, 2); + int end_col = VPXMIN(4 + mb_col * 4 - first_blk_col, 2); int row, col; /* Check if new_row and new_col are evenly divisible by 4 (Q3), @@ -208,8 +209,8 @@ void vp8_calculate_overlaps(MB_OVERLAP *overlap_ul, overlap_mb_row = FLOOR((overlap_b_row << 3) / 4, 3) >> 3; overlap_mb_col = FLOOR((overlap_b_col << 3) / 4, 3) >> 3; - end_row = MIN(mb_rows - overlap_mb_row, 2); - end_col = MIN(mb_cols - overlap_mb_col, 2); + end_row = VPXMIN(mb_rows - overlap_mb_row, 2); + end_col = VPXMIN(mb_cols - overlap_mb_col, 2); /* Don't calculate overlap for MBs we don't overlap */ /* Check if the new block row starts at the last block row of the MB */ diff --git a/libvpx/vp8/decoder/onyxd_if.c b/libvpx/vp8/decoder/onyxd_if.c index 9015fcbb..3468268a 100644 --- a/libvpx/vp8/decoder/onyxd_if.c +++ b/libvpx/vp8/decoder/onyxd_if.c @@ -25,9 +25,12 @@ #include <assert.h> #include "vp8/common/quant_common.h" +#include "vp8/common/reconintra.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_scale/vpx_scale.h" #include "vp8/common/systemdependent.h" +#include "vpx_ports/vpx_once.h" #include "vpx_ports/vpx_timer.h" #include "detokenize.h" #if CONFIG_ERROR_CONCEALMENT @@ -42,6 +45,17 @@ extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); static int get_free_fb (VP8_COMMON *cm); static void ref_cnt_fb (int *buf, int *idx, int new_idx); +static void 
initialize_dec(void) { + static volatile int init_done = 0; + + if (!init_done) + { + vpx_dsp_rtcd(); + vp8_init_intra_predictors(); + init_done = 1; + } +} + static void remove_decompressor(VP8D_COMP *pbi) { #if CONFIG_ERROR_CONCEALMENT @@ -105,6 +119,8 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf) vp8_setup_block_dptrs(&pbi->mb); + once(initialize_dec); + return pbi; } diff --git a/libvpx/vp8/decoder/threading.c b/libvpx/vp8/decoder/threading.c index 6801532f..7c7184c7 100644 --- a/libvpx/vp8/decoder/threading.c +++ b/libvpx/vp8/decoder/threading.c @@ -24,6 +24,7 @@ #include "detokenize.h" #include "vp8/common/reconintra4x4.h" #include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" #include "vp8/common/setupintrarecon.h" #if CONFIG_ERROR_CONCEALMENT #include "error_concealment.h" diff --git a/libvpx/vp8/decoder/treereader.h b/libvpx/vp8/decoder/treereader.h index 35ee6960..f7d23c36 100644 --- a/libvpx/vp8/decoder/treereader.h +++ b/libvpx/vp8/decoder/treereader.h @@ -12,6 +12,7 @@ #ifndef VP8_DECODER_TREEREADER_H_ #define VP8_DECODER_TREEREADER_H_ +#include "./vpx_config.h" #include "vp8/common/treecoder.h" #include "dboolhuff.h" @@ -28,7 +29,7 @@ typedef BOOL_DECODER vp8_reader; /* Intent of tree data structure is to make decoding trivial. */ -static int vp8_treed_read( +static INLINE int vp8_treed_read( vp8_reader *const r, /* !!! must return a 0 or 1 !!! 
*/ vp8_tree t, const vp8_prob *const p diff --git a/libvpx/vp8/encoder/bitstream.c b/libvpx/vp8/encoder/bitstream.c index ea279b32..f3d91b55 100644 --- a/libvpx/vp8/encoder/bitstream.c +++ b/libvpx/vp8/encoder/bitstream.c @@ -407,6 +407,7 @@ static void pack_tokens_into_partitions(VP8_COMP *cpi, unsigned char *cx_data, } +#if CONFIG_MULTITHREAD static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w) { int mb_row; @@ -421,6 +422,7 @@ static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w) } } +#endif // CONFIG_MULTITHREAD static void write_mv_ref ( @@ -1675,7 +1677,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest if (cpi->b_multi_threaded) pack_mb_row_tokens(cpi, &cpi->bc[1]); else -#endif +#endif // CONFIG_MULTITHREAD vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count); vp8_stop_encode(&cpi->bc[1]); diff --git a/libvpx/vp8/encoder/encodeframe.c b/libvpx/vp8/encoder/encodeframe.c index d381d8dd..b0aaa2f0 100644 --- a/libvpx/vp8/encoder/encodeframe.c +++ b/libvpx/vp8/encoder/encodeframe.c @@ -700,6 +700,7 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi) vp8_zero(x->count_mb_ref_frame_usage); } +#if CONFIG_MULTITHREAD static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) { int i = 0; @@ -729,6 +730,7 @@ static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) } while (++i < BLOCK_TYPES); } +#endif // CONFIG_MULTITHREAD void vp8_encode_frame(VP8_COMP *cpi) { @@ -927,7 +929,7 @@ void vp8_encode_frame(VP8_COMP *cpi) } else -#endif +#endif // CONFIG_MULTITHREAD { /* for each macroblock row in image */ diff --git a/libvpx/vp8/encoder/encodeintra.c b/libvpx/vp8/encoder/encodeintra.c index 938cc7ec..44be959c 100644 --- a/libvpx/vp8/encoder/encodeintra.c +++ b/libvpx/vp8/encoder/encodeintra.c @@ -13,6 +13,7 @@ #include "vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "vp8/encoder/quantize.h" +#include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "encodemb.h" #include 
"vp8/common/invtrans.h" diff --git a/libvpx/vp8/encoder/mcomp.c b/libvpx/vp8/encoder/mcomp.c index f848e8fb..768c764c 100644 --- a/libvpx/vp8/encoder/mcomp.c +++ b/libvpx/vp8/encoder/mcomp.c @@ -20,6 +20,7 @@ #include <math.h> #include "vp8/common/findnearmv.h" #include "vp8/common/common.h" +#include "vpx_dsp/vpx_dsp_common.h" #ifdef VP8_ENTROPY_STATS static int mv_ref_ct [31] [4] [2]; @@ -223,14 +224,14 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned int quarteriters = 4; int thismse; - int minc = MAX(x->mv_col_min * 4, - (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); - int maxc = MIN(x->mv_col_max * 4, - (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); - int minr = MAX(x->mv_row_min * 4, - (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); - int maxr = MIN(x->mv_row_max * 4, - (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); + int minc = VPXMAX(x->mv_col_min * 4, + (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); + int maxc = VPXMIN(x->mv_col_max * 4, + (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); + int minr = VPXMAX(x->mv_row_min * 4, + (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); + int maxr = VPXMIN(x->mv_row_max * 4, + (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); int y_stride; int offset; diff --git a/libvpx/vp8/encoder/mr_dissim.c b/libvpx/vp8/encoder/mr_dissim.c index 8d96445f..886cba2f 100644 --- a/libvpx/vp8/encoder/mr_dissim.c +++ b/libvpx/vp8/encoder/mr_dissim.c @@ -13,6 +13,7 @@ #include "vpx_config.h" #include "onyx_int.h" #include "mr_dissim.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "rdopt.h" #include "vp8/common/common.h" @@ -192,11 +193,13 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) } } - mmvx = MAX(abs(min_mvx - here->mbmi.mv.as_mv.row), - abs(max_mvx - here->mbmi.mv.as_mv.row)); - mmvy = MAX(abs(min_mvy - here->mbmi.mv.as_mv.col), - abs(max_mvy - here->mbmi.mv.as_mv.col)); - dissim = MAX(mmvx, mmvy); + mmvx 
= VPXMAX( + abs(min_mvx - here->mbmi.mv.as_mv.row), + abs(max_mvx - here->mbmi.mv.as_mv.row)); + mmvy = VPXMAX( + abs(min_mvy - here->mbmi.mv.as_mv.col), + abs(max_mvy - here->mbmi.mv.as_mv.col)); + dissim = VPXMAX(mmvx, mmvy); } } diff --git a/libvpx/vp8/encoder/onyx_if.c b/libvpx/vp8/encoder/onyx_if.c index 5e05c8c6..df5bcf68 100644 --- a/libvpx/vp8/encoder/onyx_if.c +++ b/libvpx/vp8/encoder/onyx_if.c @@ -31,6 +31,7 @@ #include "vp8/common/postproc.h" #endif #include "vpx_mem/vpx_mem.h" +#include "vp8/common/reconintra.h" #include "vp8/common/swapyv12buffer.h" #include "vp8/common/threading.h" #include "vpx_ports/vpx_timer.h" @@ -422,6 +423,16 @@ static void setup_features(VP8_COMP *cpi) static void dealloc_raw_frame_buffers(VP8_COMP *cpi); +void vp8_initialize_enc(void) +{ + static volatile int init_done = 0; + + if (!init_done) { + vpx_dsp_rtcd(); + vp8_init_intra_predictors(); + init_done = 1; + } +} static void dealloc_compressor_data(VP8_COMP *cpi) { @@ -516,41 +527,6 @@ static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned } -static void segmentation_test_function(VP8_COMP *cpi) -{ - unsigned char *seg_map; - signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; - - // Create a temporary map for segmentation data. - CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); - - // Set the segmentation Map - set_segmentation_map(cpi, seg_map); - - // Activate segmentation. 
- enable_segmentation(cpi); - - // Set up the quant segment data - feature_data[MB_LVL_ALT_Q][0] = 0; - feature_data[MB_LVL_ALT_Q][1] = 4; - feature_data[MB_LVL_ALT_Q][2] = 0; - feature_data[MB_LVL_ALT_Q][3] = 0; - // Set up the loop segment data - feature_data[MB_LVL_ALT_LF][0] = 0; - feature_data[MB_LVL_ALT_LF][1] = 0; - feature_data[MB_LVL_ALT_LF][2] = 0; - feature_data[MB_LVL_ALT_LF][3] = 0; - - // Initialise the feature data structure - // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 - set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); - - // Delete sementation map - vpx_free(seg_map); - - seg_map = 0; -} - /* A simple function to cyclically refresh the background at a lower Q */ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) { @@ -913,7 +889,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) Speed = cpi->Speed; switch (Mode) { -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY case 0: /* best quality mode */ sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; @@ -1953,7 +1929,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) * Currently this is tied to error resilliant mode */ cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode; - cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 5; + cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 7; if (cpi->oxcf.number_of_layers == 1) { cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 20; @@ -2065,7 +2041,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->output_pkt_list = oxcf->output_pkt_list; -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY if (cpi->pass == 1) { @@ -2227,7 +2203,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) if (cpi && (cpi->common.current_video_frame > 0)) { -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY if (cpi->pass == 2) { @@ -3018,6 +2994,7 @@ static void 
update_rd_ref_frame_probs(VP8_COMP *cpi) } +#if !CONFIG_REALTIME_ONLY /* 1 = key, 0 = inter */ static int decide_key_frame(VP8_COMP *cpi) { @@ -3085,7 +3062,6 @@ static int decide_key_frame(VP8_COMP *cpi) } -#if !(CONFIG_REALTIME_ONLY) static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned int *frame_flags) { (void) size; @@ -3131,6 +3107,7 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) #endif /* return of 0 means drop frame */ +#if !CONFIG_REALTIME_ONLY /* Function to test for conditions that indeicate we should loop * back and recode a frame. */ @@ -3180,6 +3157,7 @@ static int recode_loop_test( VP8_COMP *cpi, return force_recode; } +#endif // !CONFIG_REALTIME_ONLY static void update_reference_frames(VP8_COMP *cpi) { @@ -3601,7 +3579,7 @@ static void encode_frame_to_data_rate VP8_COMMON *cm = &cpi->common; int active_worst_qchanged = 0; -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY int q_low; int q_high; int zbin_oq_high; @@ -3640,7 +3618,7 @@ static void encode_frame_to_data_rate /* For an alt ref frame in 2 pass we skip the call to the second pass * function that sets the target bandwidth */ -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY if (cpi->pass == 2) { @@ -4149,7 +4127,7 @@ static void encode_frame_to_data_rate /* Determine initial Q to try */ Q = vp8_regulate_q(cpi, cpi->this_frame_target); -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY /* Set highest allowed value for Zbin over quant */ if (cm->frame_type == KEY_FRAME) @@ -4179,7 +4157,7 @@ static void encode_frame_to_data_rate vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY /* Limit Q range for the adaptive loop. 
*/ bottom_index = cpi->active_best_quality; top_index = cpi->active_worst_quality; @@ -4410,7 +4388,7 @@ static void encode_frame_to_data_rate if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME && cpi->compressor_speed != 2) { -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY if (decide_key_frame(cpi)) { /* Reset all our sizing numbers and recode */ @@ -4466,9 +4444,9 @@ static void encode_frame_to_data_rate /* Assume 1 qstep = about 4% on frame size. */ over_size_percent = (int)(over_size_percent * 0.96); } -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY top_index = cpi->active_worst_quality; -#endif +#endif // !CONFIG_REALTIME_ONLY /* If we have updated the active max Q do not call * vp8_update_rate_correction_factors() this loop. */ @@ -4477,7 +4455,7 @@ static void encode_frame_to_data_rate else active_worst_qchanged = 0; -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY /* Special case handling for forced key frames */ if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced ) { @@ -5215,7 +5193,7 @@ static void encode_frame_to_data_rate } -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned char * dest_end, unsigned int *frame_flags) { @@ -5299,7 +5277,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cpi->source = NULL; -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY /* Should we code an alternate reference frame */ if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.play_alternate && @@ -5367,7 +5345,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l else { *size = 0; -#if !(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done) { @@ -5560,7 +5538,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l assert(i < NUM_YV12_BUFFERS ); } -#if 
!(CONFIG_REALTIME_ONLY) +#if !CONFIG_REALTIME_ONLY if (cpi->pass == 1) { diff --git a/libvpx/vp8/encoder/onyx_int.h b/libvpx/vp8/encoder/onyx_int.h index 8beba27f..317e4b9e 100644 --- a/libvpx/vp8/encoder/onyx_int.h +++ b/libvpx/vp8/encoder/onyx_int.h @@ -716,6 +716,8 @@ typedef struct VP8_COMP } rd_costs; } VP8_COMP; +void vp8_initialize_enc(void); + void vp8_alloc_compressor_data(VP8_COMP *cpi); int vp8_reverse_trans(int x); void vp8_new_framerate(VP8_COMP *cpi, double framerate); diff --git a/libvpx/vp8/encoder/pickinter.c b/libvpx/vp8/encoder/pickinter.c index 5ce98ad2..d0fff3f0 100644 --- a/libvpx/vp8/encoder/pickinter.c +++ b/libvpx/vp8/encoder/pickinter.c @@ -21,10 +21,12 @@ #include "vp8/common/findnearmv.h" #include "encodemb.h" #include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "vpx_dsp/variance.h" #include "mcomp.h" #include "rdopt.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #if CONFIG_TEMPORAL_DENOISING #include "denoising.h" @@ -72,7 +74,7 @@ static int macroblock_corner_grad(unsigned char* signal, int stride, int y2 = signal[offsetx * stride + offsety + sgny]; int y3 = signal[(offsetx + sgnx) * stride + offsety]; int y4 = signal[(offsetx + sgnx) * stride + offsety + sgny]; - return MAX(MAX(abs(y1 - y2), abs(y1 - y3)), abs(y1 - y4)); + return VPXMAX(VPXMAX(abs(y1 - y2), abs(y1 - y3)), abs(y1 - y4)); } static int check_dot_artifact_candidate(VP8_COMP *cpi, @@ -813,9 +815,18 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, // Check if current macroblock is in skin area. 
{ - const int y = x->src.y_buffer[7 * x->src.y_stride + 7]; - const int cb = x->src.u_buffer[3 * x->src.uv_stride + 3]; - const int cr = x->src.v_buffer[3 * x->src.uv_stride + 3]; + const int y = (x->src.y_buffer[7 * x->src.y_stride + 7] + + x->src.y_buffer[7 * x->src.y_stride + 8] + + x->src.y_buffer[8 * x->src.y_stride + 7] + + x->src.y_buffer[8 * x->src.y_stride + 8]) >> 2; + const int cb = (x->src.u_buffer[3 * x->src.uv_stride + 3] + + x->src.u_buffer[3 * x->src.uv_stride + 4] + + x->src.u_buffer[4 * x->src.uv_stride + 3] + + x->src.u_buffer[4 * x->src.uv_stride + 4]) >> 2; + const int cr = (x->src.v_buffer[3 * x->src.uv_stride + 3] + + x->src.v_buffer[3 * x->src.uv_stride + 4] + + x->src.v_buffer[4 * x->src.uv_stride + 3] + + x->src.v_buffer[4 * x->src.uv_stride + 4]) >> 2; x->is_skin = 0; if (!cpi->oxcf.screen_content_mode) x->is_skin = is_skin_color(y, cb, cr); @@ -824,7 +835,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, if (cpi->oxcf.noise_sensitivity) { // Under aggressive denoising mode, should we use skin map to reduce denoiser // and ZEROMV bias? Will need to revisit the accuracy of this detection for - // very noisy input. For now keep this as is (i.e., don't turn it off). + // very noisy input. For now keep this as is (i.e., don't turn it off). // if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) // x->is_skin = 0; } @@ -874,7 +885,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* If the frame has big static background and current MB is in low * motion area, its mode decision is biased to ZEROMV mode. - * No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12). + * No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12). * At such speed settings, ZEROMV is already heavily favored. 
*/ if (cpi->Speed < 12) { @@ -1136,8 +1147,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, #if CONFIG_MULTI_RES_ENCODING if (parent_ref_valid && (parent_ref_frame == this_ref_frame) && dissim <= 2 && - MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row), - abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4) + VPXMAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row), + abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= + 4) { d->bmi.mv.as_int = mvp_full.as_int; mode_mv[NEWMV].as_int = mvp_full.as_int; diff --git a/libvpx/vp8/encoder/ratectrl.c b/libvpx/vp8/encoder/ratectrl.c index e8796a1f..7da3d71a 100644 --- a/libvpx/vp8/encoder/ratectrl.c +++ b/libvpx/vp8/encoder/ratectrl.c @@ -22,6 +22,7 @@ #include "vpx_mem/vpx_mem.h" #include "vp8/common/systemdependent.h" #include "encodemv.h" +#include "vpx_dsp/vpx_dsp_common.h" #define MIN_BPB_FACTOR 0.01 @@ -380,7 +381,8 @@ static void calc_iframe_target_size(VP8_COMP *cpi) int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */ /* Boost depends somewhat on frame rate: only used for 1 layer case. */ if (cpi->oxcf.number_of_layers == 1) { - kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16)); + kf_boost = VPXMAX(initial_boost, + (int)(2 * cpi->output_framerate - 16)); } else { /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */ @@ -1591,11 +1593,38 @@ int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { if (Q < thresh_qp && cpi->projected_frame_size > thresh_rate && pred_err_mb > thresh_pred_err_mb) { + double new_correction_factor = cpi->rate_correction_factor; + const int target_size = cpi->av_per_frame_bandwidth; + int target_bits_per_mb; // Drop this frame: advance frame counters, and set force_maxqp flag. cpi->common.current_video_frame++; cpi->frames_since_key++; // Flag to indicate we will force next frame to be encoded at max QP. cpi->force_maxqp = 1; + // Reset the buffer levels. 
+ cpi->buffer_level = cpi->oxcf.optimal_buffer_level; + cpi->bits_off_target = cpi->oxcf.optimal_buffer_level; + // Compute a new rate correction factor, corresponding to the current + // target frame size and max_QP, and adjust the rate correction factor + // upwards, if needed. + // This is to prevent a bad state where the re-encoded frame at max_QP + // undershoots significantly, and then we end up dropping every other + // frame because the QP/rate_correction_factor may have been too low + // before the drop and then takes too long to come up. + if (target_size >= (INT_MAX >> BPER_MB_NORMBITS)) + target_bits_per_mb = + (target_size / cpi->common.MBs) << BPER_MB_NORMBITS; + else + target_bits_per_mb = + (target_size << BPER_MB_NORMBITS) / cpi->common.MBs; + // Rate correction factor based on target_size_per_mb and max_QP. + new_correction_factor = (double)target_bits_per_mb / + (double)vp8_bits_per_mb[INTER_FRAME][cpi->worst_quality]; + if (new_correction_factor > cpi->rate_correction_factor) + cpi->rate_correction_factor = + VPXMIN(2.0 * cpi->rate_correction_factor, new_correction_factor); + if (cpi->rate_correction_factor > MAX_BPB_FACTOR) + cpi->rate_correction_factor = MAX_BPB_FACTOR; return 1; } else { cpi->force_maxqp = 0; diff --git a/libvpx/vp8/encoder/rdopt.c b/libvpx/vp8/encoder/rdopt.c index fdff378b..ab0ad159 100644 --- a/libvpx/vp8/encoder/rdopt.c +++ b/libvpx/vp8/encoder/rdopt.c @@ -24,6 +24,7 @@ #include "pickinter.h" #include "vp8/common/entropymode.h" #include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "vp8/common/findnearmv.h" #include "vp8/common/quant_common.h" diff --git a/libvpx/vp8/encoder/rdopt.h b/libvpx/vp8/encoder/rdopt.h index b4fcd10b..1cb1a072 100644 --- a/libvpx/vp8/encoder/rdopt.h +++ b/libvpx/vp8/encoder/rdopt.h @@ -12,13 +12,15 @@ #ifndef VP8_ENCODER_RDOPT_H_ #define VP8_ENCODER_RDOPT_H_ +#include "./vpx_config.h" + #ifdef __cplusplus extern "C" { #endif #define 
RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) -static void insertsortmv(int arr[], int len) +static INLINE void insertsortmv(int arr[], int len) { int i, j, k; @@ -41,7 +43,7 @@ static void insertsortmv(int arr[], int len) } } -static void insertsortsad(int arr[],int idx[], int len) +static INLINE void insertsortsad(int arr[],int idx[], int len) { int i, j, k; @@ -77,10 +79,10 @@ extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate); -static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, - unsigned char *plane[3], - unsigned int recon_yoffset, - unsigned int recon_uvoffset) +static INLINE void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, + unsigned char *plane[3], + unsigned int recon_yoffset, + unsigned int recon_uvoffset) { plane[0] = fb->y_buffer + recon_yoffset; plane[1] = fb->u_buffer + recon_uvoffset; @@ -88,10 +90,10 @@ static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, } -static void get_predictor_pointers(const VP8_COMP *cpi, - unsigned char *plane[4][3], - unsigned int recon_yoffset, - unsigned int recon_uvoffset) +static INLINE void get_predictor_pointers(const VP8_COMP *cpi, + unsigned char *plane[4][3], + unsigned int recon_yoffset, + unsigned int recon_uvoffset) { if (cpi->ref_frame_flags & VP8_LAST_FRAME) get_plane_pointers(&cpi->common.yv12_fb[cpi->common.lst_fb_idx], @@ -107,8 +109,8 @@ static void get_predictor_pointers(const VP8_COMP *cpi, } -static void get_reference_search_order(const VP8_COMP *cpi, - int ref_frame_map[4]) +static INLINE void get_reference_search_order(const VP8_COMP *cpi, + int ref_frame_map[4]) { int i=0; diff --git a/libvpx/vp8/encoder/treewriter.h b/libvpx/vp8/encoder/treewriter.h index cfb2730a..2debf927 100644 --- a/libvpx/vp8/encoder/treewriter.h +++ b/libvpx/vp8/encoder/treewriter.h @@ -15,6 +15,7 @@ /* Trees map alphabets into huffman-like codes suitable for an arithmetic bit coder. 
Timothy S Murphy 11 October 2004 */ +#include "./vpx_config.h" #include "vp8/common/treecoder.h" #include "boolhuff.h" /* for now */ @@ -46,7 +47,7 @@ typedef BOOL_CODER vp8_writer; /* Both of these return bits, not scaled bits. */ -static unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p) +static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p) { /* Imitate existing calculation */ @@ -76,7 +77,7 @@ static void vp8_treed_write } while (n); } -static void vp8_write_token +static INLINE void vp8_write_token ( vp8_writer *const w, vp8_tree t, @@ -107,7 +108,7 @@ static int vp8_treed_cost( return c; } -static int vp8_cost_token +static INLINE int vp8_cost_token ( vp8_tree t, const vp8_prob *const p, diff --git a/libvpx/vp8/vp8_common.mk b/libvpx/vp8/vp8_common.mk index 3ad11c77..4c4e8562 100644 --- a/libvpx/vp8/vp8_common.mk +++ b/libvpx/vp8/vp8_common.mk @@ -45,6 +45,7 @@ VP8_COMMON_SRCS-yes += common/mv.h VP8_COMMON_SRCS-yes += common/onyxc_int.h VP8_COMMON_SRCS-yes += common/quant_common.h VP8_COMMON_SRCS-yes += common/reconinter.h +VP8_COMMON_SRCS-yes += common/reconintra.h VP8_COMMON_SRCS-yes += common/reconintra4x4.h VP8_COMMON_SRCS-yes += common/rtcd.c VP8_COMMON_SRCS-yes += common/rtcd_defs.pl @@ -88,7 +89,6 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/copy_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm -VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm @@ -118,7 +118,6 @@ VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/bilinear_filter_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/copymem_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += 
common/mips/msa/loopfilter_filters_msa.c -VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/reconintra_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/sixtap_filter_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp8_macros_msa.h @@ -146,7 +145,6 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/loopfilter_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/simpleloopfilter_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/sixtappredict8x4_v6$(ASM) -VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/intra4x4_predict_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequant_idct_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequantize_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_blk_v6.c @@ -165,7 +163,6 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_loopfilter_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/reconintra_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c diff --git a/libvpx/vp8/vp8_cx_iface.c b/libvpx/vp8/vp8_cx_iface.c index fe88cd4b..c125ae84 100644 --- a/libvpx/vp8/vp8_cx_iface.c +++ b/libvpx/vp8/vp8_cx_iface.c @@ -17,6 +17,7 @@ #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/vpx_once.h" #include "vp8/encoder/onyx_int.h" #include "vpx/vp8cx.h" #include "vp8/encoder/firstpass.h" @@ -237,7 +238,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, ts_periodicity, 16); for (i=1; i<cfg->ts_number_layers; i++) - if (cfg->ts_target_bitrate[i] <= cfg->ts_target_bitrate[i-1] && 
+ if (cfg->ts_target_bitrate[i] <= cfg->ts_target_bitrate[i-1] && cfg->rc_target_bitrate > 0) ERROR("ts_target_bitrate entries are not strictly increasing"); @@ -693,6 +694,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, else ctx->priv->enc.total_encoders = 1; + once(vp8_initialize_enc); + res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0); if (!res) @@ -879,7 +882,8 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, } ctx->control_frame_flags = 0; - res = set_reference_and_update(ctx, flags); + if (!res) + res = set_reference_and_update(ctx, flags); /* Handle fixed keyframe intervals */ if (ctx->cfg.kf_mode == VPX_KF_AUTO @@ -1273,9 +1277,6 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = {VP8_SET_REFERENCE, vp8e_set_reference}, {VP8_COPY_REFERENCE, vp8e_get_reference}, {VP8_SET_POSTPROC, vp8e_set_previewpp}, - {VP8E_UPD_ENTROPY, vp8e_update_entropy}, - {VP8E_UPD_REFERENCE, vp8e_update_reference}, - {VP8E_USE_REFERENCE, vp8e_use_reference}, {VP8E_SET_FRAME_FLAGS, vp8e_set_frame_flags}, {VP8E_SET_TEMPORAL_LAYER_ID, vp8e_set_temporal_layer_id}, {VP8E_SET_ROI_MAP, vp8e_set_roi_map}, diff --git a/libvpx/vp8/vp8_dx_iface.c b/libvpx/vp8/vp8_dx_iface.c index 72e4770c..a12a2ad0 100644 --- a/libvpx/vp8/vp8_dx_iface.c +++ b/libvpx/vp8/vp8_dx_iface.c @@ -22,6 +22,7 @@ #include "common/common.h" #include "common/onyxd.h" #include "decoder/onyxd_int.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #if CONFIG_ERROR_CONCEALMENT #include "decoder/error_concealment.h" @@ -42,8 +43,6 @@ typedef enum } mem_seg_id_t; #define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) -static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t); - struct vpx_codec_alg_priv { vpx_codec_priv_t base; @@ -68,18 +67,6 @@ struct vpx_codec_alg_priv FRAGMENT_DATA fragments; }; -static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t flags) -{ - /* Although this declaration is constant, we can't use it 
in the requested - * segments list because we want to define the requested segments list - * before defining the private type (so that the number of memory maps is - * known) - */ - (void)si; - (void)flags; - return sizeof(vpx_codec_alg_priv_t); -} - static void vp8_init_ctx(vpx_codec_ctx_t *ctx) { vpx_codec_alg_priv_t *priv = @@ -180,7 +167,7 @@ static vpx_codec_err_t vp8_peek_si_internal(const uint8_t *data, const uint8_t *clear = data; if (decrypt_cb) { - int n = MIN(sizeof(clear_buffer), data_sz); + int n = VPXMIN(sizeof(clear_buffer), data_sz); decrypt_cb(decrypt_state, data, clear_buffer, n); clear = clear_buffer; } @@ -259,8 +246,8 @@ static void yuvconfig2image(vpx_image_t *img, img->fmt = VPX_IMG_FMT_I420; img->w = yv12->y_stride; img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15; - img->d_w = yv12->y_width; - img->d_h = yv12->y_height; + img->d_w = img->r_w = yv12->y_width; + img->d_h = img->r_h = yv12->y_height; img->x_chroma_shift = 1; img->y_chroma_shift = 1; img->planes[VPX_PLANE_Y] = yv12->y_buffer; diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c index ac417b69..24c6c54e 100644 --- a/libvpx/vp9/common/vp9_alloccommon.c +++ b/libvpx/vp9/common/vp9_alloccommon.c @@ -115,6 +115,8 @@ void vp9_free_context_buffers(VP9_COMMON *cm) { cm->above_context = NULL; vpx_free(cm->above_seg_context); cm->above_seg_context = NULL; + vpx_free(cm->lf.lfm); + cm->lf.lfm = NULL; } int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { @@ -149,6 +151,16 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { cm->above_context_alloc_cols = cm->mi_cols; } + vpx_free(cm->lf.lfm); + + // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The + // stride and rows are rounded up / truncated to a multiple of 8. 
+ cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; + cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, + sizeof(*cm->lf.lfm)); + if (!cm->lf.lfm) goto fail; + return 0; fail: diff --git a/libvpx/vp9/common/vp9_blockd.c b/libvpx/vp9/common/vp9_blockd.c index e8334fc8..0e104ee5 100644 --- a/libvpx/vp9/common/vp9_blockd.c +++ b/libvpx/vp9/common/vp9_blockd.c @@ -129,7 +129,6 @@ void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { int i; for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y; xd->plane[i].subsampling_x = i ? ss_x : 0; xd->plane[i].subsampling_y = i ? ss_y : 0; } diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h index d776b440..61eb5916 100644 --- a/libvpx/vp9/common/vp9_blockd.h +++ b/libvpx/vp9/common/vp9_blockd.h @@ -14,6 +14,7 @@ #include "./vpx_config.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #include "vpx_scale/yv12config.h" @@ -119,7 +120,6 @@ struct buf_2d { struct macroblockd_plane { tran_low_t *dqcoeff; - PLANE_TYPE plane_type; int subsampling_x; int subsampling_y; struct buf_2d dst; @@ -175,7 +175,6 @@ typedef struct macroblockd { int mb_to_bottom_edge; FRAME_CONTEXT *fc; - int frame_parallel_decoding_mode; /* pointers to reference frames */ RefBuffer *block_refs[2]; @@ -200,6 +199,10 @@ typedef struct macroblockd { struct vpx_internal_error_info *error_info; } MACROBLOCKD; +static INLINE PLANE_TYPE get_plane_type(int plane) { + return (PLANE_TYPE)(plane > 0); +} + static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) { return subsize_lookup[partition][bsize]; @@ -235,7 +238,7 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, return TX_4X4; } else { const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss]; - return MIN(y_tx_size, max_txsize_lookup[plane_bsize]); + return VPXMIN(y_tx_size, 
max_txsize_lookup[plane_bsize]); } } diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c index 0bf7cbcc..a6dae6a1 100644 --- a/libvpx/vp9/common/vp9_common_data.c +++ b/libvpx/vp9/common/vp9_common_data.c @@ -9,6 +9,7 @@ */ #include "vp9/common/vp9_common_data.h" +#include "vpx_dsp/vpx_dsp_common.h" // Log 2 conversion lookup tables for block width and height const uint8_t b_width_log2_lookup[BLOCK_SIZES] = @@ -27,7 +28,7 @@ const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; -// MIN(3, MIN(b_width_log2(bsize), b_height_log2(bsize))) +// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) const uint8_t size_group_lookup[BLOCK_SIZES] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h index a1746bce..21611ed6 100644 --- a/libvpx/vp9/common/vp9_entropy.h +++ b/libvpx/vp9/common/vp9_entropy.h @@ -75,21 +75,6 @@ DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]); #define EOB_MODEL_TOKEN 3 -typedef struct { - const vpx_tree_index *tree; - const vpx_prob *prob; - int len; - int base_val; - const int16_t *cost; -} vp9_extra_bit; - -// indexed by token value -extern const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS]; -#if CONFIG_VP9_HIGHBITDEPTH -extern const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS]; -extern const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS]; -#endif // CONFIG_VP9_HIGHBITDEPTH - #define DCT_MAX_VALUE 16384 #if CONFIG_VP9_HIGHBITDEPTH #define DCT_MAX_VALUE_HIGH10 65536 diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c index 0915918e..b8a11322 100644 --- a/libvpx/vp9/common/vp9_loopfilter.c +++ b/libvpx/vp9/common/vp9_loopfilter.c @@ -13,6 +13,7 @@ #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_reconinter.h" +#include 
"vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -775,7 +776,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // an 8x8 in that the internal ones can be skipped and don't depend on // the prediction block size. if (tx_size_y == TX_4X4) - *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y; + *int_4x4_y |= size_mask[block_size] << shift_y; if (tx_size_uv == TX_4X4) *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; @@ -821,7 +822,121 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, left_64x64_txform_mask[tx_size_y]) << shift_y; if (tx_size_y == TX_4X4) - *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y; + *int_4x4_y |= size_mask[block_size] << shift_y; +} + +void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, + const int mi_col, LOOP_FILTER_MASK *lfm) { + int i; + + // The largest loopfilter we have is 16x16 so we use the 16x16 mask + // for 32x32 transforms also. + lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32]; + lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32]; + lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32]; + lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32]; + + // We do at least 8 tap filter on every 32x32 even if the transform size + // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and + // remove it from the 4x4. + lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border; + lfm->left_y[TX_4X4] &= ~left_border; + lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border; + lfm->above_y[TX_4X4] &= ~above_border; + lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv; + lfm->left_uv[TX_4X4] &= ~left_border_uv; + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv; + lfm->above_uv[TX_4X4] &= ~above_border_uv; + + // We do some special edge handling. 
+ if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { + const uint64_t rows = cm->mi_rows - mi_row; + + // Each pixel inside the border gets a 1, + const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); + const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); + + // Remove values completely outside our border. + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= mask_y; + lfm->above_y[i] &= mask_y; + lfm->left_uv[i] &= mask_uv; + lfm->above_uv[i] &= mask_uv; + } + lfm->int_4x4_y &= mask_y; + lfm->int_4x4_uv &= mask_uv; + + // We don't apply a wide loop filter on the last uv block row. If set + // apply the shorter one instead. + if (rows == 1) { + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16]; + lfm->above_uv[TX_16X16] = 0; + } + if (rows == 5) { + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00; + lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00); + } + } + + if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { + const uint64_t columns = cm->mi_cols - mi_col; + + // Each pixel inside the border gets a 1, the multiply copies the border + // to where we need it. + const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; + const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111; + + // Internal edges are not applied on the last column of the image so + // we mask 1 more for the internal edges + const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111; + + // Remove the bits outside the image edge. + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= mask_y; + lfm->above_y[i] &= mask_y; + lfm->left_uv[i] &= mask_uv; + lfm->above_uv[i] &= mask_uv; + } + lfm->int_4x4_y &= mask_y; + lfm->int_4x4_uv &= mask_uv_int; + + // We don't apply a wide loop filter on the last uv column. If set + // apply the shorter one instead. 
+ if (columns == 1) { + lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16]; + lfm->left_uv[TX_16X16] = 0; + } + if (columns == 5) { + lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc); + lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc); + } + } + // We don't apply a loop filter on the first column in the image, mask that + // out. + if (mi_col == 0) { + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= 0xfefefefefefefefeULL; + lfm->left_uv[i] &= 0xeeee; + } + } + + // Assert if we try to apply 2 different loop filters at the same position. + assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); + assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); + assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); + assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); + assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); + assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); + assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); + assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); + assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); + assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); + assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); + assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); + assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); + assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); + assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); + assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); } // This function sets up the bit masks for the entire 64x64 region represented @@ -854,7 +969,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, const int shift_8_y[] = {0, 1, 8, 9}; const int shift_32_uv[] = {0, 2, 8, 10}; const int shift_16_uv[] = {0, 1, 4, 5}; - int i; const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? cm->mi_rows - mi_row : MI_BLOCK_SIZE); const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? 
@@ -969,114 +1083,8 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, } break; } - // The largest loopfilter we have is 16x16 so we use the 16x16 mask - // for 32x32 transforms also. - lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32]; - lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32]; - lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32]; - lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32]; - // We do at least 8 tap filter on every 32x32 even if the transform size - // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and - // remove it from the 4x4. - lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border; - lfm->left_y[TX_4X4] &= ~left_border; - lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border; - lfm->above_y[TX_4X4] &= ~above_border; - lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv; - lfm->left_uv[TX_4X4] &= ~left_border_uv; - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv; - lfm->above_uv[TX_4X4] &= ~above_border_uv; - - // We do some special edge handling. - if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { - const uint64_t rows = cm->mi_rows - mi_row; - - // Each pixel inside the border gets a 1, - const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); - const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); - - // Remove values completely outside our border. - for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= mask_y; - lfm->above_y[i] &= mask_y; - lfm->left_uv[i] &= mask_uv; - lfm->above_uv[i] &= mask_uv; - } - lfm->int_4x4_y &= mask_y; - lfm->int_4x4_uv &= mask_uv; - - // We don't apply a wide loop filter on the last uv block row. If set - // apply the shorter one instead. 
- if (rows == 1) { - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16]; - lfm->above_uv[TX_16X16] = 0; - } - if (rows == 5) { - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00; - lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00); - } - } - - if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { - const uint64_t columns = cm->mi_cols - mi_col; - - // Each pixel inside the border gets a 1, the multiply copies the border - // to where we need it. - const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; - const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111; - - // Internal edges are not applied on the last column of the image so - // we mask 1 more for the internal edges - const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111; - - // Remove the bits outside the image edge. - for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= mask_y; - lfm->above_y[i] &= mask_y; - lfm->left_uv[i] &= mask_uv; - lfm->above_uv[i] &= mask_uv; - } - lfm->int_4x4_y &= mask_y; - lfm->int_4x4_uv &= mask_uv_int; - - // We don't apply a wide loop filter on the last uv column. If set - // apply the shorter one instead. - if (columns == 1) { - lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16]; - lfm->left_uv[TX_16X16] = 0; - } - if (columns == 5) { - lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc); - lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc); - } - } - // We don't apply a loop filter on the first column in the image, mask that - // out. - if (mi_col == 0) { - for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= 0xfefefefefefefefeULL; - lfm->left_uv[i] &= 0xeeee; - } - } - - // Assert if we try to apply 2 different loop filters at the same position. 
- assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); - assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); - assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); - assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); - assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); - assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); - assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); - assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); - assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); - assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); - assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); - assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); - assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); - assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); - assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); - assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); + vp9_adjust_mask(cm, mi_row, mi_col, lfm); } static void filter_selectively_vert(uint8_t *s, int pitch, @@ -1188,9 +1196,7 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; const int skip_this_r = skip_this && !block_edge_above; - const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) - ? 
get_uv_tx_size(&mi[0].mbmi, plane) - : mi[0].mbmi.tx_size; + const TX_SIZE tx_size = get_uv_tx_size(&mi[0].mbmi, plane); const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; @@ -1427,6 +1433,7 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; int r, c; + uint8_t lfl_uv[16]; uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; @@ -1437,11 +1444,9 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, // Vertical pass: do 2 rows at one time for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { - if (plane->plane_type == 1) { - for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { - lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; - lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; - } + for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { + lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; + lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; } { @@ -1456,18 +1461,18 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, highbd_filter_selectively_vert_row2( plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_uv[r << 1], (int)cm->bit_depth); + &lfl_uv[r << 1], (int)cm->bit_depth); } else { filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_uv[r << 1]); + &lfl_uv[r << 1]); } #else filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_uv[r << 1]); + &lfl_uv[r << 1]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 16 * dst->stride; @@ -1508,16 +1513,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, 
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfm->lfl_uv[r << 1], (int)cm->bit_depth); + &lfl_uv[r << 1], (int)cm->bit_depth); } else { filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfm->lfl_uv[r << 1]); + &lfl_uv[r << 1]); } #else filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfm->lfl_uv[r << 1]); + &lfl_uv[r << 1]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; @@ -1528,13 +1533,11 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, } } -void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, - VP9_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only) { +static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only) { const int num_planes = y_only ? 1 : MAX_MB_PLANE; enum lf_path path; - LOOP_FILTER_MASK lfm; int mi_row, mi_col; if (y_only) @@ -1548,24 +1551,24 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { int plane; vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. 
- vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - &lfm); + vp9_adjust_mask(cm, mi_row, mi_col, lfm); - vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); for (plane = 1; plane < num_planes; ++plane) { switch (path) { case LF_PATH_420: - vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); break; case LF_PATH_444: - vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm); + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); break; case LF_PATH_SLOW: vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, @@ -1588,13 +1591,135 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, if (partial_frame && cm->mi_rows > 8) { start_mi_row = cm->mi_rows >> 1; start_mi_row &= 0xfffffff8; - mi_rows_to_filter = MAX(cm->mi_rows / 8, 8); + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); +} + +// Used by the encoder to build the loopfilter masks. 
+void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, + int partial_frame) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + int mi_col, mi_row; + if (!frame_filter_level) return; + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); } end_mi_row = start_mi_row + mi_rows_to_filter; + vp9_loop_filter_frame_init(cm, frame_filter_level); - vp9_loop_filter_rows(frame, cm, xd->plane, - start_mi_row, end_mi_row, - y_only); + + for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) { + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + // vp9_setup_mask() zeros lfm + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, + get_lfm(&cm->lf, mi_row, mi_col)); + } + } +} + +// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16 +// or greater area. +static const uint8_t first_block_in_16x16[8][8] = { + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0} +}; + +// This function sets up the bit masks for a block represented +// by mi_row, mi_col in a 64x64 region. +// TODO(SJL): This function only works for yv12. 
+void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row, + int mi_col, int bw, int bh) { + const BLOCK_SIZE block_size = mbmi->sb_type; + const TX_SIZE tx_size_y = mbmi->tx_size; + const loop_filter_info_n *const lfi_n = &cm->lf_info; + const int filter_level = get_filter_level(lfi_n, mbmi); + const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); + LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; + const int row_in_sb = (mi_row & 7); + const int col_in_sb = (mi_col & 7); + const int shift_y = col_in_sb + (row_in_sb << 3); + const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2); + const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb]; + + if (!filter_level) { + return; + } else { + int index = shift_y; + int i; + for (i = 0; i < bh; i++) { + memset(&lfm->lfl_y[index], filter_level, bw); + index += 8; + } + } + + // These set 1 in the current block size for the block size edges. + // For instance if the block size is 32x16, we'll set: + // above = 1111 + // 0000 + // and + // left = 1000 + // = 1000 + // NOTE : In this example the low bit is left most ( 1000 ) is stored as + // 1, not 8... + // + // U and V set things on a 16 bit scale. + // + *above_y |= above_prediction_mask[block_size] << shift_y; + *left_y |= left_prediction_mask[block_size] << shift_y; + + if (build_uv) { + *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; + *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; + } + + // If the block has no coefficients and is not intra we skip applying + // the loop filter on block edges. 
+ if (mbmi->skip && is_inter_block(mbmi)) + return; + + // Add a mask for the transform size. The transform size mask is set to + // be correct for a 64x64 prediction block size. Mask to match the size of + // the block we are working on and then shift it into place. + *above_y |= (size_mask[block_size] & + above_64x64_txform_mask[tx_size_y]) << shift_y; + *left_y |= (size_mask[block_size] & + left_64x64_txform_mask[tx_size_y]) << shift_y; + + if (build_uv) { + *above_uv |= (size_mask_uv[block_size] & + above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + + *left_uv |= (size_mask_uv[block_size] & + left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + } + + // Try to determine what to do with the internal 4x4 block boundaries. These + // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the + // internal ones can be skipped and don't depend on the prediction block size. + if (tx_size_y == TX_4X4) + *int_4x4_y |= size_mask[block_size] << shift_y; + + if (build_uv && tx_size_uv == TX_4X4) + *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; } void vp9_loop_filter_data_reset( @@ -1608,9 +1733,17 @@ void vp9_loop_filter_data_reset( memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); } +void vp9_reset_lfm(VP9_COMMON *const cm) { + if (cm->lf.filter_level) { + memset(cm->lf.lfm, 0, + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride * + sizeof(*cm->lf.lfm)); + } +} + int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { (void)unused; - vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only); + loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only); return 1; } diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h index f7cbde67..7f943ea0 100644 --- a/libvpx/vp9/common/vp9_loopfilter.h +++ b/libvpx/vp9/common/vp9_loopfilter.h @@ -35,24 +35,6 
@@ enum lf_path { LF_PATH_SLOW, }; -struct loopfilter { - int filter_level; - - int sharpness_level; - int last_sharpness_level; - - uint8_t mode_ref_delta_enabled; - uint8_t mode_ref_delta_update; - - // 0 = Intra, Last, GF, ARF - signed char ref_deltas[MAX_REF_LF_DELTAS]; - signed char last_ref_deltas[MAX_REF_LF_DELTAS]; - - // 0 = ZERO_MV, MV - signed char mode_deltas[MAX_MODE_LF_DELTAS]; - signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; -}; - // Need to align this structure so when it is declared and // passed it can be loaded into vector registers. typedef struct { @@ -83,9 +65,29 @@ typedef struct { uint16_t above_uv[TX_SIZES]; uint16_t int_4x4_uv; uint8_t lfl_y[64]; - uint8_t lfl_uv[16]; } LOOP_FILTER_MASK; +struct loopfilter { + int filter_level; + + int sharpness_level; + int last_sharpness_level; + + uint8_t mode_ref_delta_enabled; + uint8_t mode_ref_delta_update; + + // 0 = Intra, Last, GF, ARF + signed char ref_deltas[MAX_REF_LF_DELTAS]; + signed char last_ref_deltas[MAX_REF_LF_DELTAS]; + + // 0 = ZERO_MV, MV + signed char mode_deltas[MAX_MODE_LF_DELTAS]; + signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; + + LOOP_FILTER_MASK *lfm; + int lfm_stride; +}; + /* assorted loopfilter functions which get used elsewhere */ struct VP9Common; struct macroblockd; @@ -116,7 +118,7 @@ void vp9_filter_block_plane_non420(struct VP9Common *cm, void vp9_loop_filter_init(struct VP9Common *cm); // Update the loop filter for the current frame. -// This should be called before vp9_loop_filter_rows(), vp9_loop_filter_frame() +// This should be called before vp9_loop_filter_frame(), vp9_build_mask_frame() // calls this function directly. void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); @@ -126,11 +128,19 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, int filter_level, int y_only, int partial_frame); -// Apply the loop filter to [start, stop) macro block rows in frame_buffer. 
-void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, - struct VP9Common *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only); +// Get the superblock lfm for a given mi_row, mi_col. +static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, + const int mi_row, const int mi_col) { + return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; +} + +void vp9_build_mask(struct VP9Common *cm, const MB_MODE_INFO *mbmi, int mi_row, + int mi_col, int bw, int bh); +void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, + const int mi_col, LOOP_FILTER_MASK *lfm); +void vp9_build_mask_frame(struct VP9Common *cm, int frame_filter_level, + int partial_frame); +void vp9_reset_lfm(struct VP9Common *const cm); typedef struct LoopFilterWorkerData { YV12_BUFFER_CONFIG *frame_buffer; diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h index c373c027..ceffdedf 100644 --- a/libvpx/vp9/common/vp9_onyxc_int.h +++ b/libvpx/vp9/common/vp9_onyxc_int.h @@ -112,10 +112,11 @@ typedef struct BufferPool { typedef struct VP9Common { struct vpx_internal_error_info error; vpx_color_space_t color_space; + vpx_color_range_t color_range; int width; int height; - int display_width; - int display_height; + int render_width; + int render_height; int last_width; int last_height; @@ -357,13 +358,12 @@ static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd, xd->above_context[i] = cm->above_context + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); - if (xd->plane[i].plane_type == PLANE_TYPE_Y) { + if (get_plane_type(i) == PLANE_TYPE_Y) { memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); } else { memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); } xd->fc = cm->fc; - xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode; } xd->above_seg_context = cm->above_seg_context; diff --git 
a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c index 71ab8615..b685d813 100644 --- a/libvpx/vp9/common/vp9_postproc.c +++ b/libvpx/vp9/common/vp9_postproc.c @@ -16,6 +16,7 @@ #include "./vpx_scale_rtcd.h" #include "./vp9_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" #include "vpx_scale/vpx_scale.h" @@ -625,7 +626,7 @@ static void swap_mi_and_prev_mi(VP9_COMMON *cm) { int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags) { - const int q = MIN(105, cm->lf.filter_level * 2); + const int q = VPXMIN(105, cm->lf.filter_level * 2); const int flags = ppflags->post_proc_flag; YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer; struct postproc_state *const ppstate = &cm->postproc_state; diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h index 67b95dbc..6f7af4a5 100644 --- a/libvpx/vp9/common/vp9_pred_common.h +++ b/libvpx/vp9/common/vp9_pred_common.h @@ -13,6 +13,7 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vpx_dsp/vpx_dsp_common.h" #ifdef __cplusplus extern "C" { @@ -24,14 +25,14 @@ static INLINE int get_segment_id(const VP9_COMMON *cm, const int mi_offset = mi_row * cm->mi_cols + mi_col; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); int x, y, segment_id = MAX_SEGMENTS; for (y = 0; y < ymis; ++y) for (x = 0; x < xmis; ++x) - segment_id = MIN(segment_id, - segment_ids[mi_offset + y * cm->mi_cols + x]); + segment_id = + VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); return segment_id; diff --git a/libvpx/vp9/common/vp9_reconinter.c 
b/libvpx/vp9/common/vp9_reconinter.c index f83f8257..d8c14ecc 100644 --- a/libvpx/vp9/common/vp9_reconinter.c +++ b/libvpx/vp9/common/vp9_reconinter.c @@ -187,7 +187,19 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, const int is_scaled = vp9_is_scaled(sf); if (is_scaled) { - pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf); + // Co-ordinate of containing block to pixel precision. + const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); + if (plane == 0) + pre_buf->buf = xd->block_refs[ref]->buf->y_buffer; + else if (plane == 1) + pre_buf->buf = xd->block_refs[ref]->buf->u_buffer; + else + pre_buf->buf = xd->block_refs[ref]->buf->v_buffer; + + pre_buf->buf += scaled_buffer_offset(x_start + x, y_start + y, + pre_buf->stride, sf); + pre = pre_buf->buf; scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); xs = sf->x_step_q4; ys = sf->y_step_q4; diff --git a/libvpx/vp9/common/vp9_reconintra.c b/libvpx/vp9/common/vp9_reconintra.c index e60eff80..3d84a288 100644 --- a/libvpx/vp9/common/vp9_reconintra.c +++ b/libvpx/vp9/common/vp9_reconintra.c @@ -133,7 +133,6 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, int frame_width, frame_height; int x0, y0; const struct macroblockd_plane *const pd = &xd->plane[plane]; - // int base=128; int base = 128 << (bd - 8); // 127 127 127 .. 127 127 127 127 127 127 // 129 A B .. Y Z diff --git a/libvpx/vp9/common/vp9_rtcd_defs.pl b/libvpx/vp9/common/vp9_rtcd_defs.pl index 737fc56d..5bf71ef9 100644 --- a/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -85,16 +85,26 @@ specialize qw/vp9_filter_by_weight8x8 sse2 msa/; # dct # if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - # Note as optimized versions of these functions are added we need to add a check to ensure - # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. 
- add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; - specialize qw/vp9_iht4x4_16_add/; + # Force C versions if CONFIG_EMULATE_HARDWARE is 1 + if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { + add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + specialize qw/vp9_iht4x4_16_add/; + + add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + specialize qw/vp9_iht8x8_64_add/; + + add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; + specialize qw/vp9_iht16x16_256_add/; + } else { + add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + specialize qw/vp9_iht4x4_16_add sse2/; - add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; - specialize qw/vp9_iht8x8_64_add/; + add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; + specialize qw/vp9_iht8x8_64_add sse2/; - add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; - specialize qw/vp9_iht16x16_256_add/; + add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; + specialize qw/vp9_iht16x16_256_add sse2/; + } } else { # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { @@ -231,11 +241,15 @@ if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") { } if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { -# the transform coefficients are held in 32-bit -# values, so the assembler code for vp9_block_error can no longer be used. 
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; specialize qw/vp9_block_error/; + add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; + specialize qw/vp9_highbd_block_error/, "$sse2_x86inc"; + + add_proto qw/int64_t vp9_highbd_block_error_8bit/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; + specialize qw/vp9_highbd_block_error_8bit/, "$sse2_x86inc", "$avx_x86inc"; + add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp/; @@ -310,9 +324,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # ENCODEMB INVOKE - add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; - specialize qw/vp9_highbd_block_error sse2/; - add_proto qw/void vp9_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_fp/; diff --git a/libvpx/vp9/common/vp9_thread_common.c b/libvpx/vp9/common/vp9_thread_common.c index 6b11c93c..db78d6be 100644 --- a/libvpx/vp9/common/vp9_thread_common.c +++ b/libvpx/vp9/common/vp9_thread_common.c @@ -9,6 +9,7 @@ */ #include "./vpx_config.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include 
"vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_thread_common.h" @@ -108,29 +109,27 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, for (mi_row = start; mi_row < stop; mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { const int r = mi_row >> MI_BLOCK_SIZE_LOG2; const int c = mi_col >> MI_BLOCK_SIZE_LOG2; - LOOP_FILTER_MASK lfm; int plane; sync_read(lf_sync, r, c); vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); - // TODO(JBB): Make setup_mask work for non 420. - vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - &lfm); + vp9_adjust_mask(cm, mi_row, mi_col, lfm); - vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); for (plane = 1; plane < num_planes; ++plane) { switch (path) { case LF_PATH_420: - vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); break; case LF_PATH_444: - vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm); + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); break; case LF_PATH_SLOW: vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, @@ -165,7 +164,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, // Decoder may allocate more threads than number of tiles based on user's // input. 
const int tile_cols = 1 << cm->log2_tile_cols; - const int num_workers = MIN(nworkers, tile_cols); + const int num_workers = VPXMIN(nworkers, tile_cols); int i; if (!lf_sync->sync_range || sb_rows != lf_sync->rows || @@ -229,7 +228,7 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, if (partial_frame && cm->mi_rows > 8) { start_mi_row = cm->mi_rows >> 1; start_mi_row &= 0xfffffff8; - mi_rows_to_filter = MAX(cm->mi_rows / 8, 8); + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); } end_mi_row = start_mi_row + mi_rows_to_filter; vp9_loop_filter_frame_init(cm, frame_filter_level); @@ -317,21 +316,21 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { } // Accumulate frame counts. -void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts, - int is_dec) { +void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, + const FRAME_COUNTS *counts, int is_dec) { int i, j, k, l, m; for (i = 0; i < BLOCK_SIZE_GROUPS; i++) for (j = 0; j < INTRA_MODES; j++) - cm->counts.y_mode[i][j] += counts->y_mode[i][j]; + accum->y_mode[i][j] += counts->y_mode[i][j]; for (i = 0; i < INTRA_MODES; i++) for (j = 0; j < INTRA_MODES; j++) - cm->counts.uv_mode[i][j] += counts->uv_mode[i][j]; + accum->uv_mode[i][j] += counts->uv_mode[i][j]; for (i = 0; i < PARTITION_CONTEXTS; i++) for (j = 0; j < PARTITION_TYPES; j++) - cm->counts.partition[i][j] += counts->partition[i][j]; + accum->partition[i][j] += counts->partition[i][j]; if (is_dec) { int n; @@ -340,10 +339,10 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts, for (k = 0; k < REF_TYPES; k++) for (l = 0; l < COEF_BANDS; l++) for (m = 0; m < COEFF_CONTEXTS; m++) { - cm->counts.eob_branch[i][j][k][l][m] += + accum->eob_branch[i][j][k][l][m] += counts->eob_branch[i][j][k][l][m]; for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - cm->counts.coef[i][j][k][l][m][n] += + accum->coef[i][j][k][l][m][n] += counts->coef[i][j][k][l][m][n]; } } else { @@ -352,64 +351,64 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, 
FRAME_COUNTS *counts, for (k = 0; k < REF_TYPES; k++) for (l = 0; l < COEF_BANDS; l++) for (m = 0; m < COEFF_CONTEXTS; m++) - cm->counts.eob_branch[i][j][k][l][m] += + accum->eob_branch[i][j][k][l][m] += counts->eob_branch[i][j][k][l][m]; - // In the encoder, cm->counts.coef is only updated at frame + // In the encoder, coef is only updated at frame // level, so not need to accumulate it here. // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - // cm->counts.coef[i][j][k][l][m][n] += + // accum->coef[i][j][k][l][m][n] += // counts->coef[i][j][k][l][m][n]; } for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) for (j = 0; j < SWITCHABLE_FILTERS; j++) - cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j]; + accum->switchable_interp[i][j] += counts->switchable_interp[i][j]; for (i = 0; i < INTER_MODE_CONTEXTS; i++) for (j = 0; j < INTER_MODES; j++) - cm->counts.inter_mode[i][j] += counts->inter_mode[i][j]; + accum->inter_mode[i][j] += counts->inter_mode[i][j]; for (i = 0; i < INTRA_INTER_CONTEXTS; i++) for (j = 0; j < 2; j++) - cm->counts.intra_inter[i][j] += counts->intra_inter[i][j]; + accum->intra_inter[i][j] += counts->intra_inter[i][j]; for (i = 0; i < COMP_INTER_CONTEXTS; i++) for (j = 0; j < 2; j++) - cm->counts.comp_inter[i][j] += counts->comp_inter[i][j]; + accum->comp_inter[i][j] += counts->comp_inter[i][j]; for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) - cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k]; + accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) - cm->counts.comp_ref[i][j] += counts->comp_ref[i][j]; + accum->comp_ref[i][j] += counts->comp_ref[i][j]; for (i = 0; i < TX_SIZE_CONTEXTS; i++) { for (j = 0; j < TX_SIZES; j++) - cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j]; + accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j]; for (j = 0; j < TX_SIZES - 1; j++) - cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j]; + 
accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j]; for (j = 0; j < TX_SIZES - 2; j++) - cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j]; + accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j]; } for (i = 0; i < TX_SIZES; i++) - cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i]; + accum->tx.tx_totals[i] += counts->tx.tx_totals[i]; for (i = 0; i < SKIP_CONTEXTS; i++) for (j = 0; j < 2; j++) - cm->counts.skip[i][j] += counts->skip[i][j]; + accum->skip[i][j] += counts->skip[i][j]; for (i = 0; i < MV_JOINTS; i++) - cm->counts.mv.joints[i] += counts->mv.joints[i]; + accum->mv.joints[i] += counts->mv.joints[i]; for (k = 0; k < 2; k++) { - nmv_component_counts *comps = &cm->counts.mv.comps[k]; - nmv_component_counts *comps_t = &counts->mv.comps[k]; + nmv_component_counts *const comps = &accum->mv.comps[k]; + const nmv_component_counts *const comps_t = &counts->mv.comps[k]; for (i = 0; i < 2; i++) { comps->sign[i] += comps_t->sign[i]; diff --git a/libvpx/vp9/common/vp9_thread_common.h b/libvpx/vp9/common/vp9_thread_common.h index 07af1bc4..b3b60c25 100644 --- a/libvpx/vp9/common/vp9_thread_common.h +++ b/libvpx/vp9/common/vp9_thread_common.h @@ -8,12 +8,16 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_COMMON_VP9_LOOPFILTER_THREAD_H_ -#define VP9_COMMON_VP9_LOOPFILTER_THREAD_H_ +#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_ +#define VP9_COMMON_VP9_THREAD_COMMON_H_ #include "./vpx_config.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx_util/vpx_thread.h" +#ifdef __cplusplus +extern "C" { +#endif + struct VP9Common; struct FRAME_COUNTS; @@ -51,7 +55,11 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VPxWorker *workers, int num_workers, VP9LfSync *lf_sync); -void vp9_accumulate_frame_counts(struct VP9Common *cm, - struct FRAME_COUNTS *counts, int is_dec); +void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, + const struct FRAME_COUNTS *counts, int is_dec); + +#ifdef __cplusplus +} // extern "C" +#endif -#endif // VP9_COMMON_VP9_LOOPFILTER_THREAD_H_ +#endif // VP9_COMMON_VP9_THREAD_COMMON_H_ diff --git a/libvpx/vp9/common/vp9_tile_common.c b/libvpx/vp9/common/vp9_tile_common.c index 7a20e0a9..9fcb97c8 100644 --- a/libvpx/vp9/common/vp9_tile_common.c +++ b/libvpx/vp9/common/vp9_tile_common.c @@ -9,8 +9,8 @@ */ #include "vp9/common/vp9_tile_common.h" - #include "vp9/common/vp9_onyxc_int.h" +#include "vpx_dsp/vpx_dsp_common.h" #define MIN_TILE_WIDTH_B64 4 #define MAX_TILE_WIDTH_B64 64 @@ -18,7 +18,7 @@ static int get_tile_offset(int idx, int mis, int log2) { const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2; const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2; - return MIN(offset, mis); + return VPXMIN(offset, mis); } void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) { diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c index 4a163457..8d312d03 100644 --- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -12,14 +12,14 @@ #include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" -void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, +void 
vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { __m128i in[2]; const __m128i zero = _mm_setzero_si128(); const __m128i eight = _mm_set1_epi16(8); - in[0] = _mm_loadu_si128((const __m128i *)(input)); - in[1] = _mm_loadu_si128((const __m128i *)(input + 8)); + in[0] = load_input_data(input); + in[1] = load_input_data(input + 8); switch (tx_type) { case 0: // DCT_DCT @@ -77,21 +77,21 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, } } -void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride, +void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { __m128i in[8]; const __m128i zero = _mm_setzero_si128(); const __m128i final_rounding = _mm_set1_epi16(1 << 4); // load input data - in[0] = _mm_load_si128((const __m128i *)input); - in[1] = _mm_load_si128((const __m128i *)(input + 8 * 1)); - in[2] = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in[3] = _mm_load_si128((const __m128i *)(input + 8 * 3)); - in[4] = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in[5] = _mm_load_si128((const __m128i *)(input + 8 * 5)); - in[6] = _mm_load_si128((const __m128i *)(input + 8 * 6)); - in[7] = _mm_load_si128((const __m128i *)(input + 8 * 7)); + in[0] = load_input_data(input); + in[1] = load_input_data(input + 8 * 1); + in[2] = load_input_data(input + 8 * 2); + in[3] = load_input_data(input + 8 * 3); + in[4] = load_input_data(input + 8 * 4); + in[5] = load_input_data(input + 8 * 5); + in[6] = load_input_data(input + 8 * 6); + in[7] = load_input_data(input + 8 * 7); switch (tx_type) { case 0: // DCT_DCT @@ -144,8 +144,8 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride, RECON_AND_STORE(dest + 7 * stride, in[7]); } -void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride, - int tx_type) { +void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride, int tx_type) { __m128i 
in0[16], in1[16]; load_buffer_8x16(input, in0); diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c index fb7b3b80..f1916639 100644 --- a/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/libvpx/vp9/decoder/vp9_decodeframe.c @@ -17,6 +17,7 @@ #include "vpx_dsp/bitreader_buffer.h" #include "vpx_dsp/bitreader.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/mem_ops.h" @@ -658,7 +659,7 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, // pixels of each superblock row can be changed by next superblock row. if (pbi->frame_parallel_decode) vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); // Skip border extension if block is inside the frame. if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || @@ -686,7 +687,7 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, if (pbi->frame_parallel_decode) { const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + VPXMAX(0, (y1 + 7)) << (plane == 0 ? 
0 : 1)); } } #if CONFIG_VP9_HIGHBITDEPTH @@ -757,8 +758,8 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, int n4_hl) { // get minimum log2 num4x4s dimension - const int x = MIN(n4_wl, n4_hl); - return MIN(mbmi->tx_size, x); + const int x = VPXMIN(n4_wl, n4_hl); + return VPXMIN(mbmi->tx_size, x); } static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) { @@ -819,8 +820,8 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, const int less8x8 = bsize < BLOCK_8X8; const int bw = 1 << (bwl - 1); const int bh = 1 << (bhl - 1); - const int x_mis = MIN(bw, cm->mi_cols - mi_col); - const int y_mis = MIN(bh, cm->mi_rows - mi_row); + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, bwl, bhl); @@ -895,6 +896,10 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, } xd->corrupted |= vpx_reader_has_error(r); + + if (cm->lf.filter_level) { + vp9_build_mask(cm, mbmi, mi_row, mi_col, bw, bh); + } } static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd, @@ -1180,11 +1185,11 @@ static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { : literal_to_filter[vpx_rb_read_literal(rb, 2)]; } -static void setup_display_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - cm->display_width = cm->width; - cm->display_height = cm->height; +static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + cm->render_width = cm->width; + cm->render_height = cm->height; if (vpx_rb_read_bit(rb)) - vp9_read_frame_size(rb, &cm->display_width, &cm->display_height); + vp9_read_frame_size(rb, &cm->render_width, &cm->render_height); } static void resize_mv_buffer(VP9_COMMON *cm) { @@ -1232,7 +1237,7 @@ static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer 
*rb) { BufferPool *const pool = cm->buffer_pool; vp9_read_frame_size(rb, &width, &height); resize_context_buffers(cm, width, height); - setup_display_size(cm, rb); + setup_render_size(cm, rb); lock_buffer_pool(pool); if (vpx_realloc_frame_buffer( @@ -1255,6 +1260,9 @@ static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; + pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, @@ -1313,7 +1321,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, } resize_context_buffers(cm, width, height); - setup_display_size(cm, rb); + setup_render_size(cm, rb); lock_buffer_pool(pool); if (vpx_realloc_frame_buffer( @@ -1336,6 +1344,9 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; + pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { @@ -1358,12 +1369,6 @@ static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { cm->log2_tile_rows += vpx_rb_read_bit(rb); } -typedef struct TileBuffer { - const uint8_t *data; - size_t size; - int col; // only used with multi-threaded decoding -} TileBuffer; - // Reads the next tile returning 
its size and adjusting '*data' accordingly // based on 'is_last'. static void get_tile_buffer(const uint8_t *const data_end, @@ -1461,6 +1466,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * aligned_cols); + vp9_reset_lfm(cm); + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); if (pbi->tile_data == NULL || @@ -1560,30 +1567,54 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, return vpx_reader_find_end(&tile_data->bit_reader); } +// On entry 'tile_data->data_end' points to the end of the input frame, on exit +// it is updated to reflect the bitreader position of the final tile column if +// present in the tile buffer group or NULL otherwise. static int tile_worker_hook(TileWorkerData *const tile_data, - const TileInfo *const tile) { - int mi_row, mi_col; + VP9Decoder *const pbi) { + TileInfo *volatile tile = &tile_data->xd.tile; + const int final_col = (1 << pbi->common.log2_tile_cols) - 1; + const uint8_t *volatile bit_reader_end = NULL; + volatile int n = tile_data->buf_start; + tile_data->error_info.setjmp = 1; if (setjmp(tile_data->error_info.jmp)) { tile_data->error_info.setjmp = 0; tile_data->xd.corrupted = 1; + tile_data->data_end = NULL; return 0; } - tile_data->error_info.setjmp = 1; tile_data->xd.error_info = &tile_data->error_info; + tile_data->xd.corrupted = 0; - for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; - mi_row += MI_BLOCK_SIZE) { - vp9_zero(tile_data->xd.left_context); - vp9_zero(tile_data->xd.left_seg_context); - for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; - mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data->pbi, &tile_data->xd, - mi_row, mi_col, &tile_data->bit_reader, - BLOCK_64X64, 4); + do { + int mi_row, mi_col; + const TileBuffer *const buf = pbi->tile_buffers + n; + vp9_zero(tile_data->dqcoeff); + vp9_tile_init(tile, &pbi->common, 0, buf->col); + setup_token_decoder(buf->data, tile_data->data_end, 
buf->size, + &tile_data->error_info, &tile_data->bit_reader, + pbi->decrypt_cb, pbi->decrypt_state); + vp9_init_macroblockd(&pbi->common, &tile_data->xd, tile_data->dqcoeff); + + for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; + mi_row += MI_BLOCK_SIZE) { + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) { + decode_partition(pbi, &tile_data->xd, mi_row, mi_col, + &tile_data->bit_reader, BLOCK_64X64, 4); + } } - } + + if (buf->col == final_col) { + bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader); + } + } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); + + tile_data->data_end = bit_reader_end; return !tile_data->xd.corrupted; } @@ -1603,20 +1634,15 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; - const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); - TileBuffer tile_buffers[1][1 << 6]; + const int num_workers = VPXMIN(pbi->max_threads, tile_cols); int n; - int final_worker = -1; assert(tile_cols <= (1 << 6)); assert(tile_rows == 1); (void)tile_rows; - // TODO(jzern): See if we can remove the restriction of passing in max - // threads to the decoder. if (pbi->num_tile_workers == 0) { - const int num_threads = pbi->max_threads & ~1; - int i; + const int num_threads = pbi->max_threads; CHECK_MEM_ERROR(cm, pbi->tile_workers, vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); // Ensure tile data offsets will be properly aligned. 
This may fail on @@ -1625,14 +1651,12 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, num_threads * sizeof(*pbi->tile_worker_data))); - CHECK_MEM_ERROR(cm, pbi->tile_worker_info, - vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info))); - for (i = 0; i < num_threads; ++i) { - VPxWorker *const worker = &pbi->tile_workers[i]; + for (n = 0; n < num_threads; ++n) { + VPxWorker *const worker = &pbi->tile_workers[n]; ++pbi->num_tile_workers; winterface->init(worker); - if (i < num_threads - 1 && !winterface->reset(worker)) { + if (n < num_threads - 1 && !winterface->reset(worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile decoder thread creation failed"); } @@ -1642,10 +1666,14 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; + TileWorkerData *const tile_data = &pbi->tile_worker_data[n]; winterface->sync(worker); + tile_data->xd = pbi->mb; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; worker->hook = (VPxWorkerHook)tile_worker_hook; - worker->data1 = &pbi->tile_worker_data[n]; - worker->data2 = &pbi->tile_worker_info[n]; + worker->data1 = tile_data; + worker->data2 = pbi; } // Note: this memset assumes above_context[0], [1] and [2] @@ -1655,101 +1683,95 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * aligned_mi_cols); + vp9_reset_lfm(cm); + // Load tile data into tile_buffers - get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, + &pbi->tile_buffers); // Sort the buffers based on size in descending order. 
- qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]), + qsort(pbi->tile_buffers, tile_cols, sizeof(pbi->tile_buffers[0]), compare_tile_buffers); - // Rearrange the tile buffers such that per-tile group the largest, and - // presumably the most difficult, tile will be decoded in the main thread. - // This should help minimize the number of instances where the main thread is - // waiting for a worker to complete. - { - int group_start = 0; - while (group_start < tile_cols) { - const TileBuffer largest = tile_buffers[0][group_start]; - const int group_end = MIN(group_start + num_workers, tile_cols) - 1; - memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1, - (group_end - group_start) * sizeof(tile_buffers[0][0])); - tile_buffers[0][group_end] = largest; - group_start = group_end + 1; + if (num_workers == tile_cols) { + // Rearrange the tile buffers such that the largest, and + // presumably the most difficult, tile will be decoded in the main thread. + // This should help minimize the number of instances where the main thread + // is waiting for a worker to complete. + const TileBuffer largest = pbi->tile_buffers[0]; + memmove(pbi->tile_buffers, pbi->tile_buffers + 1, + (tile_cols - 1) * sizeof(pbi->tile_buffers[0])); + pbi->tile_buffers[tile_cols - 1] = largest; + } else { + int start = 0, end = tile_cols - 2; + TileBuffer tmp; + + // Interleave the tiles to distribute the load between threads, assuming a + // larger tile implies it is more difficult to decode. + while (start < end) { + tmp = pbi->tile_buffers[start]; + pbi->tile_buffers[start] = pbi->tile_buffers[end]; + pbi->tile_buffers[end] = tmp; + start += 2; + end -= 2; } } // Initialize thread frame counts. 
if (!cm->frame_parallel_decoding_mode) { - int i; - - for (i = 0; i < num_workers; ++i) { + for (n = 0; n < num_workers; ++n) { TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[i].data1; + (TileWorkerData*)pbi->tile_workers[n].data1; vp9_zero(tile_data->counts); } } - n = 0; - while (n < tile_cols) { - int i; - for (i = 0; i < num_workers && n < tile_cols; ++i) { - VPxWorker *const worker = &pbi->tile_workers[i]; + { + const int base = tile_cols / num_workers; + const int remain = tile_cols % num_workers; + int buf_start = 0; + + for (n = 0; n < num_workers; ++n) { + const int count = base + (remain + n) / num_workers; + VPxWorker *const worker = &pbi->tile_workers[n]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - TileInfo *const tile = (TileInfo*)worker->data2; - TileBuffer *const buf = &tile_buffers[0][n]; - tile_data->pbi = pbi; - tile_data->xd = pbi->mb; - tile_data->xd.corrupted = 0; - tile_data->xd.counts = cm->frame_parallel_decoding_mode ? 
- 0 : &tile_data->counts; - vp9_zero(tile_data->dqcoeff); - vp9_tile_init(tile, cm, 0, buf->col); - vp9_tile_init(&tile_data->xd.tile, cm, 0, buf->col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, - &tile_data->bit_reader, pbi->decrypt_cb, - pbi->decrypt_state); - vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); + tile_data->buf_start = buf_start; + tile_data->buf_end = buf_start + count - 1; + tile_data->data_end = data_end; + buf_start += count; worker->had_error = 0; - if (i == num_workers - 1 || n == tile_cols - 1) { + if (n == num_workers - 1) { + assert(tile_data->buf_end == tile_cols - 1); winterface->execute(worker); } else { winterface->launch(worker); } - - if (buf->col == tile_cols - 1) { - final_worker = i; - } - - ++n; } - for (; i > 0; --i) { - VPxWorker *const worker = &pbi->tile_workers[i - 1]; + for (; n > 0; --n) { + VPxWorker *const worker = &pbi->tile_workers[n - 1]; + TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; // TODO(jzern): The tile may have specific error data associated with // its vpx_internal_error_info which could be propagated to the main info // in cm. Additionally once the threads have been synced and an error is // detected, there's no point in continuing to decode tiles. pbi->mb.corrupted |= !winterface->sync(worker); + if (!bit_reader_end) bit_reader_end = tile_data->data_end; } - if (final_worker > -1) { - TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[final_worker].data1; - bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader); - final_worker = -1; - } + } - // Accumulate thread frame counts. - if (n >= tile_cols && !cm->frame_parallel_decoding_mode) { - for (i = 0; i < num_workers; ++i) { - TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[i].data1; - vp9_accumulate_frame_counts(cm, &tile_data->counts, 1); - } + // Accumulate thread frame counts. 
+ if (!cm->frame_parallel_decoding_mode) { + for (n = 0; n < num_workers; ++n) { + TileWorkerData *const tile_data = + (TileWorkerData*)pbi->tile_workers[n].data1; + vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); } } + assert(bit_reader_end || pbi->mb.corrupted); return bit_reader_end; } @@ -1773,7 +1795,7 @@ static void read_bitdepth_colorspace_sampling( } cm->color_space = vpx_rb_read_literal(rb, 3); if (cm->color_space != VPX_CS_SRGB) { - vpx_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range + cm->color_range = (vpx_color_range_t)vpx_rb_read_bit(rb); if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { cm->subsampling_x = vpx_rb_read_bit(rb); cm->subsampling_y = vpx_rb_read_bit(rb); @@ -1787,6 +1809,7 @@ static void read_bitdepth_colorspace_sampling( cm->subsampling_y = cm->subsampling_x = 1; } } else { + cm->color_range = VPX_CR_FULL_RANGE; if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed. // 4:2:2 or 4:4:0 chroma sampling is not allowed. @@ -1892,6 +1915,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, // specifies that the default color format should be YUV 4:2:0 in this // case (normative). 
cm->color_space = VPX_CS_BT_601; + cm->color_range = VPX_CR_STUDIO_RANGE; cm->subsampling_y = cm->subsampling_x = 1; cm->bit_depth = VPX_BITS_8; #if CONFIG_VP9_HIGHBITDEPTH @@ -1942,6 +1966,9 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, get_frame_new_buffer(cm)->bit_depth = cm->bit_depth; #endif get_frame_new_buffer(cm)->color_space = cm->color_space; + get_frame_new_buffer(cm)->color_range = cm->color_range; + get_frame_new_buffer(cm)->render_width = cm->render_width; + get_frame_new_buffer(cm)->render_height = cm->render_height; if (pbi->need_resync) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, @@ -2102,7 +2129,7 @@ static struct vpx_read_bit_buffer *init_read_bit_buffer( rb->error_handler = error_handler; rb->error_handler_data = &pbi->common; if (pbi->decrypt_cb) { - const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data); + const int n = (int)VPXMIN(MAX_VP9_HEADER_SIZE, data_end - data); pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); rb->bit_buffer = clear_data; rb->bit_buffer_end = clear_data + n; diff --git a/libvpx/vp9/decoder/vp9_decodeframe.h b/libvpx/vp9/decoder/vp9_decodeframe.h index 05af7063..ce33cbdb 100644 --- a/libvpx/vp9/decoder/vp9_decodeframe.h +++ b/libvpx/vp9/decoder/vp9_decodeframe.h @@ -16,6 +16,8 @@ extern "C" { #endif +#include "vp9/common/vp9_enums.h" + struct VP9Decoder; struct vpx_read_bit_buffer; diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c index 33818a99..d3ca7b3f 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.c +++ b/libvpx/vp9/decoder/vp9_decodemv.c @@ -22,6 +22,8 @@ #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decodeframe.h" +#include "vpx_dsp/vpx_dsp_common.h" + static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) { return (PREDICTION_MODE)vpx_read_tree(r, vp9_intra_mode_tree, p); } @@ -87,7 +89,7 @@ static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, if (allow_select && tx_mode == TX_MODE_SELECT && bsize 
>= BLOCK_8X8) return read_selected_tx_size(cm, xd, max_tx_size, r); else - return MIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); + return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); } static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids, @@ -96,8 +98,8 @@ static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids, for (y = 0; y < y_mis; y++) for (x = 0; x < x_mis; x++) - segment_id = MIN(segment_id, - segment_ids[mi_offset + y * cm->mi_cols + x]); + segment_id = + VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); return segment_id; @@ -156,8 +158,8 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, const int bh = xd->plane[0].n4_h >> 1; // TODO(slavarnway): move x_mis, y_mis into xd ????? - const int x_mis = MIN(cm->mi_cols - mi_col, bw); - const int y_mis = MIN(cm->mi_rows - mi_row, bh); + const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw); + const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh); if (!seg->enabled) return 0; // Default for disabled segmentation @@ -212,8 +214,8 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm, const int bh = xd->plane[0].n4_h >> 1; // TODO(slavarnway): move x_mis, y_mis into xd ????? 
- const int x_mis = MIN(cm->mi_cols - mi_col, bw); - const int y_mis = MIN(cm->mi_rows - mi_row, bh); + const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw); + const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh); mbmi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r); mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); diff --git a/libvpx/vp9/decoder/vp9_decoder.c b/libvpx/vp9/decoder/vp9_decoder.c index 6734d002..4e88819b 100644 --- a/libvpx/vp9/decoder/vp9_decoder.c +++ b/libvpx/vp9/decoder/vp9_decoder.c @@ -126,6 +126,9 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) { void vp9_decoder_remove(VP9Decoder *pbi) { int i; + if (!pbi) + return; + vpx_get_worker_interface()->end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); vpx_free(pbi->tile_data); @@ -134,7 +137,6 @@ void vp9_decoder_remove(VP9Decoder *pbi) { vpx_get_worker_interface()->end(worker); } vpx_free(pbi->tile_worker_data); - vpx_free(pbi->tile_worker_info); vpx_free(pbi->tile_workers); if (pbi->num_tile_workers > 0) { diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h index 915f9dc8..4a5188f8 100644 --- a/libvpx/vp9/decoder/vp9_decoder.h +++ b/libvpx/vp9/decoder/vp9_decoder.h @@ -36,8 +36,15 @@ typedef struct TileData { DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); } TileData; +typedef struct TileBuffer { + const uint8_t *data; + size_t size; + int col; // only used with multi-threaded decoding +} TileBuffer; + typedef struct TileWorkerData { - struct VP9Decoder *pbi; + const uint8_t *data_end; + int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive vpx_reader bit_reader; FRAME_COUNTS counts; DECLARE_ALIGNED(16, MACROBLOCKD, xd); @@ -65,7 +72,7 @@ typedef struct VP9Decoder { VPxWorker lf_worker; VPxWorker *tile_workers; TileWorkerData *tile_worker_data; - TileInfo *tile_worker_info; + TileBuffer tile_buffers[64]; int num_tile_workers; TileData *tile_data; diff --git a/libvpx/vp9/decoder/vp9_detokenize.c 
b/libvpx/vp9/decoder/vp9_detokenize.c index e4412dc3..59123653 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.c +++ b/libvpx/vp9/decoder/vp9_detokenize.c @@ -259,7 +259,7 @@ int vp9_decode_block_tokens(MACROBLOCKD *xd, const int16_t *const dequant = pd->seg_dequant[seg_id]; const int ctx = get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y); - const int eob = decode_coefs(xd, pd->plane_type, + const int eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, dequant, ctx, sc->scan, sc->neighbors, r); dec_set_contexts(xd, pd, tx_size, eob > 0, x, y); diff --git a/libvpx/vp9/decoder/vp9_dthread.h b/libvpx/vp9/decoder/vp9_dthread.h index f6cdccd9..ba7c38a5 100644 --- a/libvpx/vp9/decoder/vp9_dthread.h +++ b/libvpx/vp9/decoder/vp9_dthread.h @@ -15,6 +15,10 @@ #include "vpx_util/vpx_thread.h" #include "vpx/internal/vpx_codec_internal.h" +#ifdef __cplusplus +extern "C" { +#endif + struct VP9Common; struct VP9Decoder; @@ -63,4 +67,8 @@ void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row); void vp9_frameworker_copy_context(VPxWorker *const dst_worker, VPxWorker *const src_worker); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_DECODER_VP9_DTHREAD_H_ diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.c b/libvpx/vp9/encoder/vp9_aq_complexity.c index 15f227fb..30ec1911 100644 --- a/libvpx/vp9/encoder/vp9_aq_complexity.c +++ b/libvpx/vp9/encoder/vp9_aq_complexity.c @@ -10,6 +10,7 @@ #include <limits.h> #include <math.h> +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_aq_complexity.h" @@ -117,8 +118,8 @@ void vp9_caq_select_segment(VP9_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs, const int mi_offset = mi_row * cm->mi_cols + mi_col; const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; - const int xmis = MIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]); - const int ymis = MIN(cm->mi_rows - mi_row, 
num_8x8_blocks_high_lookup[bs]); + const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]); + const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]); int x, y; int i; unsigned char segment; @@ -136,7 +137,7 @@ void vp9_caq_select_segment(VP9_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs, vpx_clear_system_state(); low_var_thresh = (cpi->oxcf.pass == 2) - ? MAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH) + ? VPXMAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH) : DEFAULT_LV_THRESH; vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index e6b36861..2cd89c0d 100644 --- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -11,6 +11,7 @@ #include <limits.h> #include <math.h> +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_aq_cyclicrefresh.h" @@ -20,46 +21,9 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" -struct CYCLIC_REFRESH { - // Percentage of blocks per frame that are targeted as candidates - // for cyclic refresh. - int percent_refresh; - // Maximum q-delta as percentage of base q. - int max_qdelta_perc; - // Superblock starting index for cycling through the frame. - int sb_index; - // Controls how long block will need to wait to be refreshed again, in - // excess of the cycle time, i.e., in the case of all zero motion, block - // will be refreshed every (100/percent_refresh + time_for_refresh) frames. - int time_for_refresh; - // Target number of (8x8) blocks that are set for delta-q. - int target_num_seg_blocks; - // Actual number of (8x8) blocks that were applied delta-q. - int actual_num_seg1_blocks; - int actual_num_seg2_blocks; - // RD mult. parameters for segment 1. - int rdmult; - // Cyclic refresh map. - signed char *map; - // Map of the last q a block was coded at. 
- uint8_t *last_coded_q_map; - // Thresholds applied to the projected rate/distortion of the coding block, - // when deciding whether block should be refreshed. - int64_t thresh_rate_sb; - int64_t thresh_dist_sb; - // Threshold applied to the motion vector (in units of 1/8 pel) of the - // coding block, when deciding whether block should be refreshed. - int16_t motion_thresh; - // Rate target ratio to set q delta. - double rate_ratio_qdelta; - // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2. - int rate_boost_fac; - double low_content_avg; - int qindex_delta[3]; -}; - CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { size_t last_coded_q_map_size; + size_t consec_zero_mv_size; CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); if (cr == NULL) return NULL; @@ -78,12 +42,20 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { assert(MAXQ <= 255); memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); + consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv); + cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size); + if (cr->consec_zero_mv == NULL) { + vpx_free(cr); + return NULL; + } + memset(cr->consec_zero_mv, 0, consec_zero_mv_size); return cr; } void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { vpx_free(cr->map); vpx_free(cr->last_coded_q_map); + vpx_free(cr->consec_zero_mv); vpx_free(cr); } @@ -195,7 +167,8 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const VP9_COMP *cpi, int i, int num8x8bl = cm->MBs << 2; // Weight for segment prior to encoding: take the average of the target // number for the frame to be encoded and the actual from the previous frame. - double weight_segment = (double)((cr->target_num_seg_blocks + + int target_refresh = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100; + double weight_segment = (double)((target_refresh + cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) >> 1) / num8x8bl; // Compute delta-q corresponding to qindex i. 
@@ -223,8 +196,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist, bsize); @@ -236,7 +209,7 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, // segment_id. if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) { mbmi->segment_id = refresh_this_block; - // Reset segment_id if will be skipped. + // Reset segment_id if it will be skipped. if (skip) mbmi->segment_id = CR_SEGMENT_ID_BASE; } @@ -265,14 +238,48 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, int map_offset = block_index + y * cm->mi_cols + x; cr->map[map_offset] = new_map_value; cpi->segmentation_map[map_offset] = mbmi->segment_id; + } +} + +void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, + const MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + MV mv = mbmi->mv[0].as_mv; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + const int block_index = mi_row * cm->mi_cols + mi_col; + int x, y; + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) { + int map_offset = block_index + y * cm->mi_cols + x; // Inter skip blocks were clearly not coded at the current qindex, so // don't update the map for them. 
For cases where motion is non-zero or // the reference frame isn't the previous frame, the previous value in // the map for this spatial location is not entirely correct. - if (!is_inter_block(mbmi) || !skip) + if ((!is_inter_block(mbmi) || !mbmi->skip) && + mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) { cr->last_coded_q_map[map_offset] = clamp( cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ); + } else if (is_inter_block(mbmi) && mbmi->skip && + mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) { + cr->last_coded_q_map[map_offset] = VPXMIN( + clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id], + 0, MAXQ), + cr->last_coded_q_map[map_offset]); + // Update the consecutive zero/low_mv count. + if (is_inter_block(mbmi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) { + if (cr->consec_zero_mv[map_offset] < 255) + cr->consec_zero_mv[map_offset]++; + } else { + cr->consec_zero_mv[map_offset] = 0; + } } + } } // Update the actual number of blocks that were applied the segment delta q. @@ -389,6 +396,10 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { unsigned char *const seg_map = cpi->segmentation_map; int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; int xmis, ymis, x, y; + int consec_zero_mv_thresh = 0; + int qindex_thresh = 0; + int count_sel = 0; + int count_tot = 0; memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols); sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; @@ -401,6 +412,12 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { assert(cr->sb_index < sbs_in_frame); i = cr->sb_index; cr->target_num_seg_blocks = 0; + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) + consec_zero_mv_thresh = 100; + qindex_thresh = + cpi->oxcf.content == VP9E_CONTENT_SCREEN + ? 
vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) + : vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex); do { int sum_map = 0; // Get the mi_row/mi_col corresponding to superblock index i. @@ -408,18 +425,14 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { int sb_col_index = i - sb_row_index * sb_cols; int mi_row = sb_row_index * MI_BLOCK_SIZE; int mi_col = sb_col_index * MI_BLOCK_SIZE; - int qindex_thresh = - cpi->oxcf.content == VP9E_CONTENT_SCREEN - ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) - : 0; assert(mi_row >= 0 && mi_row < cm->mi_rows); assert(mi_col >= 0 && mi_col < cm->mi_cols); bl_index = mi_row * cm->mi_cols + mi_col; // Loop through all 8x8 blocks in superblock and update map. - xmis = MIN(cm->mi_cols - mi_col, - num_8x8_blocks_wide_lookup[BLOCK_64X64]); - ymis = MIN(cm->mi_rows - mi_row, - num_8x8_blocks_high_lookup[BLOCK_64X64]); + xmis = + VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]); + ymis = + VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]); for (y = 0; y < ymis; y++) { for (x = 0; x < xmis; x++) { const int bl_index2 = bl_index + y * cm->mi_cols + x; @@ -427,8 +440,12 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { // for possible boost/refresh (segment 1). The segment id may get // reset to 0 later if block gets coded anything other than ZEROMV. 
if (cr->map[bl_index2] == 0) { - if (cr->last_coded_q_map[bl_index2] > qindex_thresh) + count_tot++; + if (cr->last_coded_q_map[bl_index2] > qindex_thresh || + cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) { sum_map++; + count_sel++; + } } else if (cr->map[bl_index2] < 0) { cr->map[bl_index2]++; } @@ -449,6 +466,9 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { } } while (cr->target_num_seg_blocks < block_count && i != cr->sb_index); cr->sb_index = i; + cr->reduce_refresh = 0; + if (count_sel < (3 * count_tot) >> 2) + cr->reduce_refresh = 1; } // Set cyclic refresh parameters. @@ -457,6 +477,8 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; cr->percent_refresh = 10; + if (cr->reduce_refresh) + cr->percent_refresh = 5; cr->max_qdelta_perc = 50; cr->time_for_refresh = 0; // Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4) @@ -476,7 +498,11 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { cr->rate_boost_fac = 10; } else { cr->motion_thresh = 32; - cr->rate_boost_fac = 17; + cr->rate_boost_fac = 15; + } + if (cpi->svc.spatial_layer_id > 0) { + cr->motion_thresh = 4; + cr->rate_boost_fac = 12; } } @@ -489,11 +515,10 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); if (cm->current_video_frame == 0) cr->low_content_avg = 0.0; - // Don't apply refresh on key frame or enhancement layer frames. + // Don't apply refresh on key frame or temporal enhancement layer frames. if (!apply_cyclic_refresh || (cm->frame_type == KEY_FRAME) || - (cpi->svc.temporal_layer_id > 0) || - (cpi->svc.spatial_layer_id > 0)) { + (cpi->svc.temporal_layer_id > 0)) { // Set segmentation map to 0 and disable. 
unsigned char *const seg_map = cpi->segmentation_map; memset(seg_map, 0, cm->mi_rows * cm->mi_cols); @@ -501,6 +526,8 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { if (cm->frame_type == KEY_FRAME) { memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); + memset(cr->consec_zero_mv, 0, + cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv)); cr->sb_index = 0; } return; @@ -551,11 +578,16 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { // Set a more aggressive (higher) q delta for segment BOOST2. qindex_delta = compute_deltaq( - cpi, cm->base_qindex, MIN(CR_MAX_RATE_TARGET_RATIO, - 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta)); + cpi, cm->base_qindex, + VPXMIN(CR_MAX_RATE_TARGET_RATIO, + 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta)); cr->qindex_delta[2] = qindex_delta; vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta); + // Reset if resolution change has occurred. + if (cpi->resize_pending != 0) + vp9_cyclic_refresh_reset_resize(cpi); + // Update the segmentation and refresh map.
cyclic_refresh_update_map(cpi); } @@ -569,6 +601,8 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; memset(cr->map, 0, cm->mi_rows * cm->mi_cols); + memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols); + memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols); cr->sb_index = 0; cpi->refresh_golden_frame = 1; } diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h index 29d2a91b..a5b38138 100644 --- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -12,6 +12,7 @@ #ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ #define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" #ifdef __cplusplus @@ -27,9 +28,49 @@ extern "C" { // Maximum rate target ratio for setting segment delta-qp. #define CR_MAX_RATE_TARGET_RATIO 4.0 +struct CYCLIC_REFRESH { + // Percentage of blocks per frame that are targeted as candidates + // for cyclic refresh. + int percent_refresh; + // Maximum q-delta as percentage of base q. + int max_qdelta_perc; + // Superblock starting index for cycling through the frame. + int sb_index; + // Controls how long block will need to wait to be refreshed again, in + // excess of the cycle time, i.e., in the case of all zero motion, block + // will be refreshed every (100/percent_refresh + time_for_refresh) frames. + int time_for_refresh; + // Target number of (8x8) blocks that are set for delta-q. + int target_num_seg_blocks; + // Actual number of (8x8) blocks that were applied delta-q. + int actual_num_seg1_blocks; + int actual_num_seg2_blocks; + // RD mult. parameters for segment 1. + int rdmult; + // Cyclic refresh map. + signed char *map; + // Map of the last q a block was coded at. + uint8_t *last_coded_q_map; + // Count of how many consecutive times a block uses ZEROMV for encoding.
+ uint8_t *consec_zero_mv; + // Thresholds applied to the projected rate/distortion of the coding block, + // when deciding whether block should be refreshed. + int64_t thresh_rate_sb; + int64_t thresh_dist_sb; + // Threshold applied to the motion vector (in units of 1/8 pel) of the + // coding block, when deciding whether block should be refreshed. + int16_t motion_thresh; + // Rate target ratio to set q delta. + double rate_ratio_qdelta; + // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2. + int rate_boost_fac; + double low_content_avg; + int qindex_delta[3]; + int reduce_refresh; +}; + struct VP9_COMP; -struct CYCLIC_REFRESH; typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols); @@ -54,6 +95,11 @@ void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, int mi_row, int mi_col, BLOCK_SIZE bsize, int64_t rate, int64_t dist, int skip); +void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi, + const MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize); + // Update the segmentation map, and related quantities: cyclic refresh map, // refresh sb_index, and target number of blocks to be refreshed. 
void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi); diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c index d0de095a..46155543 100644 --- a/libvpx/vp9/encoder/vp9_bitstream.c +++ b/libvpx/vp9/encoder/vp9_bitstream.c @@ -14,6 +14,7 @@ #include "vpx/vpx_encoder.h" #include "vpx_dsp/bitwriter_buffer.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem_ops.h" #include "vpx_ports/system_state.h" @@ -175,12 +176,10 @@ static void pack_mb_tokens(vpx_writer *w, const unsigned char *pb = b->prob; int v = e >> 1; int n = l; /* number of bits in v, assumed nonzero */ - int i = 0; do { const int bb = (v >> --n) & 1; - vpx_write(w, bb, pb[i >> 1]); - i = b->tree[i + bb]; + vpx_write(w, bb, *pb++); } while (n); } @@ -815,7 +814,7 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd, static void encode_txfm_probs(VP9_COMMON *cm, vpx_writer *w, FRAME_COUNTS *counts) { // Mode - vpx_write_literal(w, MIN(cm->tx_mode, ALLOW_32X32), 2); + vpx_write_literal(w, VPXMIN(cm->tx_mode, ALLOW_32X32), 2); if (cm->tx_mode >= ALLOW_32X32) vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT); @@ -968,14 +967,14 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { return total_size; } -static void write_display_size(const VP9_COMMON *cm, - struct vpx_write_bit_buffer *wb) { - const int scaling_active = cm->width != cm->display_width || - cm->height != cm->display_height; +static void write_render_size(const VP9_COMMON *cm, + struct vpx_write_bit_buffer *wb) { + const int scaling_active = cm->width != cm->render_width || + cm->height != cm->render_height; vpx_wb_write_bit(wb, scaling_active); if (scaling_active) { - vpx_wb_write_literal(wb, cm->display_width - 1, 16); - vpx_wb_write_literal(wb, cm->display_height - 1, 16); + vpx_wb_write_literal(wb, cm->render_width - 1, 16); + vpx_wb_write_literal(wb, cm->render_height - 1, 16); } } @@ -984,7 +983,7 @@ static void write_frame_size(const 
VP9_COMMON *cm, vpx_wb_write_literal(wb, cm->width - 1, 16); vpx_wb_write_literal(wb, cm->height - 1, 16); - write_display_size(cm, wb); + write_render_size(cm, wb); } static void write_frame_size_with_refs(VP9_COMP *cpi, @@ -1022,7 +1021,7 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, vpx_wb_write_literal(wb, cm->height - 1, 16); } - write_display_size(cm, wb); + write_render_size(cm, wb); } static void write_sync_code(struct vpx_write_bit_buffer *wb) { @@ -1059,7 +1058,8 @@ static void write_bitdepth_colorspace_sampling( } vpx_wb_write_literal(wb, cm->color_space, 3); if (cm->color_space != VPX_CS_SRGB) { - vpx_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255] + // 0: [16, 235] (i.e. xvYCC), 1: [0, 255] + vpx_wb_write_bit(wb, cm->color_range); if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { assert(cm->subsampling_x != 1 || cm->subsampling_y != 1); vpx_wb_write_bit(wb, cm->subsampling_x); diff --git a/libvpx/vp9/encoder/vp9_context_tree.c b/libvpx/vp9/encoder/vp9_context_tree.c index e87cccba..396ed3fe 100644 --- a/libvpx/vp9/encoder/vp9_context_tree.c +++ b/libvpx/vp9/encoder/vp9_context_tree.c @@ -30,13 +30,13 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, for (i = 0; i < MAX_MB_PLANE; ++i) { for (k = 0; k < 3; ++k) { CHECK_MEM_ERROR(cm, ctx->coeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k]))); + vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k]))); + vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k]))); + vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->eobs[i][k], - vpx_memalign(16, num_blk * sizeof(*ctx->eobs[i][k]))); + vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k]))); ctx->coeff_pbuf[i][k] = ctx->coeff[i][k]; ctx->qcoeff_pbuf[i][k] = 
ctx->qcoeff[i][k]; ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k]; diff --git a/libvpx/vp9/encoder/vp9_context_tree.h b/libvpx/vp9/encoder/vp9_context_tree.h index ac244977..8e365ce3 100644 --- a/libvpx/vp9/encoder/vp9_context_tree.h +++ b/libvpx/vp9/encoder/vp9_context_tree.h @@ -14,6 +14,10 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/encoder/vp9_block.h" +#ifdef __cplusplus +extern "C" { +#endif + struct VP9_COMP; struct VP9Common; struct ThreadData; @@ -84,4 +88,8 @@ typedef struct PC_TREE { void vp9_setup_pc_tree(struct VP9Common *cm, struct ThreadData *td); void vp9_free_pc_tree(struct ThreadData *td); +#ifdef __cplusplus +} // extern "C" +#endif + #endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */ diff --git a/libvpx/vp9/encoder/vp9_denoiser.c b/libvpx/vp9/encoder/vp9_denoiser.c index 5f992856..8623b422 100644 --- a/libvpx/vp9/encoder/vp9_denoiser.c +++ b/libvpx/vp9/encoder/vp9_denoiser.c @@ -10,19 +10,18 @@ #include <assert.h> #include <limits.h> +#include <math.h> + #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_denoiser.h" +#include "vp9/encoder/vp9_encoder.h" -/* The VP9 denoiser is a work-in-progress. It currently is only designed to work - * with speed 6, though it (inexplicably) seems to also work with speed 5 (one - * would need to modify the source code in vp9_pickmode.c and vp9_encoder.c to - * make the calls to the vp9_denoiser_* functions when in speed 5). - * - * The implementation is very similar to that of the VP8 denoiser. While +/* The VP9 denoiser is similar to that of the VP8 denoiser. While * choosing the motion vectors / reference frames, the denoiser is run, and if * it did not modify the signal to much, the denoised block is copied to the * signal. 
@@ -120,10 +119,10 @@ int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, adj = adj_val[2]; } if (diff > 0) { - avg[c] = MIN(UINT8_MAX, sig[c] + adj); + avg[c] = VPXMIN(UINT8_MAX, sig[c] + adj); total_adj += adj; } else { - avg[c] = MAX(0, sig[c] - adj); + avg[c] = VPXMAX(0, sig[c] - adj); total_adj -= adj; } } @@ -160,13 +159,13 @@ int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, // Diff positive means we made positive adjustment above // (in first try/attempt), so now make negative adjustment to bring // denoised signal down. - avg[c] = MAX(0, avg[c] - adj); + avg[c] = VPXMAX(0, avg[c] - adj); total_adj -= adj; } else { // Diff negative means we made negative adjustment above // (in first try/attempt), so now make positive adjustment to bring // denoised signal up. - avg[c] = MIN(UINT8_MAX, avg[c] + adj); + avg[c] = VPXMIN(UINT8_MAX, avg[c] + adj); total_adj += adj; } } @@ -194,8 +193,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, - int *motion_magnitude - ) { + int *motion_magnitude, + int is_skin) { int mv_col, mv_row; int sse_diff = ctx->zeromv_sse - ctx->newmv_sse; MV_REFERENCE_FRAME frame; @@ -213,6 +212,9 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, saved_mbmi = *mbmi; + if (is_skin && *motion_magnitude > 16) + return COPY_BLOCK; + // If the best reference frame uses inter-prediction and there is enough of a // difference in sum-squared-error, use it. 
if (frame != INTRA_FRAME && @@ -312,18 +314,38 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx) { int motion_magnitude = 0; - VP9_DENOISER_DECISION decision = FILTER_BLOCK; + VP9_DENOISER_DECISION decision = COPY_BLOCK; YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME]; YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y; uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col); uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col); struct buf_2d src = mb->plane[0].src; + int is_skin = 0; + + if (bs <= BLOCK_16X16 && denoiser->denoising_on) { + // Take center pixel in block to determine is_skin. + const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1; + const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1; + const int uv_width_shift = y_width_shift >> 1; + const int uv_height_shift = y_height_shift >> 1; + const int stride = mb->plane[0].src.stride; + const int strideuv = mb->plane[1].src.stride; + const uint8_t ysource = + mb->plane[0].src.buf[y_height_shift * stride + y_width_shift]; + const uint8_t usource = + mb->plane[1].src.buf[uv_height_shift * strideuv + uv_width_shift]; + const uint8_t vsource = + mb->plane[2].src.buf[uv_height_shift * strideuv + uv_width_shift]; + is_skin = vp9_skin_pixel(ysource, usource, vsource); + } - decision = perform_motion_compensation(denoiser, mb, bs, - denoiser->increase_denoising, - mi_row, mi_col, ctx, - &motion_magnitude); + if (denoiser->denoising_on) + decision = perform_motion_compensation(denoiser, mb, bs, + denoiser->increase_denoising, + mi_row, mi_col, ctx, + &motion_magnitude, + is_skin); if (decision == FILTER_BLOCK) { decision = vp9_denoiser_filter(src.buf, src.stride, @@ -345,23 +367,24 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, } } -static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) { +static 
void copy_frame(YV12_BUFFER_CONFIG * const dest, + const YV12_BUFFER_CONFIG * const src) { int r; - const uint8_t *srcbuf = src.y_buffer; - uint8_t *destbuf = dest.y_buffer; + const uint8_t *srcbuf = src->y_buffer; + uint8_t *destbuf = dest->y_buffer; - assert(dest.y_width == src.y_width); - assert(dest.y_height == src.y_height); + assert(dest->y_width == src->y_width); + assert(dest->y_height == src->y_height); - for (r = 0; r < dest.y_height; ++r) { - memcpy(destbuf, srcbuf, dest.y_width); - destbuf += dest.y_stride; - srcbuf += src.y_stride; + for (r = 0; r < dest->y_height; ++r) { + memcpy(destbuf, srcbuf, dest->y_width); + destbuf += dest->y_stride; + srcbuf += src->y_stride; } } -static void swap_frame_buffer(YV12_BUFFER_CONFIG *dest, - YV12_BUFFER_CONFIG *src) { +static void swap_frame_buffer(YV12_BUFFER_CONFIG * const dest, + YV12_BUFFER_CONFIG * const src) { uint8_t *tmp_buf = dest->y_buffer; assert(dest->y_width == src->y_width); assert(dest->y_height == src->y_height); @@ -374,27 +397,46 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, - int refresh_last_frame) { - if (frame_type == KEY_FRAME) { + int refresh_last_frame, + int resized) { + // Copy source into denoised reference buffers on KEY_FRAME or + // if the just encoded frame was resized. 
+ if (frame_type == KEY_FRAME || resized != 0) { int i; // Start at 1 so as not to overwrite the INTRA_FRAME for (i = 1; i < MAX_REF_FRAMES; ++i) - copy_frame(denoiser->running_avg_y[i], src); + copy_frame(&denoiser->running_avg_y[i], &src); return; } - /* For non key frames */ - if (refresh_alt_ref_frame) { - swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME], - &denoiser->running_avg_y[INTRA_FRAME]); - } - if (refresh_golden_frame) { - swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME], - &denoiser->running_avg_y[INTRA_FRAME]); - } - if (refresh_last_frame) { - swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME], - &denoiser->running_avg_y[INTRA_FRAME]); + // If more than one refresh occurs, must copy frame buffer. + if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) + > 1) { + if (refresh_alt_ref_frame) { + copy_frame(&denoiser->running_avg_y[ALTREF_FRAME], + &denoiser->running_avg_y[INTRA_FRAME]); + } + if (refresh_golden_frame) { + copy_frame(&denoiser->running_avg_y[GOLDEN_FRAME], + &denoiser->running_avg_y[INTRA_FRAME]); + } + if (refresh_last_frame) { + copy_frame(&denoiser->running_avg_y[LAST_FRAME], + &denoiser->running_avg_y[INTRA_FRAME]); + } + } else { + if (refresh_alt_ref_frame) { + swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME], + &denoiser->running_avg_y[INTRA_FRAME]); + } + if (refresh_golden_frame) { + swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME], + &denoiser->running_avg_y[INTRA_FRAME]); + } + if (refresh_last_frame) { + swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME], + &denoiser->running_avg_y[INTRA_FRAME]); + } } } @@ -456,15 +498,43 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, vp9_denoiser_free(denoiser); return 1; } + + fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, + ssx, ssy, +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth, +#endif + border, legacy_byte_alignment); + if (fail) { + vp9_denoiser_free(denoiser); + return 1; + } 
#ifdef OUTPUT_YUV_DENOISED make_grayscale(&denoiser->running_avg_y[i]); #endif denoiser->increase_denoising = 0; denoiser->frame_buffer_initialized = 1; - + vp9_denoiser_init_noise_estimate(denoiser, width, height); return 0; } +void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser, + int width, + int height) { + // Denoiser is off by default, i.e., no denoising is performed. + // Noise level is measured periodically, and if observed to be above + // thresh_noise_estimate, then denoising is performed, i.e., denoising_on = 1. + denoiser->denoising_on = 0; + denoiser->noise_estimate = 0; + denoiser->noise_estimate_count = 0; + denoiser->thresh_noise_estimate = 20; + if (width * height >= 1920 * 1080) { + denoiser->thresh_noise_estimate = 70; + } else if (width * height >= 1280 * 720) { + denoiser->thresh_noise_estimate = 40; + } +} + void vp9_denoiser_free(VP9_DENOISER *denoiser) { int i; denoiser->frame_buffer_initialized = 0; @@ -475,6 +545,120 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) { vpx_free_frame_buffer(&denoiser->running_avg_y[i]); } vpx_free_frame_buffer(&denoiser->mc_running_avg_y); + vpx_free_frame_buffer(&denoiser->last_source); +} + +void vp9_denoiser_update_noise_estimate(VP9_COMP *const cpi) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + int frame_period = 10; + int thresh_consec_zeromv = 8; + unsigned int thresh_sum_diff = 128; + int num_frames_estimate = 20; + int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7; + // Estimate of noise level every frame_period frames. + // Estimate is between current source and last source. 
+ if (cm->current_video_frame % frame_period != 0 || + cpi->denoiser.last_source.y_buffer == NULL) { + copy_frame(&cpi->denoiser.last_source, cpi->Source); + return; + } else { + int num_samples = 0; + uint64_t avg_est = 0; + int bsize = BLOCK_16X16; + static const unsigned char const_source[16] = { + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128}; + // Loop over sub-sample of 16x16 blocks of frame, and for blocks that have + // been encoded as zero/small mv at least x consecutive frames, compute + // the variance to update estimate of noise in the source. + const uint8_t *src_y = cpi->Source->y_buffer; + const int src_ystride = cpi->Source->y_stride; + const uint8_t *last_src_y = cpi->denoiser.last_source.y_buffer; + const int last_src_ystride = cpi->denoiser.last_source.y_stride; + const uint8_t *src_u = cpi->Source->u_buffer; + const uint8_t *src_v = cpi->Source->v_buffer; + const int src_uvstride = cpi->Source->uv_stride; + const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1; + const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1; + const int uv_width_shift = y_width_shift >> 1; + const int uv_height_shift = y_height_shift >> 1; + int mi_row, mi_col; + for (mi_row = 0; mi_row < cm->mi_rows; mi_row ++) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col ++) { + // 16x16 blocks, 1/4 sample of frame. + if (mi_row % 4 == 0 && mi_col % 4 == 0) { + int bl_index = mi_row * cm->mi_cols + mi_col; + int bl_index1 = bl_index + 1; + int bl_index2 = bl_index + cm->mi_cols; + int bl_index3 = bl_index2 + 1; + // Only consider blocks that are likely steady background. i.e, have + // been encoded as zero/low motion x (= thresh_consec_zeromv) frames + // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all + // 4 sub-blocks for 16x16 block. Also, avoid skin blocks. 
+ const uint8_t ysource = + src_y[y_height_shift * src_ystride + y_width_shift]; + const uint8_t usource = + src_u[uv_height_shift * src_uvstride + uv_width_shift]; + const uint8_t vsource = + src_v[uv_height_shift * src_uvstride + uv_width_shift]; + int is_skin = vp9_skin_pixel(ysource, usource, vsource); + if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv && + cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv && + cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv && + cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv && + !is_skin) { + // Compute variance. + unsigned int sse; + unsigned int variance = cpi->fn_ptr[bsize].vf(src_y, + src_ystride, + last_src_y, + last_src_ystride, + &sse); + // Only consider this block as valid for noise measurement if the + // average term (sse - variance = N * avg^{2}, N = 16X16) of the + // temporal residual is small (avoid effects from lighting change). + if ((sse - variance) < thresh_sum_diff) { + unsigned int sse2; + const unsigned int spatial_variance = + cpi->fn_ptr[bsize].vf(src_y, src_ystride, const_source, + 0, &sse2); + avg_est += variance / (10 + spatial_variance); + num_samples++; + } + } + } + src_y += 8; + last_src_y += 8; + src_u += 4; + src_v += 4; + } + src_y += (src_ystride << 3) - (cm->mi_cols << 3); + last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3); + src_u += (src_uvstride << 2) - (cm->mi_cols << 2); + src_v += (src_uvstride << 2) - (cm->mi_cols << 2); + } + // Update noise estimate if we have at a minimum number of block samples, + // and avg_est > 0 (avg_est == 0 can happen if the application inputs + // duplicate frames). + if (num_samples > min_blocks_estimate && avg_est > 0) { + // Normalize. + avg_est = (avg_est << 8) / num_samples; + // Update noise estimate. 
+ cpi->denoiser.noise_estimate = (3 * cpi->denoiser.noise_estimate + + avg_est) >> 2; + cpi->denoiser.noise_estimate_count++; + if (cpi->denoiser.noise_estimate_count == num_frames_estimate) { + // Reset counter and check noise level condition. + cpi->denoiser.noise_estimate_count = 0; + if (cpi->denoiser.noise_estimate > cpi->denoiser.thresh_noise_estimate) + cpi->denoiser.denoising_on = 1; + else + cpi->denoiser.denoising_on = 0; + } + } + } + copy_frame(&cpi->denoiser.last_source, cpi->Source); } #ifdef OUTPUT_YUV_DENOISED diff --git a/libvpx/vp9/encoder/vp9_denoiser.h b/libvpx/vp9/encoder/vp9_denoiser.h index b2af792b..f8ad4acd 100644 --- a/libvpx/vp9/encoder/vp9_denoiser.h +++ b/libvpx/vp9/encoder/vp9_denoiser.h @@ -12,6 +12,7 @@ #define VP9_ENCODER_DENOISER_H_ #include "vp9/encoder/vp9_block.h" +#include "vp9/encoder/vp9_skin_detection.h" #include "vpx_scale/yv12config.h" #ifdef __cplusplus @@ -28,16 +29,24 @@ typedef enum vp9_denoiser_decision { typedef struct vp9_denoiser { YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES]; YV12_BUFFER_CONFIG mc_running_avg_y; + YV12_BUFFER_CONFIG last_source; int increase_denoising; int frame_buffer_initialized; + int denoising_on; + int noise_estimate; + int thresh_noise_estimate; + int noise_estimate_count; } VP9_DENOISER; +struct VP9_COMP; + void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, - int refresh_last_frame); + int refresh_last_frame, + int resized); void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, @@ -67,6 +76,12 @@ static int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) { void vp9_denoiser_free(VP9_DENOISER *denoiser); +void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser, + int width, + int height); + +void vp9_denoiser_update_noise_estimate(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff 
--git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c index 295a7512..2333a139 100644 --- a/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/libvpx/vp9/encoder/vp9_encodeframe.c @@ -16,6 +16,7 @@ #include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" #include "vpx_ports/system_state.h" @@ -979,8 +980,8 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, const struct segmentation *const seg = &cm->seg; const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; - const int x_mis = MIN(bw, cm->mi_cols - mi_col); - const int y_mis = MIN(bh, cm->mi_rows - mi_row); + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; int w, h; @@ -1132,8 +1133,8 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, mbmi->sb_type = bsize; mbmi->mode = ZEROMV; - mbmi->tx_size = MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[tx_mode]); + mbmi->tx_size = + VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]); mbmi->skip = 1; mbmi->uv_mode = DC_PRED; mbmi->ref_frame[0] = LAST_FRAME; @@ -1496,7 +1497,7 @@ static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, int cols_left, int *bh, int *bw) { if (rows_left <= 0 || cols_left <= 0) { - return MIN(bsize, BLOCK_8X8); + return VPXMIN(bsize, BLOCK_8X8); } else { for (; bsize > 0; bsize -= 3) { *bh = num_8x8_blocks_high_lookup[bsize]; @@ -1672,8 +1673,8 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, const struct segmentation *const seg = &cm->seg; const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type]; const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type]; - const int x_mis = MIN(bw, cm->mi_cols - mi_col); - const int y_mis = MIN(bh, cm->mi_rows - 
mi_row); + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); *(xd->mi[0]) = ctx->mic; *(x->mbmi_ext) = ctx->mbmi_ext; @@ -1738,10 +1739,12 @@ static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize); #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && output_enabled && - cpi->common.frame_type != KEY_FRAME) { + if (cpi->oxcf.noise_sensitivity > 0 && + output_enabled && + cpi->common.frame_type != KEY_FRAME && + cpi->resize_pending == 0) { vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, - MAX(BLOCK_8X8, bsize), ctx); + VPXMAX(BLOCK_8X8, bsize), ctx); } #endif @@ -2133,8 +2136,8 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, MODE_INFO *mi = mi_8x8[index+j]; BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0; bs_hist[sb_type]++; - *min_block_size = MIN(*min_block_size, sb_type); - *max_block_size = MAX(*max_block_size, sb_type); + *min_block_size = VPXMIN(*min_block_size, sb_type); + *max_block_size = VPXMAX(*max_block_size, sb_type); } index += xd->mi_stride; } @@ -2211,8 +2214,8 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, if (vp9_active_edge_sb(cpi, mi_row, mi_col)) { min_size = BLOCK_4X4; } else { - min_size = MIN(cpi->sf.rd_auto_partition_min_limit, - MIN(min_size, max_size)); + min_size = + VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size)); } // When use_square_partition_only is true, make sure at least one square @@ -2248,8 +2251,8 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, for (idx = 0; idx < mi_width; ++idx) { mi = prev_mi[idy * cm->mi_stride + idx]; bs = mi ? 
mi->mbmi.sb_type : bsize; - min_size = MIN(min_size, bs); - max_size = MAX(max_size, bs); + min_size = VPXMIN(min_size, bs); + max_size = VPXMAX(max_size, bs); } } } @@ -2258,8 +2261,8 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, for (idy = 0; idy < mi_height; ++idy) { mi = xd->mi[idy * cm->mi_stride - 1]; bs = mi ? mi->mbmi.sb_type : bsize; - min_size = MIN(min_size, bs); - max_size = MAX(max_size, bs); + min_size = VPXMIN(min_size, bs); + max_size = VPXMAX(max_size, bs); } } @@ -2267,8 +2270,8 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, for (idx = 0; idx < mi_width; ++idx) { mi = xd->mi[idx - cm->mi_stride]; bs = mi ? mi->mbmi.sb_type : bsize; - min_size = MIN(min_size, bs); - max_size = MAX(max_size, bs); + min_size = VPXMIN(min_size, bs); + max_size = VPXMAX(max_size, bs); } } @@ -2376,11 +2379,20 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, bsize >= BLOCK_8X8; int partition_vert_allowed = !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; - (void) *tp_orig; + + int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr; + int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr; + + (void)*tp_orig; assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); + // Adjust dist breakout threshold according to the partition size. 
+ dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + rate_breakout_thr *= num_pels_log2_lookup[bsize]; + vp9_rd_cost_init(&this_rdc); vp9_rd_cost_init(&sum_rdc); vp9_rd_cost_reset(&best_rdc); @@ -2409,9 +2421,11 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, force_vert_split); do_split &= bsize > min_size; } - if (cpi->sf.use_square_partition_only) { - partition_horz_allowed &= force_horz_split; - partition_vert_allowed &= force_vert_split; + + if (cpi->sf.use_square_partition_only && + bsize > cpi->sf.use_square_only_threshold) { + partition_horz_allowed &= force_horz_split; + partition_vert_allowed &= force_vert_split; } save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -2433,9 +2447,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, int mb_row = mi_row >> 1; int mb_col = mi_col >> 1; int mb_row_end = - MIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); + VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); int mb_col_end = - MIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); + VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); int r, c; // compute a complexity measure, basically measure inconsistency of motion @@ -2488,27 +2502,17 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (this_rdc.rdcost < best_rdc.rdcost) { - int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr; - int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr; - best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - // Adjust dist breakout threshold according to the partition size. 
- dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + - b_height_log2_lookup[bsize]); - - rate_breakout_thr *= num_pels_log2_lookup[bsize]; - // If all y, u, v transform blocks in this partition are skippable, and // the dist & rate are within the thresholds, the partition search is // terminated for current branch of the partition search tree. - // The dist & rate thresholds are set to 0 at speed 0 to disable the - // early termination at that speed. - if (!x->e_mbd.lossless && - (ctx->skippable && best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr)) { + if (!x->e_mbd.lossless && ctx->skippable && + ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr))) { do_split = 0; do_rect = 0; } @@ -2524,9 +2528,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, int mb_row = mi_row >> 1; int mb_col = mi_col >> 1; int mb_row_end = - MIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); + VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); int mb_col_end = - MIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); + VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); int r, c; int skip = 1; @@ -2618,11 +2622,21 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_SPLIT; + + // Rate and distortion based partition search termination clause. 
+ if (!x->e_mbd.lossless && + ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr))) { + do_rect = 0; + } } } else { // skip rectangular partition test when larger block size // gives better rd cost - if (cpi->sf.less_rectangular_check) + if ((cpi->sf.less_rectangular_check) && + ((bsize > cpi->sf.use_square_only_threshold) || + (best_rdc.dist < dist_breakout_thr))) do_rect &= !partition_none_allowed; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -2631,7 +2645,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_HORZ if (partition_horz_allowed && (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { - subsize = get_subsize(bsize, PARTITION_HORZ); + subsize = get_subsize(bsize, PARTITION_HORZ); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && @@ -2672,6 +2686,10 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_HORZ; + + if ((cpi->sf.less_rectangular_check) && + (bsize > cpi->sf.use_square_only_threshold)) + do_rect = 0; } } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -2679,7 +2697,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_VERT if (partition_vert_allowed && (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { - subsize = get_subsize(bsize, PARTITION_VERT); + subsize = get_subsize(bsize, PARTITION_VERT); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); @@ -2733,7 +2751,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, (void) best_rd; *rd_cost = best_rdc; - if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && pc_tree->index != 3) { int output_enabled = (bsize == BLOCK_64X64); @@ -3646,7 +3663,7 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) { const int last_stride = 
cpi->Last_Source->y_stride; // Pick cutoff threshold - const int cutoff = (MIN(cm->width, cm->height) >= 720) ? + const int cutoff = (VPXMIN(cm->width, cm->height) >= 720) ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100); DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]); @@ -3947,7 +3964,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { #endif // If allowed, encoding tiles in parallel with one thread handling one tile. - if (MIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) + if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) vp9_encode_tiles_mt(cpi); else encode_tiles(cpi); @@ -4162,10 +4179,10 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, int plane; mbmi->skip = 1; for (plane = 0; plane < MAX_MB_PLANE; ++plane) - vp9_encode_intra_block_plane(x, MAX(bsize, BLOCK_8X8), plane); + vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane); if (output_enabled) sum_intra_stats(td->counts, mi); - vp9_tokenize_sb(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8)); + vp9_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8)); } else { int ref; const int is_compound = has_second_ref(mbmi); @@ -4178,12 +4195,14 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, &xd->block_refs[ref]->sf); } if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip) - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, + VPXMAX(bsize, BLOCK_8X8)); - vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); + vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, + VPXMAX(bsize, BLOCK_8X8)); - vp9_encode_sb(x, MAX(bsize, BLOCK_8X8)); - vp9_tokenize_sb(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8)); + vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); + vp9_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8)); } if (output_enabled) { @@ -4197,8 +4216,8 @@ static void 
encode_superblock(VP9_COMP *cpi, ThreadData *td, TX_SIZE tx_size; // The new intra coding scheme requires no change of transform size if (is_inter_block(&mi->mbmi)) { - tx_size = MIN(tx_mode_to_biggest_tx_size[cm->tx_mode], - max_txsize_lookup[bsize]); + tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], + max_txsize_lookup[bsize]); } else { tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4; } @@ -4210,5 +4229,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, } ++td->counts->tx.tx_totals[mbmi->tx_size]; ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; + if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_update_sb_postencode(cpi, mbmi, mi_row, mi_col, bsize); } } diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c index 00e4c610..3c6a9283 100644 --- a/libvpx/vp9/encoder/vp9_encodemb.c +++ b/libvpx/vp9/encoder/vp9_encodemb.c @@ -99,7 +99,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); const int eob = p->eobs[block]; - const PLANE_TYPE type = pd->plane_type; + const PLANE_TYPE type = get_plane_type(plane); const int default_eob = 16 << (tx_size << 1); const int mul = 1 + (tx_size == TX_32X32); const int16_t *dequant_ptr = pd->dequant; @@ -789,7 +789,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, src_diff = &p->src_diff[4 * (j * diff_stride + i)]; if (tx_size == TX_4X4) { - tx_type = get_tx_type_4x4(pd->plane_type, xd, block); + tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; mode = plane == 0 ? 
get_y_mode(xd->mi[0], block) : mbmi->uv_mode; } else { @@ -797,7 +797,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, if (tx_size == TX_32X32) { scan_order = &vp9_default_scan_orders[TX_32X32]; } else { - tx_type = get_tx_type(pd->plane_type, xd); + tx_type = get_tx_type(get_plane_type(plane), xd); scan_order = &vp9_scan_orders[tx_size][tx_type]; } } diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c index 7848c93a..e7196634 100644 --- a/libvpx/vp9/encoder/vp9_encodemv.c +++ b/libvpx/vp9/encoder/vp9_encodemv.c @@ -16,6 +16,8 @@ #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encodemv.h" +#include "vpx_dsp/vpx_dsp_common.h" + static struct vp9_token mv_joint_encodings[MV_JOINTS]; static struct vp9_token mv_class_encodings[MV_CLASSES]; static struct vp9_token mv_fp_encodings[MV_FP_SIZE]; @@ -216,8 +218,8 @@ void vp9_encode_mv(VP9_COMP* cpi, vpx_writer* w, // If auto_mv_step_size is enabled then keep track of the largest // motion vector component used. 
if (cpi->sf.mv.auto_mv_step_size) { - unsigned int maxv = MAX(abs(mv->row), abs(mv->col)) >> 3; - cpi->max_mv_magnitude = MAX(maxv, cpi->max_mv_magnitude); + unsigned int maxv = VPXMAX(abs(mv->row), abs(mv->col)) >> 3; + cpi->max_mv_magnitude = VPXMAX(maxv, cpi->max_mv_magnitude); } } diff --git a/libvpx/vp9/encoder/vp9_encoder.c b/libvpx/vp9/encoder/vp9_encoder.c index 4654d63b..72eafec4 100644 --- a/libvpx/vp9/encoder/vp9_encoder.c +++ b/libvpx/vp9/encoder/vp9_encoder.c @@ -17,6 +17,7 @@ #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx/internal/vpx_psnr.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #if CONFIG_INTERNAL_STATS #include "vpx_dsp/ssim.h" @@ -411,6 +412,8 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free_frame_buffer(&cpi->svc.empty_frame.img); memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame)); + + vp9_free_svc_cyclic_refresh(cpi); } static void save_coding_context(VP9_COMP *cpi) { @@ -686,7 +689,7 @@ static int alloc_context_buffers_ext(VP9_COMP *cpi) { return 0; } -void vp9_alloc_compressor_data(VP9_COMP *cpi) { +static void alloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; vp9_alloc_context_buffers(cm, cm->width, cm->height); @@ -772,10 +775,11 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cm->use_highbitdepth = oxcf->use_highbitdepth; #endif cm->color_space = oxcf->color_space; + cm->color_range = oxcf->color_range; cm->width = oxcf->width; cm->height = oxcf->height; - vp9_alloc_compressor_data(cpi); + alloc_compressor_data(cpi); cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode; @@ -1452,11 +1456,14 @@ static void realloc_segmentation_maps(VP9_COMP *cpi) { void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; + int last_w = cpi->oxcf.width; + int last_h = cpi->oxcf.height; if (cm->profile != oxcf->profile) cm->profile 
= oxcf->profile; cm->bit_depth = oxcf->bit_depth; cm->color_space = oxcf->color_space; + cm->color_range = oxcf->color_range; if (cm->profile <= PROFILE_1) assert(cm->bit_depth == VPX_BITS_8); @@ -1490,8 +1497,8 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. - rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); - rc->buffer_level = MIN(rc->buffer_level, rc->maximum_buffer_size); + rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); + rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size); // Set up frame rate and related parameters rate control values. vp9_new_framerate(cpi, cpi->framerate); @@ -1502,15 +1509,25 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cm->interp_filter = cpi->sf.default_interp_filter; - cm->display_width = cpi->oxcf.width; - cm->display_height = cpi->oxcf.height; - cm->width = cpi->oxcf.width; - cm->height = cpi->oxcf.height; + if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) { + cm->render_width = cpi->oxcf.render_width; + cm->render_height = cpi->oxcf.render_height; + } else { + cm->render_width = cpi->oxcf.width; + cm->render_height = cpi->oxcf.height; + } + if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) { + cm->width = cpi->oxcf.width; + cm->height = cpi->oxcf.height; + } if (cpi->initial_width) { - if (cm->width > cpi->initial_width || cm->height > cpi->initial_height) { + int new_mi_size = 0; + vp9_set_mb_mi(cm, cm->width, cm->height); + new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); + if (cm->mi_alloc_size < new_mi_size) { vp9_free_context_buffers(cm); - vp9_alloc_compressor_data(cpi); + alloc_compressor_data(cpi); realloc_segmentation_maps(cpi); cpi->initial_width = cpi->initial_height = 0; } @@ -1918,14 +1935,15 @@ VP9_COMP 
*vp9_create_compressor(VP9EncoderConfig *oxcf, snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) void vp9_remove_compressor(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; + VP9_COMMON *cm; unsigned int i; int t; if (!cpi) return; - if (cpi && (cm->current_video_frame > 0)) { + cm = &cpi->common; + if (cm->current_video_frame > 0) { #if CONFIG_INTERNAL_STATS vpx_clear_system_state(); @@ -2247,42 +2265,6 @@ typedef struct { uint32_t samples[4]; // total/y/u/v } PSNR_STATS; -static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, - PSNR_STATS *psnr) { - static const double peak = 255.0; - const int widths[3] = { - a->y_crop_width, a->uv_crop_width, a->uv_crop_width}; - const int heights[3] = { - a->y_crop_height, a->uv_crop_height, a->uv_crop_height}; - const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer}; - const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride}; - const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer}; - const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride}; - int i; - uint64_t total_sse = 0; - uint32_t total_samples = 0; - - for (i = 0; i < 3; ++i) { - const int w = widths[i]; - const int h = heights[i]; - const uint32_t samples = w * h; - const uint64_t sse = get_sse(a_planes[i], a_strides[i], - b_planes[i], b_strides[i], - w, h); - psnr->sse[1 + i] = sse; - psnr->samples[1 + i] = samples; - psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse); - - total_sse += sse; - total_samples += samples; - } - - psnr->sse[0] = total_sse; - psnr->samples[0] = total_samples; - psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak, - (double)total_sse); -} - #if CONFIG_VP9_HIGHBITDEPTH static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, @@ -2335,6 +2317,44 @@ static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a, psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak, (double)total_sse); } + +#else // 
!CONFIG_VP9_HIGHBITDEPTH + +static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, + PSNR_STATS *psnr) { + static const double peak = 255.0; + const int widths[3] = { + a->y_crop_width, a->uv_crop_width, a->uv_crop_width}; + const int heights[3] = { + a->y_crop_height, a->uv_crop_height, a->uv_crop_height}; + const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer}; + const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride}; + const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer}; + const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride}; + int i; + uint64_t total_sse = 0; + uint32_t total_samples = 0; + + for (i = 0; i < 3; ++i) { + const int w = widths[i]; + const int h = heights[i]; + const uint32_t samples = w * h; + const uint64_t sse = get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); + psnr->sse[1 + i] = sse; + psnr->samples[1 + i] = samples; + psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse); + + total_sse += sse; + total_samples += samples; + } + + psnr->sse[0] = total_sse; + psnr->samples[0] = total_samples; + psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak, + (double)total_sse); +} #endif // CONFIG_VP9_HIGHBITDEPTH static void generate_psnr_packet(VP9_COMP *cpi) { @@ -2615,7 +2635,7 @@ static int scale_down(VP9_COMP *cpi, int q) { if (rc->frame_size_selector == UNSCALED && q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) { const int max_size_thresh = (int)(rate_thresh_mult[SCALE_STEP1] - * MAX(rc->this_frame_target, rc->avg_frame_bandwidth)); + * VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth)); scale = rc->projected_frame_size > max_size_thresh ? 
1 : 0; } return scale; @@ -2736,7 +2756,8 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { cpi->common.frame_type, cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, - cpi->refresh_last_frame); + cpi->refresh_last_frame, + cpi->resize_pending); } #endif } @@ -2744,6 +2765,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { MACROBLOCKD *xd = &cpi->td.mb.e_mbd; struct loopfilter *lf = &cm->lf; + if (xd->lossless) { lf->filter_level = 0; } else { @@ -2760,6 +2782,8 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { } if (lf->filter_level > 0) { + vp9_build_mask_frame(cm, lf->filter_level, 0); + if (cpi->num_workers > 1) vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane, lf->filter_level, 0, 0, @@ -2998,7 +3022,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { static void set_mv_search_params(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; - const unsigned int max_mv_def = MIN(cm->width, cm->height); + const unsigned int max_mv_def = VPXMIN(cm->width, cm->height); // Default based on max resolution. cpi->mv_step_param = vp9_init_search_range(max_mv_def); @@ -3013,8 +3037,8 @@ static void set_mv_search_params(VP9_COMP *cpi) { // Allow mv_steps to correspond to twice the max mv magnitude found // in the previous frame, capped by the default max_mv_magnitude based // on resolution. 
- cpi->mv_step_param = - vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude)); + cpi->mv_step_param = vp9_init_search_range( + VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude)); } cpi->max_mv_magnitude = 0; } @@ -3076,6 +3100,21 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, #endif // CONFIG_VP9_POSTPROC } +#if CONFIG_VP9_TEMPORAL_DENOISING +static void setup_denoiser_buffer(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (cpi->oxcf.noise_sensitivity > 0 && + !cpi->denoiser.frame_buffer_initialized) { + vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_ENC_BORDER_IN_PIXELS); + } +} +#endif + static void init_motion_estimation(VP9_COMP *cpi) { int y_stride = cpi->scaled_source.y_stride; @@ -3107,26 +3146,30 @@ static void set_frame_size(VP9_COMP *cpi) { if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && !cpi->use_svc && - oxcf->resize_mode == RESIZE_DYNAMIC) { - if (cpi->resize_pending == 1) { - oxcf->scaled_frame_width = - (cm->width * cpi->resize_scale_num) / cpi->resize_scale_den; - oxcf->scaled_frame_height = - (cm->height * cpi->resize_scale_num) /cpi->resize_scale_den; - } else if (cpi->resize_pending == -1) { - // Go back up to original size. - oxcf->scaled_frame_width = oxcf->width; - oxcf->scaled_frame_height = oxcf->height; - } - if (cpi->resize_pending != 0) { - // There has been a change in frame size. - vp9_set_size_literal(cpi, - oxcf->scaled_frame_width, - oxcf->scaled_frame_height); - - // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. - set_mv_search_params(cpi); - } + oxcf->resize_mode == RESIZE_DYNAMIC && + cpi->resize_pending != 0) { + oxcf->scaled_frame_width = + (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den; + oxcf->scaled_frame_height = + (oxcf->height * cpi->resize_scale_num) /cpi->resize_scale_den; + // There has been a change in frame size. 
+ vp9_set_size_literal(cpi, + oxcf->scaled_frame_width, + oxcf->scaled_frame_height); + + // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. + set_mv_search_params(cpi); + +#if CONFIG_VP9_TEMPORAL_DENOISING + // Reset the denoiser on the resized frame. + if (cpi->oxcf.noise_sensitivity > 0) { + vp9_denoiser_free(&(cpi->denoiser)); + setup_denoiser_buffer(cpi); + // Dynamic resize is only triggered for non-SVC, so we can force + // golden frame update here as temporary fix to denoiser. + cpi->refresh_golden_frame = 1; + } +#endif } if ((oxcf->pass == 2) && @@ -3193,11 +3236,26 @@ static void encode_without_recode_loop(VP9_COMP *cpi, cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, - &cpi->scaled_source); - if (cpi->unscaled_last_source != NULL) + &cpi->scaled_source, + (cpi->oxcf.pass == 0)); + + // Avoid scaling last_source unless its needed. + // Last source is currently only used for screen-content mode, + // or if partition_search_type == SOURCE_VAR_BASED_PARTITION. 
+ if (cpi->unscaled_last_source != NULL && + (cpi->oxcf.content == VP9E_CONTENT_SCREEN || + cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION)) cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source, - &cpi->scaled_last_source); + &cpi->scaled_last_source, + (cpi->oxcf.pass == 0)); + +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0 && + cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_denoiser_update_noise_estimate(cpi); + } +#endif if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && @@ -3270,6 +3328,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME && !cpi->use_svc && + cpi->ext_refresh_frame_flags_pending == 0 && (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR)) vp9_cyclic_refresh_check_golden_update(cpi); @@ -3328,11 +3387,13 @@ static void encode_with_recode_loop(VP9_COMP *cpi, } cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, - &cpi->scaled_source); + &cpi->scaled_source, + (cpi->oxcf.pass == 0)); if (cpi->unscaled_last_source != NULL) cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source, - &cpi->scaled_last_source); + &cpi->scaled_last_source, + (cpi->oxcf.pass == 0)); if (frame_is_intra_only(cm) == 0) { if (loop_count > 0) { @@ -3414,7 +3475,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // Adjust Q q = (int)((q * high_err_target) / kf_err); - q = MIN(q, (q_high + q_low) >> 1); + q = VPXMIN(q, (q_high + q_low) >> 1); } else if (kf_err < low_err_target && rc->projected_frame_size >= frame_under_shoot_limit) { // The key frame is much better than the previous frame @@ -3423,7 +3484,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // Adjust Q q = (int)((q * low_err_target) / kf_err); - q = MIN(q, (q_high + q_low + 1) >> 1); + q = VPXMIN(q, (q_high + q_low + 1) >> 1); } // Clamp Q to upper and lower limits: @@ -3432,7 +3493,7 @@ static void encode_with_recode_loop(VP9_COMP 
*cpi, loop = q != last_q; } else if (recode_loop_test( cpi, frame_over_shoot_limit, frame_under_shoot_limit, - q, MAX(q_high, top_index), bottom_index)) { + q, VPXMAX(q_high, top_index), bottom_index)) { // Is the projected frame size out of range and are we allowed // to attempt to recode. int last_q = q; @@ -3474,12 +3535,12 @@ static void encode_with_recode_loop(VP9_COMP *cpi, vp9_rc_update_rate_correction_factors(cpi); q = vp9_rc_regulate_q(cpi, rc->this_frame_target, - bottom_index, MAX(q_high, top_index)); + bottom_index, VPXMAX(q_high, top_index)); while (q < q_low && retries < 10) { vp9_rc_update_rate_correction_factors(cpi); q = vp9_rc_regulate_q(cpi, rc->this_frame_target, - bottom_index, MAX(q_high, top_index)); + bottom_index, VPXMAX(q_high, top_index)); retries++; } } @@ -3578,26 +3639,22 @@ static void set_ext_overrides(VP9_COMP *cpi) { cpi->refresh_last_frame = cpi->ext_refresh_last_frame; cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame; cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame; - cpi->ext_refresh_frame_flags_pending = 0; } } YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled) { + YV12_BUFFER_CONFIG *scaled, + int use_normative_scaler) { if (cm->mi_cols * MI_SIZE != unscaled->y_width || cm->mi_rows * MI_SIZE != unscaled->y_height) { #if CONFIG_VP9_HIGHBITDEPTH - if (unscaled->y_width == (scaled->y_width << 1) && - unscaled->y_height == (scaled->y_height << 1)) + if (use_normative_scaler) scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth); else scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth); #else - // Use the faster normative (convolve8) scaling filter: for now only for - // scaling factor of 2. 
- if (unscaled->y_width == (scaled->y_width << 1) && - unscaled->y_height == (scaled->y_height << 1)) + if (use_normative_scaler) scale_and_extend_frame(unscaled, scaled); else scale_and_extend_frame_nonnormative(unscaled, scaled); @@ -3747,6 +3804,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (vp9_rc_drop_frame(cpi)) { vp9_rc_postencode_update_drop_frame(cpi); ++cm->current_video_frame; + cpi->ext_refresh_frame_flags_pending = 0; return; } } @@ -3799,6 +3857,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->refresh_last_frame = 1; cm->frame_to_show = get_frame_new_buffer(cm); + cm->frame_to_show->color_space = cm->color_space; + cm->frame_to_show->color_range = cm->color_range; + cm->frame_to_show->render_width = cm->render_width; + cm->frame_to_show->render_height = cm->render_height; // Pick the loop filter level for the frame. loopfilter_frame(cpi, cm); @@ -3828,6 +3890,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } + cpi->ext_refresh_frame_flags_pending = 0; + if (cpi->refresh_golden_frame == 1) cpi->frame_flags |= FRAMEFLAGS_GOLDEN; else @@ -3953,21 +4017,6 @@ static void check_initial_width(VP9_COMP *cpi, } } -#if CONFIG_VP9_TEMPORAL_DENOISING -static void setup_denoiser_buffer(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (cpi->oxcf.noise_sensitivity > 0 && - !cpi->denoiser.frame_buffer_initialized) { - vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS); - } -} -#endif - int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { @@ -4053,8 +4102,8 @@ static void adjust_frame_rate(VP9_COMP *cpi, // Average this frame's rate into the last second's average // frame rate. If we haven't seen 1 second yet, then average // over the whole interval seen. 
- const double interval = MIN((double)(source->ts_end - - cpi->first_time_stamp_ever), 10000000.0); + const double interval = VPXMIN( + (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0); double avg_duration = 10000000.0 / cpi->framerate; avg_duration *= (interval - avg_duration + this_duration); avg_duration /= interval; @@ -4118,7 +4167,7 @@ static void adjust_image_stat(double y, double u, double v, double all, s->stat[U] += u; s->stat[V] += v; s->stat[ALL] += all; - s->worst = MIN(s->worst, all); + s->worst = VPXMIN(s->worst, all); } #endif // CONFIG_INTERNAL_STATS @@ -4237,7 +4286,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // non-zero spatial layer, it should not be an intra picture. // TODO(Won Kap): this needs to change if per-layer intra frame is // allowed. - if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->svc.spatial_layer_id) { + if ((source->flags & VPX_EFLAG_FORCE_KF) && + cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) { source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF); } @@ -4448,7 +4498,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, frame_ssim2 = vpx_calc_ssim(orig, recon, &weight); #endif // CONFIG_VP9_HIGHBITDEPTH - cpi->worst_ssim= MIN(cpi->worst_ssim, frame_ssim2); + cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2); cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; @@ -4485,7 +4535,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->Source->y_buffer, cpi->Source->y_stride, cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, cpi->Source->y_width, cpi->Source->y_height); - cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness); + cpi->worst_blockiness = + VPXMAX(cpi->worst_blockiness, frame_blockiness); cpi->total_blockiness += frame_blockiness; } } @@ -4505,8 +4556,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, double consistency = 
vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency); if (consistency > 0.0) - cpi->worst_consistency = MIN(cpi->worst_consistency, - consistency); + cpi->worst_consistency = + VPXMIN(cpi->worst_consistency, consistency); cpi->total_inconsistency += this_inconsistency; } } @@ -4618,8 +4669,10 @@ int vp9_set_internal_size(VP9_COMP *cpi, // always go to the next whole number cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs; cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs; - assert(cm->width <= cpi->initial_width); - assert(cm->height <= cpi->initial_height); + if (cm->current_video_frame) { + assert(cm->width <= cpi->initial_width); + assert(cm->height <= cpi->initial_height); + } update_frame_size(cpi); diff --git a/libvpx/vp9/encoder/vp9_encoder.h b/libvpx/vp9/encoder/vp9_encoder.h index c10abd20..159c03aa 100644 --- a/libvpx/vp9/encoder/vp9_encoder.h +++ b/libvpx/vp9/encoder/vp9_encoder.h @@ -238,6 +238,9 @@ typedef struct VP9EncoderConfig { int use_highbitdepth; #endif vpx_color_space_t color_space; + vpx_color_range_t color_range; + int render_width; + int render_height; VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; } VP9EncoderConfig; @@ -605,8 +608,6 @@ int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); #endif // CONFIG_VP9_HIGHBITDEPTH -void vp9_alloc_compressor_data(VP9_COMP *cpi); - void vp9_scale_references(VP9_COMP *cpi); void vp9_update_reference_frames(VP9_COMP *cpi); @@ -615,7 +616,8 @@ void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled); + YV12_BUFFER_CONFIG *scaled, + int use_normative_scaler); void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); diff --git a/libvpx/vp9/encoder/vp9_ethread.c b/libvpx/vp9/encoder/vp9_ethread.c index 00025b7a..ad25712b 100644 --- a/libvpx/vp9/encoder/vp9_ethread.c +++ 
b/libvpx/vp9/encoder/vp9_ethread.c @@ -11,6 +11,7 @@ #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_ethread.h" +#include "vpx_dsp/vpx_dsp_common.h" static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { int i, j, k, l, m, n; @@ -67,7 +68,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - const int num_workers = MIN(cpi->oxcf.max_threads, tile_cols); + const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols); int i; vp9_init_tile_data(cpi); @@ -80,7 +81,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { // resolution. if (cpi->use_svc) { int max_tile_cols = get_max_tile_cols(cpi); - allocated_workers = MIN(cpi->oxcf.max_threads, max_tile_cols); + allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols); } CHECK_MEM_ERROR(cm, cpi->workers, @@ -191,7 +192,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { // Accumulate counters. 
if (i < cpi->num_workers - 1) { - vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0); + vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } } diff --git a/libvpx/vp9/encoder/vp9_ethread.h b/libvpx/vp9/encoder/vp9_ethread.h index e87c50bc..1efa4dcd 100644 --- a/libvpx/vp9/encoder/vp9_ethread.h +++ b/libvpx/vp9/encoder/vp9_ethread.h @@ -11,6 +11,10 @@ #ifndef VP9_ENCODER_VP9_ETHREAD_H_ #define VP9_ENCODER_VP9_ETHREAD_H_ +#ifdef __cplusplus +extern "C" { +#endif + struct VP9_COMP; struct ThreadData; @@ -22,4 +26,8 @@ typedef struct EncWorkerData { void vp9_encode_tiles_mt(struct VP9_COMP *cpi); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_ETHREAD_H_ diff --git a/libvpx/vp9/encoder/vp9_extend.c b/libvpx/vp9/encoder/vp9_extend.c index 0c304dc5..92585b82 100644 --- a/libvpx/vp9/encoder/vp9_extend.c +++ b/libvpx/vp9/encoder/vp9_extend.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -111,10 +112,12 @@ void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, // Motion estimation may use src block variance with the block size up // to 64x64, so the right and bottom need to be extended to 64 multiple // or up to 16, whichever is greater. 
- const int er_y = MAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) - - src->y_crop_width; - const int eb_y = MAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) - - src->y_crop_height; + const int er_y = + VPXMAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) - + src->y_crop_width; + const int eb_y = + VPXMAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) - + src->y_crop_height; const int uv_width_subsampling = (src->uv_width != src->y_width); const int uv_height_subsampling = (src->uv_height != src->y_height); const int et_uv = et_y >> uv_height_subsampling; diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c index e0c5966e..30738b52 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.c +++ b/libvpx/vp9/encoder/vp9_firstpass.c @@ -15,6 +15,7 @@ #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" @@ -381,7 +382,7 @@ static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize, // for first pass test. static int get_search_range(const VP9_COMP *cpi) { int sr = 0; - const int dim = MIN(cpi->initial_width, cpi->initial_height); + const int dim = VPXMIN(cpi->initial_width, cpi->initial_height); while ((dim << sr) < MAX_FULL_PEL_VAL) ++sr; @@ -596,7 +597,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { (cpi->ref_frame_flags & VP9_GOLD_FLAG) ? 
GOLDEN_FRAME : NONE); cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, - &cpi->scaled_source); + &cpi->scaled_source, 0); } vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); @@ -1024,7 +1025,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // Exclude any image dead zone if (image_data_start_row > 0) { intra_skip_count = - MAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2)); + VPXMAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2)); } { @@ -1161,7 +1162,7 @@ static double calc_correction_factor(double err_per_mb, // Adjustment based on actual quantizer to power term. const double power_term = - MIN(vp9_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high); + VPXMIN(vp9_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high); // Calculate correction factor. if (power_term < 1.0) @@ -1182,19 +1183,22 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi, double group_weight_factor) { const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; + // Clamp the target rate to VBR min / max limts. + const int target_rate = + vp9_rc_clamp_pframe_target_size(cpi, section_target_bandwidth); inactive_zone = fclamp(inactive_zone, 0.0, 1.0); - if (section_target_bandwidth <= 0) { + if (target_rate <= 0) { return rc->worst_quality; // Highest value allowed } else { const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? 
cpi->initial_mbs : cpi->common.MBs; - const int active_mbs = MAX(1, num_mbs - (int)(num_mbs * inactive_zone)); + const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone)); const double av_err_per_mb = section_err / active_mbs; const double speed_term = 1.0 + 0.04 * oxcf->speed; const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR; - const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth << + const int target_norm_bits_per_mb = ((uint64_t)target_rate << BPER_MB_NORMBITS) / active_mbs; int q; @@ -1223,7 +1227,7 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi, // Restriction on active max q for constrained quality mode. if (cpi->oxcf.rc_mode == VPX_CQ) - q = MAX(q, oxcf->cq_level); + q = VPXMAX(q, oxcf->cq_level); return q; } } @@ -1233,11 +1237,11 @@ static void setup_rf_level_maxq(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; for (i = INTER_NORMAL; i < RATE_FACTOR_LEVELS; ++i) { int qdelta = vp9_frame_type_qdelta(cpi, i, rc->worst_quality); - rc->rf_level_maxq[i] = MAX(rc->worst_quality + qdelta, rc->best_quality); + rc->rf_level_maxq[i] = VPXMAX(rc->worst_quality + qdelta, rc->best_quality); } } -void vp9_init_subsampling(VP9_COMP *cpi) { +static void init_subsampling(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; const int w = cm->width; @@ -1332,7 +1336,7 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->last_kfgroup_zeromotion_pct = 100; if (oxcf->resize_mode != RESIZE_NONE) { - vp9_init_subsampling(cpi); + init_subsampling(cpi); } } @@ -1364,12 +1368,12 @@ static double get_sr_decay_rate(const VP9_COMP *cpi, if ((sr_diff > LOW_SR_DIFF_TRHESH)) { - sr_diff = MIN(sr_diff, SR_DIFF_MAX); + sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX); sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - (MOTION_AMP_PART * motion_amplitude_factor) - (INTRA_PART * modified_pcnt_intra); } - return MAX(sr_decay, MIN(DEFAULT_DECAY_LIMIT, modified_pct_inter)); + return 
VPXMAX(sr_decay, VPXMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter)); } // This function gives an estimate of how badly we believe the prediction @@ -1379,7 +1383,7 @@ static double get_zero_motion_factor(const VP9_COMP *cpi, const double zero_motion_pct = frame->pcnt_inter - frame->pcnt_motion; double sr_decay = get_sr_decay_rate(cpi, frame); - return MIN(sr_decay, zero_motion_pct); + return VPXMIN(sr_decay, zero_motion_pct); } #define ZM_POWER_FACTOR 0.75 @@ -1391,8 +1395,8 @@ static double get_prediction_decay_rate(const VP9_COMP *cpi, (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion), ZM_POWER_FACTOR)); - return MAX(zero_motion_factor, - (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor))); + return VPXMAX(zero_motion_factor, + (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor))); } // Function to test for a condition where a complex transition is followed @@ -1483,12 +1487,12 @@ static double calc_frame_boost(VP9_COMP *cpi, const double lq = vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth); - const double boost_q_correction = MIN((0.5 + (lq * 0.015)), 1.5); + const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5); int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs : cpi->common.MBs; // Correct for any inactive region in the image - num_mbs = (int)MAX(1, num_mbs * calculate_active_area(cpi, this_frame)); + num_mbs = (int)VPXMAX(1, num_mbs * calculate_active_area(cpi, this_frame)); // Underlying boost factor is based on inter error ratio. 
frame_boost = (BASELINE_ERR_PER_MB * num_mbs) / @@ -1504,7 +1508,7 @@ static double calc_frame_boost(VP9_COMP *cpi, else frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - return MIN(frame_boost, max_boost * boost_q_correction); + return VPXMIN(frame_boost, max_boost * boost_q_correction); } static int calc_arf_boost(VP9_COMP *cpi, int offset, @@ -1593,7 +1597,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, arf_boost = (*f_boost + *b_boost); if (arf_boost < ((b_frames + f_frames) * 20)) arf_boost = ((b_frames + f_frames) * 20); - arf_boost = MAX(arf_boost, MIN_ARF_GF_BOOST); + arf_boost = VPXMAX(arf_boost, MIN_ARF_GF_BOOST); return arf_boost; } @@ -1664,7 +1668,8 @@ static int calculate_boost_bits(int frame_count, } // Calculate the number of extra bits for use in the boosted frame or frames. - return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0); + return VPXMAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), + 0); } // Current limit on maximum number of active arfs in a GF/ARF group. 
@@ -1803,7 +1808,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx]; target_frame_size = clamp(target_frame_size, 0, - MIN(max_bits, (int)total_group_bits)); + VPXMIN(max_bits, (int)total_group_bits)); gf_group->update_type[frame_index] = LF_UPDATE; gf_group->rf_level[frame_index] = INTER_NORMAL; @@ -1924,7 +1929,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int int_lbq = (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex, cpi->common.bit_depth)); - active_min_gf_interval = rc->min_gf_interval + MIN(2, int_max_q / 200); + active_min_gf_interval = rc->min_gf_interval + VPXMIN(2, int_max_q / 200); if (active_min_gf_interval > rc->max_gf_interval) active_min_gf_interval = rc->max_gf_interval; @@ -1935,7 +1940,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // bits to spare and are better with a smaller interval and smaller boost. // At high Q when there are few bits to spare we are better with a longer // interval to spread the cost of the GF. - active_max_gf_interval = 12 + MIN(4, (int_lbq / 6)); + active_max_gf_interval = 12 + VPXMIN(4, (int_lbq / 6)); if (active_max_gf_interval < active_min_gf_interval) active_max_gf_interval = active_min_gf_interval; @@ -1980,8 +1985,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { decay_accumulator = decay_accumulator * loop_decay_rate; // Monitor for static sections. - zero_motion_accumulator = - MIN(zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); + zero_motion_accumulator = VPXMIN( + zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. 
@@ -2037,7 +2042,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) && (zero_motion_accumulator < 0.995)) ? 1 : 0; } else { - rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST); + rc->gfu_boost = VPXMAX((int)boost_score, MIN_ARF_GF_BOOST); rc->source_alt_ref_pending = 0; } @@ -2092,11 +2097,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // rc factor is a weight factor that corrects for local rate control drift. double rc_factor = 1.0; if (rc->rate_error_estimate > 0) { - rc_factor = MAX(RC_FACTOR_MIN, - (double)(100 - rc->rate_error_estimate) / 100.0); + rc_factor = VPXMAX(RC_FACTOR_MIN, + (double)(100 - rc->rate_error_estimate) / 100.0); } else { - rc_factor = MIN(RC_FACTOR_MAX, - (double)(100 - rc->rate_error_estimate) / 100.0); + rc_factor = VPXMIN(RC_FACTOR_MAX, + (double)(100 - rc->rate_error_estimate) / 100.0); } tmp_q = get_twopass_worst_quality(cpi, group_av_err, @@ -2104,7 +2109,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { vbr_group_bits_per_frame, twopass->kfgroup_inter_fraction * rc_factor); twopass->active_worst_quality = - MAX(tmp_q, twopass->active_worst_quality >> 1); + VPXMAX(tmp_q, twopass->active_worst_quality >> 1); } #endif @@ -2421,7 +2426,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } else { twopass->kf_group_bits = 0; } - twopass->kf_group_bits = MAX(0, twopass->kf_group_bits); + twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits); // Reset the first pass file position. reset_fpf_position(twopass, start_position); @@ -2435,22 +2440,21 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { break; // Monitor for static sections. 
- zero_motion_accumulator = - MIN(zero_motion_accumulator, - get_zero_motion_factor(cpi, &next_frame)); + zero_motion_accumulator = VPXMIN( + zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); // Not all frames in the group are necessarily used in calculating boost. if ((i <= rc->max_gf_interval) || ((i <= (rc->max_gf_interval * 4)) && (decay_accumulator > 0.5))) { const double frame_boost = - calc_frame_boost(cpi, this_frame, 0, KF_MAX_BOOST); + calc_frame_boost(cpi, &next_frame, 0, KF_MAX_BOOST); // How fast is prediction quality decaying. if (!detect_flash(twopass, 0)) { const double loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); decay_accumulator *= loop_decay_rate; - decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR); + decay_accumulator = VPXMAX(decay_accumulator, MIN_DECAY_FACTOR); av_decay_accumulator += decay_accumulator; ++loop_decay_counter; } @@ -2471,8 +2475,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Apply various clamps for min and max boost rc->kf_boost = (int)(av_decay_accumulator * boost_score); - rc->kf_boost = MAX(rc->kf_boost, (rc->frames_to_key * 3)); - rc->kf_boost = MAX(rc->kf_boost, MIN_KF_BOOST); + rc->kf_boost = VPXMAX(rc->kf_boost, (rc->frames_to_key * 3)); + rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_BOOST); // Work out how many bits to allocate for the key frame itself. kf_bits = calculate_boost_bits((rc->frames_to_key - 1), @@ -2736,11 +2740,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } target_rate = gf_group->bit_allocation[gf_group->index]; - if (cpi->common.frame_type == KEY_FRAME) - target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate); - else - target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); - rc->base_frame_target = target_rate; { @@ -2770,7 +2769,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { // is designed to prevent extreme behaviour at the end of a clip // or group of frames. 
rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size; - twopass->bits_left = MAX(twopass->bits_left - bits_used, 0); + twopass->bits_left = VPXMAX(twopass->bits_left - bits_used, 0); // Calculate the pct rc error. if (rc->total_actual_bits) { @@ -2786,7 +2785,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { twopass->kf_group_bits -= bits_used; twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct; } - twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0); + twopass->kf_group_bits = VPXMAX(twopass->kf_group_bits, 0); // Increment the gf group index ready for the next frame. ++twopass->gf_group.index; @@ -2836,18 +2835,18 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { rc->vbr_bits_off_target_fast += fast_extra_thresh - rc->projected_frame_size; rc->vbr_bits_off_target_fast = - MIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth)); + VPXMIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth)); // Fast adaptation of minQ if necessary to use up the extra bits. 
if (rc->avg_frame_bandwidth) { twopass->extend_minq_fast = (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth); } - twopass->extend_minq_fast = MIN(twopass->extend_minq_fast, - minq_adj_limit - twopass->extend_minq); + twopass->extend_minq_fast = VPXMIN( + twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq); } else if (rc->vbr_bits_off_target_fast) { - twopass->extend_minq_fast = MIN(twopass->extend_minq_fast, - minq_adj_limit - twopass->extend_minq); + twopass->extend_minq_fast = VPXMIN( + twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq); } else { twopass->extend_minq_fast = 0; } diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h index 49f9da38..5875a7b9 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.h +++ b/libvpx/vp9/encoder/vp9_firstpass.h @@ -153,8 +153,6 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi); // Post encode update of the rate control parameters for 2-pass void vp9_twopass_postencode_update(struct VP9_COMP *cpi); -void vp9_init_subsampling(struct VP9_COMP *cpi); - void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width, int *scaled_frame_height); diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c index d59f3157..41b6d195 100644 --- a/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/libvpx/vp9/encoder/vp9_mbgraph.c @@ -13,6 +13,7 @@ #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_segmentation.h" @@ -29,7 +30,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, int mb_col) { MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; - const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + const SEARCH_METHODS old_search_method = mv_sf->search_method; const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; const int 
tmp_col_min = x->mv_col_min; @@ -41,17 +43,18 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, // Further step/diamond searches as necessary int step_param = mv_sf->reduce_first_step_size; - step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2); + step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); vp9_set_mv_search_range(x, ref_mv); ref_full.col = ref_mv->col >> 3; ref_full.row = ref_mv->row >> 3; - /*cpi->sf.search_method == HEX*/ - vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0, - cond_cost_list(cpi, cost_list), - &v_fn_ptr, 0, ref_mv, dst_mv); + mv_sf->search_method = HEX; + vp9_full_pixel_search(cpi, x, BLOCK_16X16, &ref_full, step_param, + x->errorperbit, cond_cost_list(cpi, cost_list), ref_mv, + dst_mv, 0, 0); + mv_sf->search_method = old_search_method; // Try sub-pixel MC // if (bestsme > error_thresh && bestsme < INT_MAX) diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c index aa3e51ce..be8f57f7 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.c +++ b/libvpx/vp9/encoder/vp9_mcomp.c @@ -15,6 +15,7 @@ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -37,10 +38,10 @@ void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; - col_min = MAX(col_min, (MV_LOW >> 3) + 1); - row_min = MAX(row_min, (MV_LOW >> 3) + 1); - col_max = MIN(col_max, (MV_UPP >> 3) - 1); - row_max = MIN(row_max, (MV_UPP >> 3) - 1); + col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1); + row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1); + col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1); + row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1); // Get intersection of UMV window and valid MV window to reduce # of checks // in diamond search. 
@@ -57,12 +58,12 @@ void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { int vp9_init_search_range(int size) { int sr = 0; // Minimum search size no matter what the passed in value. - size = MAX(16, size); + size = VPXMAX(16, size); while ((size << sr) < MAX_FULL_PEL_VAL) sr++; - sr = MIN(sr, MAX_MVSEARCH_STEPS - 2); + sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2); return sr; } @@ -297,10 +298,10 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { int br = bestmv->row * 8; \ int bc = bestmv->col * 8; \ int hstep = 4; \ - const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \ - const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \ - const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \ - const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \ + const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \ + const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \ + const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \ + const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \ int tr = br; \ int tc = bc; \ \ @@ -668,10 +669,10 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, int bc = bestmv->col * 8; int hstep = 4; int iter, round = 3 - forced_stop; - const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); - const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); - const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); - const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); + const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); int tr = br; int tc = bc; const MV *search_step = search_step_table; @@ -1371,15 +1372,15 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x, x->mvcost, x->errorperbit) : 0); } 
-int vp9_hex_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int sad_per_bit, - int do_init_search, - int *cost_list, - const vp9_variance_fn_ptr_t *vfp, - int use_mvcost, - const MV *center_mv, MV *best_mv) { +static int hex_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + int do_init_search, + int *cost_list, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, MV *best_mv) { // First scale has 8-closest points, the rest have 6 points in hex shape // at increasing scales static const int hex_num_candidates[MAX_PATTERN_SCALES] = { @@ -1406,16 +1407,16 @@ int vp9_hex_search(const MACROBLOCK *x, hex_num_candidates, hex_candidates); } -int vp9_bigdia_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int sad_per_bit, - int do_init_search, - int *cost_list, - const vp9_variance_fn_ptr_t *vfp, - int use_mvcost, - const MV *center_mv, - MV *best_mv) { +static int bigdia_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + int do_init_search, + int *cost_list, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv) { // First scale has 4-closest points, the rest have 8 points in diamond // shape at increasing scales static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { @@ -1448,16 +1449,16 @@ int vp9_bigdia_search(const MACROBLOCK *x, bigdia_num_candidates, bigdia_candidates); } -int vp9_square_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int sad_per_bit, - int do_init_search, - int *cost_list, - const vp9_variance_fn_ptr_t *vfp, - int use_mvcost, - const MV *center_mv, - MV *best_mv) { +static int square_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + int do_init_search, + int *cost_list, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv) { // All scales have 8 closest points in square shape static const int 
square_num_candidates[MAX_PATTERN_SCALES] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, @@ -1490,34 +1491,34 @@ int vp9_square_search(const MACROBLOCK *x, square_num_candidates, square_candidates); } -int vp9_fast_hex_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int sad_per_bit, - int do_init_search, // must be zero for fast_hex - int *cost_list, - const vp9_variance_fn_ptr_t *vfp, - int use_mvcost, - const MV *center_mv, - MV *best_mv) { - return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), - sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, - center_mv, best_mv); +static int fast_hex_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + int do_init_search, // must be zero for fast_hex + int *cost_list, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv) { + return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), + sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, + center_mv, best_mv); } -int vp9_fast_dia_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int sad_per_bit, - int do_init_search, - int *cost_list, - const vp9_variance_fn_ptr_t *vfp, - int use_mvcost, - const MV *center_mv, - MV *best_mv) { - return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), - sad_per_bit, do_init_search, cost_list, vfp, - use_mvcost, center_mv, best_mv); +static int fast_dia_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + int do_init_search, + int *cost_list, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv) { + return bigdia_search( + x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), sad_per_bit, + do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv); } #undef CHECK_BETTER @@ -1547,10 +1548,10 @@ int vp9_full_range_search_c(const MACROBLOCK *x, best_sad = fn_ptr->sdf(what->buf, what->stride, 
get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); - start_row = MAX(-range, x->mv_row_min - ref_mv->row); - start_col = MAX(-range, x->mv_col_min - ref_mv->col); - end_row = MIN(range, x->mv_row_max - ref_mv->row); - end_col = MIN(range, x->mv_col_max - ref_mv->col); + start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row); + start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col); + end_row = VPXMIN(range, x->mv_row_max - ref_mv->row); + end_col = VPXMIN(range, x->mv_col_max - ref_mv->col); for (r = start_row; r <= end_row; ++r) { for (c = start_col; c <= end_col; c += 4) { @@ -1946,15 +1947,16 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, return best_sad; } +// Runs sequence of diamond searches in smaller steps for RD. /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ -int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, - int sadpb, int further_steps, int do_refine, - int *cost_list, - const vp9_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv) { +static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, + MV *mvp_full, int step_param, + int sadpb, int further_steps, int do_refine, + int *cost_list, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, MV *dst_mv) { MV temp_mv; int thissme, n, num00 = 0; int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, @@ -2021,10 +2023,10 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); - const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); - const int col_min = MAX(ref_mv->col - distance, 
x->mv_col_min); - const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); + const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min); + const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max); + const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min); + const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride) + @@ -2054,10 +2056,10 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); - const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); - const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); - const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); + const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min); + const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max); + const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min); + const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride) + @@ -2119,10 +2121,10 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); - const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); - const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); - const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); + 
const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min); + const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max); + const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min); + const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride) + @@ -2346,29 +2348,29 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, switch (method) { case FAST_DIAMOND: - var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, - cost_list, fn_ptr, 1, ref_mv, tmp_mv); + var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case FAST_HEX: - var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, - cost_list, fn_ptr, 1, ref_mv, tmp_mv); + var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case HEX: - var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, - cost_list, fn_ptr, 1, ref_mv, tmp_mv); + var = hex_search(x, mvp_full, step_param, error_per_bit, 1, + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case SQUARE: - var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, - cost_list, fn_ptr, 1, ref_mv, tmp_mv); + var = square_search(x, mvp_full, step_param, error_per_bit, 1, + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case BIGDIA: - var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, - cost_list, fn_ptr, 1, ref_mv, tmp_mv); + var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, + cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case NSTEP: - var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, - MAX_MVSEARCH_STEPS - 1 - step_param, - 1, cost_list, fn_ptr, ref_mv, tmp_mv); + var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, + MAX_MVSEARCH_STEPS 
- 1 - step_param, + 1, cost_list, fn_ptr, ref_mv, tmp_mv); break; default: assert(0 && "Invalid search method."); diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h index 817bd795..5efd5435 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.h +++ b/libvpx/vp9/encoder/vp9_mcomp.h @@ -72,38 +72,12 @@ int vp9_refining_search_sad(const struct macroblock *x, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); -// Runs sequence of diamond searches in smaller steps for RD. -int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, - int sadpb, int further_steps, int do_refine, - int *cost_list, - const vp9_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv); - // Perform integral projection based motion estimation. unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col); -typedef int (integer_mv_pattern_search_fn) ( - const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int error_per_bit, - int do_init_search, - int *cost_list, - const vp9_variance_fn_ptr_t *vf, - int use_mvcost, - const MV *center_mv, - MV *best_mv); - -integer_mv_pattern_search_fn vp9_hex_search; -integer_mv_pattern_search_fn vp9_bigdia_search; -integer_mv_pattern_search_fn vp9_square_search; -integer_mv_pattern_search_fn vp9_fast_hex_search; -integer_mv_pattern_search_fn vp9_fast_dia_search; - typedef int (fractional_mv_step_fp) ( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, diff --git a/libvpx/vp9/encoder/vp9_picklpf.c b/libvpx/vp9/encoder/vp9_picklpf.c index 8e191038..5444bc89 100644 --- a/libvpx/vp9/encoder/vp9_picklpf.c +++ b/libvpx/vp9/encoder/vp9_picklpf.c @@ -40,6 +40,8 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMMON *const cm = &cpi->common; int64_t filt_err; + vp9_build_mask_frame(cm, filt_level, partial_frame); + if (cpi->num_workers > 1) vp9_loop_filter_frame_mt(cm->frame_to_show, cm, 
cpi->td.mb.e_mbd.plane, filt_level, 1, partial_frame, @@ -92,8 +94,8 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, ss_err[filt_mid] = best_err; while (filter_step > 0) { - const int filt_high = MIN(filt_mid + filter_step, max_filter_level); - const int filt_low = MAX(filt_mid - filter_step, min_filter_level); + const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level); + const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level); // Bias against raising loop filter in favor of lowering it. int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c index cc018fcb..fc4d9ae6 100644 --- a/libvpx/vp9/encoder/vp9_pickmode.c +++ b/libvpx/vp9/encoder/vp9_pickmode.c @@ -16,6 +16,7 @@ #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -293,8 +294,8 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, if (cpi->common.tx_mode == TX_MODE_SELECT) { if (sse > (var << 2)) - tx_size = MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + tx_size = VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); else tx_size = TX_8X8; @@ -304,8 +305,8 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, else if (tx_size > TX_16X16) tx_size = TX_16X16; } else { - tx_size = MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + tx_size = VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); } assert(tx_size >= TX_8X8); @@ -475,8 +476,8 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, if (cpi->common.tx_mode == TX_MODE_SELECT) { if (sse > (var << 2)) xd->mi[0]->mbmi.tx_size = - MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + 
VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); else xd->mi[0]->mbmi.tx_size = TX_8X8; @@ -487,8 +488,8 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, xd->mi[0]->mbmi.tx_size = TX_16X16; } else { xd->mi[0]->mbmi.tx_size = - MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); } // Evaluate if the partition block is a skippable block in Y plane. @@ -687,10 +688,11 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, } #endif -static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize, +static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize, MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum, - unsigned int *var_y, unsigned int *sse_y) { + unsigned int *var_y, unsigned int *sse_y, + int start_plane, int stop_plane) { // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. 
@@ -702,12 +704,12 @@ static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize, *out_rate_sum = 0; *out_dist_sum = 0; - for (i = 1; i <= 2; ++i) { + for (i = start_plane; i <= stop_plane; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const uint32_t dc_quant = pd->dequant[0]; const uint32_t ac_quant = pd->dequant[1]; - const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); + const BLOCK_SIZE bs = plane_bsize; unsigned int var; if (!x->color_sensitivity[i - 1]) @@ -791,7 +793,7 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, const unsigned int max_thresh = 36000; // The encode_breakout input const unsigned int min_thresh = - MIN(((unsigned int)x->encode_breakout << 4), max_thresh); + VPXMIN(((unsigned int)x->encode_breakout << 4), max_thresh); #if CONFIG_VP9_HIGHBITDEPTH const int shift = (xd->bd << 1) - 16; #endif @@ -892,12 +894,8 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, int i, j; int rate; int64_t dist; - int64_t this_sse = INT64_MAX; - int is_skippable; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - assert(plane == 0); - (void) plane; p->src.buf = &src_buf_base[4 * (j * src_stride + i)]; pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)]; @@ -907,13 +905,22 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, x->skip_encode ? p->src.buf : pd->dst.buf, x->skip_encode ? src_stride : dst_stride, pd->dst.buf, dst_stride, - i, j, 0); + i, j, plane); - // TODO(jingning): This needs further refactoring. - block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0, - bsize_tx, MIN(tx_size, TX_16X16)); - x->skip_txfm[0] = is_skippable; - rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable); + if (plane == 0) { + int64_t this_sse = INT64_MAX; + int is_skippable; + // TODO(jingning): This needs further refactoring. 
+ block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0, + bsize_tx, VPXMIN(tx_size, TX_16X16)); + x->skip_txfm[0] = is_skippable; + // TODO(jingning): Skip is signalled per prediciton block not per tx block. + rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable); + } else { + unsigned int var, sse; + model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse, + plane, plane); + } p->src.buf = src_buf_base; pd->dst.buf = dst_buf_base; @@ -961,8 +968,8 @@ static INLINE void update_thresh_freq_fact(VP9_COMP *cpi, if (thr_mode_idx == best_mode_idx) *freq_fact -= (*freq_fact >> 4); else - *freq_fact = MIN(*freq_fact + RD_THRESH_INC, - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); + *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, @@ -973,8 +980,8 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, PREDICTION_MODE this_mode; struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 }; const TX_SIZE intra_tx_size = - MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); MODE_INFO *const mic = xd->mi[0]; int *bmode_costs; const MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; @@ -1160,8 +1167,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->sb_type = bsize; mbmi->ref_frame[0] = NONE; mbmi->ref_frame[1] = NONE; - mbmi->tx_size = MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cm->tx_mode]); + mbmi->tx_size = VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cm->tx_mode]); #if CONFIG_VP9_TEMPORAL_DENOISING vp9_denoiser_reset_frame_stats(ctx); @@ -1231,10 +1238,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (const_motion[ref_frame] && this_mode == NEARMV) continue; - i = (ref_frame == LAST_FRAME) ? 
GOLDEN_FRAME : LAST_FRAME; - if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking) - if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) - ref_frame_skip_mask |= (1 << ref_frame); + if (!(this_mode == ZEROMV && ref_frame == LAST_FRAME)) { + i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; + if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking) + if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) + ref_frame_skip_mask |= (1 << ref_frame); + } if (ref_frame_skip_mask & (1 << ref_frame)) continue; @@ -1414,7 +1423,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!this_early_term) { this_sse = (int64_t)sse_y; block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable, - &this_sse, 0, bsize, MIN(mbmi->tx_size, TX_16X16)); + &this_sse, 0, bsize, VPXMIN(mbmi->tx_size, TX_16X16)); x->skip_txfm[0] = is_skippable; if (is_skippable) { this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); @@ -1442,12 +1451,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (x->color_sensitivity[0] || x->color_sensitivity[1]) { int uv_rate = 0; int64_t uv_dist = 0; + const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]); if (x->color_sensitivity[0]) vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1); if (x->color_sensitivity[1]) vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2); - model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist, - &var_y, &sse_y); + model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &uv_rate, &uv_dist, + &var_y, &sse_y, 1, 2); this_rdc.rate += uv_rate; this_rdc.dist += uv_dist; } @@ -1522,11 +1532,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (!x->skip && best_rdc.rdcost > inter_mode_thresh && bsize <= cpi->sf.max_intra_bsize)) { struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 }; - const TX_SIZE intra_tx_size = - MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); int i; TX_SIZE 
best_intra_tx_size = TX_SIZES; + TX_SIZE intra_tx_size = + VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16) + intra_tx_size = TX_16X16; if (reuse_inter_pred && best_pred != NULL) { if (best_pred->data == orig_dst.buf) { @@ -1570,6 +1582,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = intra_tx_size; vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra, &args); + // Inter and intra RD will mismatch in scale for non-screen content. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) { + if (x->color_sensitivity[0]) + vp9_foreach_transformed_block_in_plane(xd, bsize, 1, + estimate_block_intra, &args); + if (x->color_sensitivity[1]) + vp9_foreach_transformed_block_in_plane(xd, bsize, 2, + estimate_block_intra, &args); + } this_rdc.rate = args.rate; this_rdc.dist = args.dist; this_rdc.rate += cpi->mbmode_cost[this_mode]; diff --git a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c index 4ba34067..d7006857 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/libvpx/vp9/encoder/vp9_ratectrl.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include <string.h> +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" @@ -106,8 +107,8 @@ static int kf_low = 400; static int get_minq_index(double maxq, double x3, double x2, double x1, vpx_bit_depth_t bit_depth) { int i; - const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq, - maxq); + const double minqtarget = VPXMIN(((x3 * maxq + x2) * maxq + x1) * maxq, + maxq); // Special case handling to deal with the step from q2.0 // down to lossless mode represented by q 1.0. 
@@ -192,15 +193,15 @@ int vp9_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, vpx_bit_depth_t bit_depth) { const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor, bit_depth)); - return MAX(FRAME_OVERHEAD_BITS, - (int)((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS); + return VPXMAX(FRAME_OVERHEAD_BITS, + (int)((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS); } int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { const RATE_CONTROL *rc = &cpi->rc; const VP9EncoderConfig *oxcf = &cpi->oxcf; - const int min_frame_target = MAX(rc->min_frame_bandwidth, - rc->avg_frame_bandwidth >> 5); + const int min_frame_target = VPXMAX(rc->min_frame_bandwidth, + rc->avg_frame_bandwidth >> 5); if (target < min_frame_target) target = min_frame_target; if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) { @@ -216,7 +217,7 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { if (oxcf->rc_max_inter_bitrate_pct) { const int max_rate = rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100; - target = MIN(target, max_rate); + target = VPXMIN(target, max_rate); } return target; } @@ -227,7 +228,7 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { if (oxcf->rc_max_intra_bitrate_pct) { const int max_rate = rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100; - target = MIN(target, max_rate); + target = VPXMIN(target, max_rate); } if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth; @@ -250,7 +251,8 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { lrc->bits_off_target += bits_off_for_this_layer; // Clip buffer level to maximum buffer size for the layer. 
- lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->bits_off_target = + VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = lrc->bits_off_target; } } @@ -268,7 +270,14 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { } // Clip the buffer level to the maximum specified buffer size. - rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); + rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); + + // For screen-content mode, and if frame-dropper is off, don't let buffer + // level go below threshold, given here as -rc->maximum_ buffer_size. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + cpi->oxcf.drop_frames_water_mark == 0) + rc->bits_off_target = VPXMAX(rc->bits_off_target, -rc->maximum_buffer_size); + rc->buffer_level = rc->bits_off_target; if (is_one_pass_cbr_svc(cpi)) { @@ -287,8 +296,8 @@ int vp9_rc_get_default_min_gf_interval( if (factor <= factor_safe) return default_interval; else - return MAX(default_interval, - (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5)); + return VPXMAX(default_interval, + (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5)); // Note this logic makes: // 4K24: 5 // 4K30: 6 @@ -296,9 +305,9 @@ int vp9_rc_get_default_min_gf_interval( } int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) { - int interval = MIN(MAX_GF_INTERVAL, (int)(framerate * 0.75)); + int interval = VPXMIN(MAX_GF_INTERVAL, (int)(framerate * 0.75)); interval += (interval & 0x01); // Round to even value - return MAX(interval, min_gf_interval); + return VPXMAX(interval, min_gf_interval); } void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { @@ -478,7 +487,7 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { // More heavily damped adjustment used if we have been oscillating either side // of target. 
adjustment_limit = 0.25 + - 0.5 * MIN(1, fabs(log10(0.01 * correction_factor))); + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor))); cpi->rc.q_2_frame = cpi->rc.q_1_frame; cpi->rc.q_1_frame = cm->base_qindex; @@ -531,8 +540,7 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, do { if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && - cpi->svc.temporal_layer_id == 0 && - cpi->svc.spatial_layer_id == 0) { + cpi->svc.temporal_layer_id == 0) { bits_per_mb_at_this_q = (int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor); } else { @@ -558,8 +566,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, if (cpi->oxcf.rc_mode == VPX_CBR && (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) && cpi->rc.q_1_frame != cpi->rc.q_2_frame) { - q = clamp(q, MIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), - MAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); + q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), + VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); } return q; } @@ -617,7 +625,7 @@ static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) { : rc->last_q[INTER_FRAME] * 2; } } - return MIN(active_worst_quality, rc->worst_quality); + return VPXMIN(active_worst_quality, rc->worst_quality); } // Adjust active_worst_quality level based on buffer level. @@ -644,10 +652,10 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // So for first few frames following key, the qp of that key frame is weighted // into the active_worst_quality setting. ambient_qp = (cm->current_video_frame < num_frames_weight_key) ? 
- MIN(rc->avg_frame_qindex[INTER_FRAME], rc->avg_frame_qindex[KEY_FRAME]) : - rc->avg_frame_qindex[INTER_FRAME]; - active_worst_quality = MIN(rc->worst_quality, - ambient_qp * 5 / 4); + VPXMIN(rc->avg_frame_qindex[INTER_FRAME], + rc->avg_frame_qindex[KEY_FRAME]) : + rc->avg_frame_qindex[INTER_FRAME]; + active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 / 4); if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. // Maximum limit for down adjustment, ~30%. @@ -700,7 +708,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, (last_boosted_q * 0.75), cm->bit_depth); - active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); + active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { // not first frame of one pass and kf_boost is set double q_adj_factor = 1.0; @@ -833,7 +841,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth); - active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); + active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else { // not first frame of one pass and kf_boost is set double q_adj_factor = 1.0; @@ -1002,21 +1010,21 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int qindex; if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { - qindex = MIN(rc->last_kf_qindex, rc->last_boosted_qindex); + qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex); active_best_quality = qindex; last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, last_boosted_q * 1.25, cm->bit_depth); - active_worst_quality = MIN(qindex + delta_qindex, active_worst_quality); - + active_worst_quality = + VPXMIN(qindex + delta_qindex, active_worst_quality); } else { 
qindex = rc->last_boosted_qindex; last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth); - active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); + active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } } else { // Not forced keyframe. @@ -1116,8 +1124,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) { int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index], active_worst_quality); - active_worst_quality = MAX(active_worst_quality + qdelta, - active_best_quality); + active_worst_quality = VPXMAX(active_worst_quality + qdelta, + active_best_quality); } #endif @@ -1126,7 +1134,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int qdelta = vp9_compute_qdelta_by_rate(rc, cm->frame_type, active_best_quality, 2.0, cm->bit_depth); - active_best_quality = MAX(active_best_quality + qdelta, rc->best_quality); + active_best_quality = + VPXMAX(active_best_quality + qdelta, rc->best_quality); } active_best_quality = clamp(active_best_quality, @@ -1141,7 +1150,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, rc->this_key_frame_forced) { // If static since last kf use better of last boosted and last kf q. if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { - q = MIN(rc->last_kf_qindex, rc->last_boosted_qindex); + q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex); } else { q = rc->last_boosted_qindex; } @@ -1203,9 +1212,9 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, // For very small rate targets where the fractional adjustment // may be tiny make sure there is at least a minimum range. 
const int tolerance = (cpi->sf.recode_tolerance * frame_target) / 100; - *frame_under_shoot_limit = MAX(frame_target - tolerance - 200, 0); - *frame_over_shoot_limit = MIN(frame_target + tolerance + 200, - cpi->rc.max_frame_bandwidth); + *frame_under_shoot_limit = VPXMAX(frame_target - tolerance - 200, 0); + *frame_over_shoot_limit = VPXMIN(frame_target + tolerance + 200, + cpi->rc.max_frame_bandwidth); } } @@ -1351,7 +1360,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (!cpi->use_svc) { + if (!cpi->use_svc || is_two_pass_svc(cpi)) { if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. @@ -1458,7 +1467,8 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const SVC *const svc = &cpi->svc; const int64_t diff = rc->optimal_buffer_level - rc->buffer_level; const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100; - int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); + int min_frame_target = + VPXMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target; if (oxcf->gf_cbr_boost_pct) { @@ -1480,23 +1490,24 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { svc->temporal_layer_id, svc->number_temporal_layers); const LAYER_CONTEXT *lc = &svc->layer_context[layer]; target = lc->avg_frame_size; - min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); + min_frame_target = VPXMAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } if (diff > 0) { // Lower the target bandwidth for this frame. - const int pct_low = (int)MIN(diff / one_pct_bits, oxcf->under_shoot_pct); + const int pct_low = (int)VPXMIN(diff / one_pct_bits, oxcf->under_shoot_pct); target -= (target * pct_low) / 200; } else if (diff < 0) { // Increase the target bandwidth for this frame. 
- const int pct_high = (int)MIN(-diff / one_pct_bits, oxcf->over_shoot_pct); + const int pct_high = + (int)VPXMIN(-diff / one_pct_bits, oxcf->over_shoot_pct); target += (target * pct_high) / 200; } if (oxcf->rc_max_inter_bitrate_pct) { const int max_rate = rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100; - target = MIN(target, max_rate); + target = VPXMIN(target, max_rate); } - return MAX(min_frame_target, target); + return VPXMAX(min_frame_target, target); } static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { @@ -1518,7 +1529,7 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const LAYER_CONTEXT *lc = &svc->layer_context[layer]; framerate = lc->framerate; } - kf_boost = MAX(kf_boost, (int)(2 * framerate - 16)); + kf_boost = VPXMAX(kf_boost, (int)(2 * framerate - 16)); if (rc->frames_since_key < framerate / 2) { kf_boost = (int)(kf_boost * rc->frames_since_key / (framerate / 2)); @@ -1584,7 +1595,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cpi->ref_frame_flags &= (~VP9_ALT_FLAG); } else if (is_one_pass_cbr_svc(cpi)) { LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; - if (cpi->svc.spatial_layer_id == 0) { + if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) { lc->is_key_frame = 0; } else { lc->is_key_frame = @@ -1726,7 +1737,7 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi, rc->max_gf_interval = rc->static_scene_max_gf_interval; // Clamp min to max - rc->min_gf_interval = MIN(rc->min_gf_interval, rc->max_gf_interval); + rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval); } void vp9_rc_update_framerate(VP9_COMP *cpi) { @@ -1739,7 +1750,8 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) { rc->min_frame_bandwidth = (int)(rc->avg_frame_bandwidth * oxcf->two_pass_vbrmin_section / 100); - rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); + rc->min_frame_bandwidth = + VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); // 
A maximum bitrate for a frame is defined. // The baseline for this aligns with HW implementations that @@ -1750,8 +1762,8 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) { // specifies lossless encode. vbr_max_bits = (int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / 100); - rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), - vbr_max_bits); + rc->max_frame_bandwidth = + VPXMAX(VPXMAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); vp9_rc_set_gf_interval_range(cpi, rc); } @@ -1789,12 +1801,12 @@ static void vbr_rate_correction(VP9_COMP *cpi, int *this_frame_target) { // Dont do it for kf,arf,gf or overlay frames. if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref && rc->vbr_bits_off_target_fast) { - int one_frame_bits = MAX(rc->avg_frame_bandwidth, *this_frame_target); + int one_frame_bits = VPXMAX(rc->avg_frame_bandwidth, *this_frame_target); int fast_extra_bits; - fast_extra_bits = - (int)MIN(rc->vbr_bits_off_target_fast, one_frame_bits); - fast_extra_bits = (int)MIN(fast_extra_bits, - MAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8)); + fast_extra_bits = (int)VPXMIN(rc->vbr_bits_off_target_fast, one_frame_bits); + fast_extra_bits = (int)VPXMIN( + fast_extra_bits, + VPXMAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8)); *this_frame_target += (int)fast_extra_bits; rc->vbr_bits_off_target_fast -= fast_extra_bits; } @@ -1804,6 +1816,11 @@ void vp9_set_target_rate(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int target_rate = rc->base_frame_target; + if (cpi->common.frame_type == KEY_FRAME) + target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate); + else + target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); + // Correction to rate target based on prior over or under shoot. 
if (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.rc_mode == VPX_CQ) vbr_rate_correction(cpi, &target_rate); @@ -1815,7 +1832,9 @@ void vp9_set_target_rate(VP9_COMP *cpi) { int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; - int resize_now = 0; + RESIZE_ACTION resize_action = NO_RESIZE; + int avg_qp_thr1 = 70; + int avg_qp_thr2 = 50; cpi->resize_scale_num = 1; cpi->resize_scale_den = 1; // Don't resize on key frame; reset the counters on key frame. @@ -1824,10 +1843,19 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { cpi->resize_count = 0; return 0; } + +#if CONFIG_VP9_TEMPORAL_DENOISING + // If denoiser is on, apply a smaller qp threshold. + if (cpi->oxcf.noise_sensitivity > 0) { + avg_qp_thr1 = 60; + avg_qp_thr2 = 40; + } +#endif + // Resize based on average buffer underflow and QP over some window. // Ignore samples close to key frame, since QP is usually high after key. - if (cpi->rc.frames_since_key > 2 * cpi->framerate) { - const int window = (int)(5 * cpi->framerate); + if (cpi->rc.frames_since_key > 1 * cpi->framerate) { + const int window = (int)(4 * cpi->framerate); cpi->resize_avg_qp += cm->base_qindex; if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100)) ++cpi->resize_buffer_underflow; @@ -1835,18 +1863,30 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { // Check for resize action every "window" frames. if (cpi->resize_count >= window) { int avg_qp = cpi->resize_avg_qp / cpi->resize_count; - // Resize down if buffer level has underflowed sufficent amount in past - // window, and we are at original resolution. + // Resize down if buffer level has underflowed sufficient amount in past + // window, and we are at original or 3/4 of original resolution. // Resize back up if average QP is low, and we are currently in a resized - // down state. 
- if (cpi->resize_state == 0 && - cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) { - resize_now = 1; - cpi->resize_state = 1; - } else if (cpi->resize_state == 1 && - avg_qp < 40 * cpi->rc.worst_quality / 100) { - resize_now = -1; - cpi->resize_state = 0; + // down state, i.e. 1/2 or 3/4 of original resolution. + // Currently, use a flag to turn 3/4 resizing feature on/off. + if (cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) { + if (cpi->resize_state == THREE_QUARTER) { + resize_action = DOWN_ONEHALF; + cpi->resize_state = ONE_HALF; + } else if (cpi->resize_state == ORIG) { + resize_action = ONEHALFONLY_RESIZE ? DOWN_ONEHALF : DOWN_THREEFOUR; + cpi->resize_state = ONEHALFONLY_RESIZE ? ONE_HALF : THREE_QUARTER; + } + } else if (cpi->resize_state != ORIG && + avg_qp < avg_qp_thr1 * cpi->rc.worst_quality / 100) { + if (cpi->resize_state == THREE_QUARTER || + avg_qp < avg_qp_thr2 * cpi->rc.worst_quality / 100 || + ONEHALFONLY_RESIZE) { + resize_action = UP_ORIG; + cpi->resize_state = ORIG; + } else if (cpi->resize_state == ONE_HALF) { + resize_action = UP_THREEFOUR; + cpi->resize_state = THREE_QUARTER; + } } // Reset for next window measurement. cpi->resize_avg_qp = 0; @@ -1856,26 +1896,30 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { } // If decision is to resize, reset some quantities, and check is we should // reduce rate correction factor, - if (resize_now != 0) { + if (resize_action != NO_RESIZE) { int target_bits_per_frame; int active_worst_quality; int qindex; int tot_scale_change; - // For now, resize is by 1/2 x 1/2. 
- cpi->resize_scale_num = 1; - cpi->resize_scale_den = 2; + if (resize_action == DOWN_THREEFOUR || resize_action == UP_THREEFOUR) { + cpi->resize_scale_num = 3; + cpi->resize_scale_den = 4; + } else if (resize_action == DOWN_ONEHALF) { + cpi->resize_scale_num = 1; + cpi->resize_scale_den = 2; + } else { // UP_ORIG or anything else + cpi->resize_scale_num = 1; + cpi->resize_scale_den = 1; + } tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) / (cpi->resize_scale_num * cpi->resize_scale_num); // Reset buffer level to optimal, update target size. rc->buffer_level = rc->optimal_buffer_level; rc->bits_off_target = rc->optimal_buffer_level; rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi); - // Reset cyclic refresh parameters. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) - vp9_cyclic_refresh_reset_resize(cpi); // Get the projected qindex, based on the scaled target frame size (scaled // so target_bits_per_mb in vp9_rc_regulate_q will be correct target). - target_bits_per_frame = (resize_now == 1) ? + target_bits_per_frame = (resize_action >= 0) ? rc->this_frame_target * tot_scale_change : rc->this_frame_target / tot_scale_change; active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi); @@ -1886,19 +1930,19 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { // If resize is down, check if projected q index is close to worst_quality, // and if so, reduce the rate correction factor (since likely can afford // lower q for resized frame). - if (resize_now == 1 && + if (resize_action > 0 && qindex > 90 * cpi->rc.worst_quality / 100) { rc->rate_correction_factors[INTER_NORMAL] *= 0.85; } // If resize is back up, check if projected q index is too much above the // current base_qindex, and if so, reduce the rate correction factor // (since prefer to keep q for resized frame at least close to previous q). 
- if (resize_now == -1 && + if (resize_action < 0 && qindex > 130 * cm->base_qindex / 100) { rc->rate_correction_factors[INTER_NORMAL] *= 0.9; } } - return resize_now; + return resize_action; } // Compute average source sad (temporal sad: between current source and @@ -1948,7 +1992,7 @@ void vp9_avg_source_sad(VP9_COMP *cpi) { // between current and the previous frame value(s). Use a minimum threshold // for cases where there is small change from content that is completely // static. - if (avg_sad > MAX(4000, (rc->avg_source_sad << 3)) && + if (avg_sad > VPXMAX(4000, (rc->avg_source_sad << 3)) && rc->frames_since_key > 1) rc->high_source_sad = 1; else @@ -1968,16 +2012,59 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int thresh_rate = rc->avg_frame_bandwidth * 10; if (cm->base_qindex < thresh_qp && frame_size > thresh_rate) { + double rate_correction_factor = + cpi->rc.rate_correction_factors[INTER_NORMAL]; + const int target_size = cpi->rc.avg_frame_bandwidth; + double new_correction_factor; + int target_bits_per_mb; + double q2; + int enumerator; // Force a re-encode, and for now use max-QP. *q = cpi->rc.worst_quality; - // Adjust avg_frame_qindex and buffer_level, as these parameters will affect - // QP selection for subsequent frames. If they have settled down to a very - // different (low QP) state, then not re-adjusting them may cause next - // frame to select low QP and overshoot again. - // TODO(marpan): Check if rate correction factor should also be adjusted. + // Adjust avg_frame_qindex, buffer_level, and rate correction factors, as + // these parameters will affect QP selection for subsequent frames. If they + // have settled down to a very different (low QP) state, then not adjusting + // them may cause next frame to select low QP and overshoot again. cpi->rc.avg_frame_qindex[INTER_FRAME] = *q; rc->buffer_level = rc->optimal_buffer_level; rc->bits_off_target = rc->optimal_buffer_level; + // Reset rate under/over-shoot flags. 
+ cpi->rc.rc_1_frame = 0; + cpi->rc.rc_2_frame = 0; + // Adjust rate correction factor. + target_bits_per_mb = ((uint64_t)target_size << BPER_MB_NORMBITS) / cm->MBs; + // Rate correction factor based on target_bits_per_mb and qp (==max_QP). + // This comes from the inverse computation of vp9_rc_bits_per_mb(). + q2 = vp9_convert_qindex_to_q(*q, cm->bit_depth); + enumerator = 1800000; // Factor for inter frame. + enumerator += (int)(enumerator * q2) >> 12; + new_correction_factor = (double)target_bits_per_mb * q2 / enumerator; + if (new_correction_factor > rate_correction_factor) { + rate_correction_factor = + VPXMIN(2.0 * rate_correction_factor, new_correction_factor); + if (rate_correction_factor > MAX_BPB_FACTOR) + rate_correction_factor = MAX_BPB_FACTOR; + cpi->rc.rate_correction_factors[INTER_NORMAL] = rate_correction_factor; + } + // For temporal layers, reset the rate control parametes across all + // temporal layers. + if (cpi->use_svc) { + int i = 0; + SVC *svc = &cpi->svc; + for (i = 0; i < svc->number_temporal_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; + RATE_CONTROL *lrc = &lc->rc; + lrc->avg_frame_qindex[INTER_FRAME] = *q; + lrc->buffer_level = rc->optimal_buffer_level; + lrc->bits_off_target = rc->optimal_buffer_level; + lrc->rc_1_frame = 0; + lrc->rc_2_frame = 0; + lrc->rate_correction_factors[INTER_NORMAL] = + rate_correction_factor; + } + } return 1; } else { return 0; diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h index 11dfa35c..136fd3e7 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/libvpx/vp9/encoder/vp9_ratectrl.h @@ -26,6 +26,7 @@ extern "C" { #define MIN_GF_INTERVAL 4 #define MAX_GF_INTERVAL 16 +#define ONEHALFONLY_RESIZE 0 typedef enum { INTER_NORMAL = 0, @@ -43,6 +44,20 @@ typedef enum { FRAME_SCALE_STEPS } FRAME_SCALE_LEVEL; +typedef enum { + NO_RESIZE = 0, + DOWN_THREEFOUR = 1, // 
From orig to 3/4. + DOWN_ONEHALF = 2, // From orig or 3/4 to 1/2. + UP_THREEFOUR = -1, // From 1/2 to 3/4. + UP_ORIG = -2, // From 1/2 or 3/4 to orig. +} RESIZE_ACTION; + +typedef enum { + ORIG = 0, + THREE_QUARTER = 1, + ONE_HALF = 2 +} RESIZE_STATE; + // Frame dimensions multiplier wrt the native frame size, in 1/16ths, // specified for the scale-up case. // e.g. 24 => 16/24 = 2/3 of native size. The restriction to 1/16th is diff --git a/libvpx/vp9/encoder/vp9_rd.c b/libvpx/vp9/encoder/vp9_rd.c index 2f2f7c1b..b085c7a0 100644 --- a/libvpx/vp9/encoder/vp9_rd.c +++ b/libvpx/vp9/encoder/vp9_rd.c @@ -14,6 +14,7 @@ #include "./vp9_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/bitops.h" #include "vpx_ports/mem.h" @@ -172,7 +173,7 @@ int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index]; - const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100)); + const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100)); rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7; rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7); @@ -204,7 +205,7 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) { q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; #endif // CONFIG_VP9_HIGHBITDEPTH // TODO(debargha): Adjust the function below. 
- return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8); + return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8); } void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) { @@ -404,7 +405,7 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, static const uint32_t MAX_XSQ_Q10 = 245727; const uint64_t xsq_q10_64 = (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var; - const int xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10); + const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10); model_rd_norm(xsq_q10, &r_q10, &d_q10); *rate = ((r_q10 << n_log2) + 2) >> 2; *dist = (var * (int64_t)d_q10 + 512) >> 10; @@ -485,7 +486,7 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, continue; fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3; fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3; - max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3); + max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3); if (fp_row ==0 && fp_col == 0 && zero_seen) continue; @@ -629,16 +630,15 @@ void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, const int top_mode = bsize < BLOCK_8X8 ? 
MAX_REFS : MAX_MODES; int mode; for (mode = 0; mode < top_mode; ++mode) { - const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4); - const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64); + const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4); + const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64); BLOCK_SIZE bs; for (bs = min_size; bs <= max_size; ++bs) { int *const fact = &factor_buf[bs][mode]; if (mode == best_mode_index) { *fact -= (*fact >> 4); } else { - *fact = MIN(*fact + RD_THRESH_INC, - rd_thresh * RD_THRESH_MAX_FACT); + *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT); } } } diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c index 96c64744..4f3a06e9 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.c +++ b/libvpx/vp9/encoder/vp9_rdopt.c @@ -14,6 +14,7 @@ #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" @@ -192,8 +193,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, const int64_t ac_thr = p->quant_thred[1] >> shift; // The low thresholds are used to measure if the prediction errors are // low enough so that we can skip the mode search. 
- const int64_t low_dc_thr = MIN(50, dc_thr >> 2); - const int64_t low_ac_thr = MIN(80, ac_thr >> 2); + const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2); + const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2); int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]); int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]); int idx, idy; @@ -268,57 +269,79 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, *out_dist_sum = dist_sum << 4; } -int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, - intptr_t block_size, int64_t *ssz) { +#if CONFIG_VP9_HIGHBITDEPTH +int64_t vp9_highbd_block_error_c(const tran_low_t *coeff, + const tran_low_t *dqcoeff, + intptr_t block_size, + int64_t *ssz, int bd) { int i; int64_t error = 0, sqcoeff = 0; + int shift = 2 * (bd - 8); + int rounding = shift > 0 ? 1 << (shift - 1) : 0; for (i = 0; i < block_size; i++) { - const int diff = coeff[i] - dqcoeff[i]; + const int64_t diff = coeff[i] - dqcoeff[i]; error += diff * diff; - sqcoeff += coeff[i] * coeff[i]; + sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i]; } + assert(error >= 0 && sqcoeff >= 0); + error = (error + rounding) >> shift; + sqcoeff = (sqcoeff + rounding) >> shift; *ssz = sqcoeff; return error; } -int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, - int block_size) { +int64_t vp9_highbd_block_error_8bit_c(const tran_low_t *coeff, + const tran_low_t *dqcoeff, + intptr_t block_size, + int64_t *ssz) { + // Note that the C versions of these 2 functions (vp9_block_error and + // vp9_highbd_block_error_8bit are the same, but the optimized assembly + // routines are not compatible in the non high bitdepth configuration, so + // they still cannot share the same name. 
+ return vp9_block_error_c(coeff, dqcoeff, block_size, ssz); +} + +static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff, + const tran_low_t *dqcoeff, + intptr_t block_size, + int64_t *ssz, int bd) { + if (bd == 8) { + return vp9_highbd_block_error_8bit(coeff, dqcoeff, block_size, ssz); + } else { + return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, + intptr_t block_size, int64_t *ssz) { int i; - int64_t error = 0; + int64_t error = 0, sqcoeff = 0; for (i = 0; i < block_size; i++) { const int diff = coeff[i] - dqcoeff[i]; error += diff * diff; + sqcoeff += coeff[i] * coeff[i]; } + *ssz = sqcoeff; return error; } -#if CONFIG_VP9_HIGHBITDEPTH -int64_t vp9_highbd_block_error_c(const tran_low_t *coeff, - const tran_low_t *dqcoeff, - intptr_t block_size, - int64_t *ssz, int bd) { +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, + int block_size) { int i; - int64_t error = 0, sqcoeff = 0; - int shift = 2 * (bd - 8); - int rounding = shift > 0 ? 1 << (shift - 1) : 0; + int64_t error = 0; for (i = 0; i < block_size; i++) { - const int64_t diff = coeff[i] - dqcoeff[i]; + const int diff = coeff[i] - dqcoeff[i]; error += diff * diff; - sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i]; } - assert(error >= 0 && sqcoeff >= 0); - error = (error + rounding) >> shift; - sqcoeff = (sqcoeff + rounding) >> shift; - *ssz = sqcoeff; return error; } -#endif // CONFIG_VP9_HIGHBITDEPTH /* The trailing '0' is a terminator which is used inside cost_coeffs() to * decide whether to include cost of a trailing EOB node or not (i.e. 
we @@ -340,8 +363,7 @@ static int cost_coeffs(MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct macroblock_plane *p = &x->plane[plane]; - const struct macroblockd_plane *pd = &xd->plane[plane]; - const PLANE_TYPE type = pd->plane_type; + const PLANE_TYPE type = get_plane_type(plane); const int16_t *band_count = &band_counts[tx_size][1]; const int eob = p->eobs[block]; const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); @@ -357,8 +379,8 @@ static int cost_coeffs(MACROBLOCK *x, #endif // Check for consistency of tx_size with mode info - assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size - : get_uv_tx_size(mbmi, pd) == tx_size); + assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size : + get_uv_tx_size(mbmi, &xd->plane[plane]) == tx_size); if (eob == 0) { // single eob token @@ -430,8 +452,9 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); #if CONFIG_VP9_HIGHBITDEPTH const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
xd->bd : 8; - *out_dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, - &this_sse, bd) >> shift; + *out_dist = vp9_highbd_block_error_dispatch(coeff, dqcoeff, + 16 << ss_txfrm_size, + &this_sse, bd) >> shift; #else *out_dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >> shift; @@ -505,7 +528,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (tx_size != TX_32X32) dc_correct >>= 2; - dist = MAX(0, sse - dc_correct); + dist = VPXMAX(0, sse - dc_correct); } } else { // SKIP_TXFM_AC_DC @@ -531,7 +554,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse); // TODO(jingning): temporarily enabled only for luma component - rd = MIN(rd1, rd2); + rd = VPXMIN(rd1, rd2); if (plane == 0) x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless); @@ -569,7 +592,7 @@ static void txfm_rd_in_plane(MACROBLOCK *x, vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); - args.so = get_scan(xd, tx_size, pd->plane_type, 0); + args.so = get_scan(xd, tx_size, get_plane_type(plane), 0); vp9_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, &args); @@ -597,7 +620,7 @@ static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - mbmi->tx_size = MIN(max_tx_size, largest_tx_size); + mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size); txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd, 0, bs, @@ -637,8 +660,8 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, start_tx = max_tx_size; end_tx = 0; } else { - TX_SIZE chosen_tx_size = MIN(max_tx_size, - tx_mode_to_biggest_tx_size[cm->tx_mode]); + TX_SIZE chosen_tx_size = VPXMIN(max_tx_size, + tx_mode_to_biggest_tx_size[cm->tx_mode]); start_tx = chosen_tx_size; end_tx = chosen_tx_size; } @@ -663,6 +686,7 @@ static void 
choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, } else if (s[n]) { if (is_inter_block(mbmi)) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); + r[n][1] -= r_tx_size; } else { rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]); @@ -672,6 +696,11 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } + if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) { + rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); + rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); + } + // Early termination in transform size search. if (cpi->sf.tx_size_search_breakout && (rd[n][1] == INT64_MAX || @@ -825,7 +854,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); - distortion += vp9_highbd_block_error( + distortion += vp9_highbd_block_error_dispatch( coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused, xd->bd) >> 2; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) @@ -923,8 +952,13 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); +#if CONFIG_VP9_HIGHBITDEPTH + distortion += vp9_highbd_block_error_8bit( + coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2; +#else distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2; +#endif if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), @@ -1362,6 +1396,9 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, k = i; for (idy = 0; idy < height / 4; ++idy) { for (idx = 0; idx < width / 4; ++idx) { +#if 
CONFIG_VP9_HIGHBITDEPTH + const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8; +#endif int64_t ssz, rd, rd1, rd2; tran_low_t* coeff; @@ -1371,14 +1408,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, coeff, 8); vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan); #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - thisdistortion += vp9_highbd_block_error(coeff, - BLOCK_OFFSET(pd->dqcoeff, k), - 16, &ssz, xd->bd); - } else { - thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), - 16, &ssz); - } + thisdistortion += vp9_highbd_block_error_dispatch( + coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd); #else thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz); @@ -1389,7 +1420,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, cpi->sf.use_fast_coef_costing); rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); - rd = MIN(rd1, rd2); + rd = VPXMIN(rd1, rd2); if (rd >= best_yrd) return INT64_MAX; } @@ -1808,7 +1839,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, if (i == 0) max_mv = x->max_mv_context[mbmi->ref_frame[0]]; else - max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; + max_mv = + VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's @@ -1826,7 +1858,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->sf.adaptive_motion_search) { mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3; mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3; - step_param = MAX(step_param, 8); + step_param = VPXMAX(step_param, 8); } // adjust src pointer for this block @@ -2231,7 +2263,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, vp9_set_mv_search_range(x, &ref_mv); // 
Work out the size of the first step in the mv step search. - // 0 here is maximum length first step. 1 is MAX >> 1 etc. + // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc. if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and that based on the best ref mvs of the current @@ -2243,9 +2275,10 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { - int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] - - MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); - step_param = MAX(step_param, boffset); + int boffset = + 2 * (b_width_log2_lookup[BLOCK_64X64] - + VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); + step_param = VPXMAX(step_param, boffset); } if (cpi->sf.adaptive_motion_search) { @@ -2466,7 +2499,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // motion field, where the distortion gain for a single block may not // be enough to overcome the cost of a new mv. if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) { - *rate2 += MAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); + *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); } else { *rate2 += rate_mv; } @@ -2502,10 +2535,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // initiation of a motion field. 
if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, refs[0])) { - *rate2 += MIN(cost_mv_ref(cpi, this_mode, - mbmi_ext->mode_context[refs[0]]), - cost_mv_ref(cpi, NEARESTMV, - mbmi_ext->mode_context[refs[0]])); + *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, + mbmi_ext->mode_context[refs[0]]), + cost_mv_ref(cpi, NEARESTMV, + mbmi_ext->mode_context[refs[0]])); } else { *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]); } @@ -2547,10 +2580,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); filter_cache[i] = rd; filter_cache[SWITCHABLE_FILTERS] = - MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); + VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; - *mask_filter = MAX(*mask_filter, rd); + *mask_filter = VPXMAX(*mask_filter, rd); } else { int rate_sum = 0; int64_t dist_sum = 0; @@ -2580,10 +2613,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); filter_cache[i] = rd; filter_cache[SWITCHABLE_FILTERS] = - MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); + VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; - *mask_filter = MAX(*mask_filter, rd); + *mask_filter = VPXMAX(*mask_filter, rd); if (i == 0 && intpel_mv) { tmp_rate_sum = rate_sum; @@ -2694,7 +2727,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *distortion += distortion_y; rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); - rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); + rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, &sseuv, bsize, ref_best_rd - rdcosty)) { @@ -2759,7 +2792,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, pd[1].subsampling_x, pd[1].subsampling_y); 
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize), + &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size); if (y_skip && uv_skip) { @@ -2826,12 +2859,12 @@ static void rd_variance_adjustment(VP9_COMP *cpi, // to a predictor with a low spatial complexity compared to the source. if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) && (source_variance > recon_variance)) { - var_factor = MIN(absvar_diff, MIN(VLOW_ADJ_MAX, var_error)); + var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error)); // A second possible case of interest is where the source variance // is very low and we wish to discourage false texture or motion trails. } else if ((source_variance < (LOW_VAR_THRESH >> 1)) && (recon_variance > source_variance)) { - var_factor = MIN(absvar_diff, MIN(VHIGH_ADJ_MAX, var_error)); + var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error)); } *this_rd += (*this_rd * var_factor) / 100; } @@ -2861,7 +2894,7 @@ int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) { top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2); bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2); - bottom_edge = MAX(top_edge, bottom_edge); + bottom_edge = VPXMAX(top_edge, bottom_edge); } if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) || @@ -2888,7 +2921,7 @@ int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) { left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2); right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2); - right_edge = MAX(left_edge, right_edge); + right_edge = VPXMAX(left_edge, right_edge); } if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) || @@ -3135,7 +3168,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, } if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && - (ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))) + (ref_frame_skip_mask[1] & (1 << 
VPXMAX(0, second_ref_frame)))) continue; if (mode_skip_mask[ref_frame] & (1 << this_mode)) @@ -3149,10 +3182,10 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, continue; if (sf->motion_field_mode_search) { - const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize], - tile_info->mi_col_end - mi_col); - const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize], - tile_info->mi_row_end - mi_row); + const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize], + tile_info->mi_col_end - mi_col); + const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize], + tile_info->mi_row_end - mi_row); const int bsl = mi_width_log2_lookup[bsize]; int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl) + get_chessboard_index(cm->current_video_frame)) & 0x1; @@ -3370,9 +3403,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, if (!disable_skip && ref_frame == INTRA_FRAME) { for (i = 0; i < REFERENCE_MODES; ++i) - best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); + best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); + best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd); } // Did this mode help.. i.e. 
is it the new best mode @@ -3471,7 +3504,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, adj_rd = filter_cache[i] - ref; adj_rd += this_rd; - best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); + best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd); } } } @@ -3783,6 +3816,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, int this_skip2 = 0; int64_t total_sse = INT_MAX; int early_term = 0; + struct buf_2d backup_yv12[2][MAX_MB_PLANE]; ref_frame = vp9_ref_order[ref_index].ref_frame[0]; second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; @@ -3814,7 +3848,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, } if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && - (ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))) + (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame)))) continue; // Test best rd so far against threshold for trying this mode. @@ -3840,16 +3874,6 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, continue; } - // TODO(jingning, jkoleszar): scaling reference frame not supported for - // sub8x8 blocks. - if (ref_frame > INTRA_FRAME && - vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) - continue; - - if (second_ref_frame > INTRA_FRAME && - vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) - continue; - if (comp_pred) mode_excluded = cm->reference_mode == SINGLE_REFERENCE; else if (ref_frame != INTRA_FRAME) @@ -3928,6 +3952,25 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, int pred_exists = 0; int uv_skippable; + YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL}; + int ref; + + for (ref = 0; ref < 2; ++ref) { + scaled_ref_frame[ref] = mbmi->ref_frame[ref] > INTRA_FRAME ? + vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[ref]) : NULL; + + if (scaled_ref_frame[ref]) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. 
+ for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[ref][i] = xd->plane[i].pre[ref]; + vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, + NULL); + } + } + this_rd_thresh = (ref_frame == LAST_FRAME) ? rd_opt->threshes[segment_id][bsize][THR_LAST] : rd_opt->threshes[segment_id][bsize][THR_ALTR]; @@ -3969,12 +4012,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); filter_cache[switchable_filter_index] = tmp_rd; filter_cache[SWITCHABLE_FILTERS] = - MIN(filter_cache[SWITCHABLE_FILTERS], - tmp_rd + rs_rd); + VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; - mask_filter = MAX(mask_filter, tmp_rd); + mask_filter = VPXMAX(mask_filter, tmp_rd); newbest = (tmp_rd < tmp_best_rd); if (newbest) { @@ -4051,9 +4093,9 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); - tmp_best_rdu = best_rd - - MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), - RDCOST(x->rdmult, x->rddiv, 0, total_sse)); + tmp_best_rdu = + best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), + RDCOST(x->rdmult, x->rddiv, 0, total_sse)); if (tmp_best_rdu > 0) { // If even the 'Y' rd value of split is higher than best so far @@ -4062,14 +4104,31 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, BLOCK_8X8); memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, - &uv_sse, BLOCK_8X8, tmp_best_rdu)) + &uv_sse, BLOCK_8X8, tmp_best_rdu)) { + for (ref = 0; ref < 2; ++ref) { + if (scaled_ref_frame[ref]) { + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) + xd->plane[i].pre[ref] = backup_yv12[ref][i]; + } + } continue; + } rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable; total_sse += uv_sse; } + + for (ref = 0; ref < 2; ++ref) { + if (scaled_ref_frame[ref]) { + // Restore the prediction frame pointers to their 
unscaled versions. + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) + xd->plane[i].pre[ref] = backup_yv12[ref][i]; + } + } } if (cm->reference_mode == REFERENCE_MODE_SELECT) @@ -4113,9 +4172,9 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, if (!disable_skip && ref_frame == INTRA_FRAME) { for (i = 0; i < REFERENCE_MODES; ++i) - best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); + best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); + best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd); } // Did this mode help.. i.e. is it the new best mode @@ -4214,7 +4273,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, adj_rd = filter_cache[i] - ref; adj_rd += this_rd; - best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); + best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd); } } diff --git a/libvpx/vp9/encoder/vp9_resize.h b/libvpx/vp9/encoder/vp9_resize.h index 067af53f..b5feb386 100644 --- a/libvpx/vp9/encoder/vp9_resize.h +++ b/libvpx/vp9/encoder/vp9_resize.h @@ -14,6 +14,10 @@ #include <stdio.h> #include "vpx/vpx_integer.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_resize_plane(const uint8_t *const input, int height, int width, @@ -121,4 +125,9 @@ void vp9_highbd_resize_frame444(const uint8_t *const y, int owidth, int bd); #endif // CONFIG_VP9_HIGHBITDEPTH + +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_RESIZE_H_ diff --git a/libvpx/vp9/encoder/vp9_skin_detection.c b/libvpx/vp9/encoder/vp9_skin_detection.c index aaa8ea07..c2763b7d 100644 --- a/libvpx/vp9/encoder/vp9_skin_detection.c +++ b/libvpx/vp9/encoder/vp9_skin_detection.c @@ -98,12 +98,13 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)]; uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)]; uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)]; + int 
is_skin = 0; if (mode_filter == 1) { ysource = (ysource + ysource2 + ysource3 + ysource4) >> 2; usource = (usource + usource2 + usource3 + usource4) >> 2; vsource = (vsource + vsource2 + vsource3 + vsource4) >> 2; } - const int is_skin = vp9_skin_pixel(ysource, usource, vsource); + is_skin = vp9_skin_pixel(ysource, usource, vsource); for (i = 0; i < y_bsize; i++) { for (j = 0; j < y_bsize; j++) { if (is_skin) diff --git a/libvpx/vp9/encoder/vp9_skin_detection.h b/libvpx/vp9/encoder/vp9_skin_detection.h index 3d4e7375..0a87ef9f 100644 --- a/libvpx/vp9/encoder/vp9_skin_detection.h +++ b/libvpx/vp9/encoder/vp9_skin_detection.h @@ -25,7 +25,8 @@ int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr); #ifdef OUTPUT_YUV_SKINMAP // For viewing skin map on input source. -void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file); +void vp9_compute_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file); +extern void vp9_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f); #endif #ifdef __cplusplus diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c index 5e72c4cb..a5396298 100644 --- a/libvpx/vp9/encoder/vp9_speed_features.c +++ b/libvpx/vp9/encoder/vp9_speed_features.c @@ -13,6 +13,7 @@ #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_rdopt.h" +#include "vpx_dsp/vpx_dsp_common.h" // Intra only frames, golden frames (except alt ref overlays) and @@ -49,7 +50,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; if (speed >= 1) { - if (MIN(cm->width, cm->height) >= 720) { + if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->partition_search_breakout_dist_thr = (1 << 23); @@ -60,7 +61,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 2) { - if (MIN(cm->width, cm->height) >= 720) { + if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->adaptive_pred_interp_filter = 0; @@ -75,7 +76,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 3) { - if (MIN(cm->width, cm->height) >= 720) { + if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0; sf->partition_search_breakout_dist_thr = (1 << 25); @@ -99,7 +100,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 4) { - if (MIN(cm->width, cm->height) >= 720) { + if (VPXMIN(cm->width, cm->height) >= 720) { sf->partition_search_breakout_dist_thr = (1 << 26); } else { sf->partition_search_breakout_dist_thr = (1 << 24); @@ -112,8 +113,14 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { const int boosted = frame_is_boosted(cpi); + sf->partition_search_breakout_dist_thr = (1 << 20); + sf->partition_search_breakout_rate_thr = 80; + sf->tx_size_search_breakout = 1; sf->adaptive_rd_thresh = 1; sf->allow_skip_recode = 1; + sf->less_rectangular_check = 1; + sf->use_square_partition_only = !frame_is_boosted(cpi); + sf->use_square_only_threshold = BLOCK_16X16; if (speed >= 1) { if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) || @@ -122,6 +129,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, } else { sf->use_square_partition_only = !frame_is_intra_only(cm); } + sf->use_square_only_threshold = BLOCK_4X4; sf->less_rectangular_check = 1; @@ -138,9 +146,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, 
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - - sf->tx_size_search_breakout = 1; - sf->partition_search_breakout_rate_thr = 80; } if (speed >= 2) { @@ -215,7 +220,7 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; if (speed >= 1) { - if (MIN(cm->width, cm->height) >= 720) { + if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; } else { @@ -224,7 +229,7 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 2) { - if (MIN(cm->width, cm->height) >= 720) { + if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; } else { @@ -233,7 +238,7 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 5) { - if (MIN(cm->width, cm->height) >= 720) { + if (VPXMIN(cm->width, cm->height) >= 720) { sf->partition_search_breakout_dist_thr = (1 << 25); } else { sf->partition_search_breakout_dist_thr = (1 << 23); @@ -241,7 +246,7 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 7) { - sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ? + sf->encode_breakout_thresh = (VPXMIN(cm->width, cm->height) >= 720) ? 800 : 300; } } @@ -381,7 +386,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, } if (speed >= 6) { - // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION. sf->partition_search_type = VAR_BASED_PARTITION; // Turn on this to use non-RD key frame coding mode. 
sf->use_nonrd_pick_mode = 1; @@ -471,6 +475,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->partition_search_type = SEARCH_PARTITION; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; + sf->use_square_only_threshold = BLOCK_SIZES; sf->auto_min_max_partition_size = NOT_IN_USE; sf->rd_auto_partition_min_limit = BLOCK_4X4; sf->default_max_partition_size = BLOCK_64X64; diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h index 95038cee..575e98cf 100644 --- a/libvpx/vp9/encoder/vp9_speed_features.h +++ b/libvpx/vp9/encoder/vp9_speed_features.h @@ -267,6 +267,7 @@ typedef struct SPEED_FEATURES { // Disable testing non square partitions. (eg 16x32) int use_square_partition_only; + BLOCK_SIZE use_square_only_threshold; // Sets min and max partition sizes for this 64x64 region based on the // same 64x64 in last encoded frame, and the left and above neighbor. diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c index e69404ad..8a6818c8 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -10,9 +10,11 @@ #include <math.h> +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_extend.h" +#include "vpx_dsp/vpx_dsp_common.h" #define SMALL_FRAME_FB_IDX 7 #define SMALL_FRAME_WIDTH 32 @@ -21,11 +23,14 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; + int mi_rows = cpi->common.mi_rows; + int mi_cols = cpi->common.mi_cols; int sl, tl; int alt_ref_idx = svc->number_spatial_layers; svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; + svc->first_spatial_layer_to_encode = 0; if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, @@ -93,6 +98,26 @@ void 
vp9_init_layer_context(VP9_COMP *const cpi) { lrc->buffer_level = oxcf->starting_buffer_level_ms * lc->target_bandwidth / 1000; lrc->bits_off_target = lrc->buffer_level; + + // Initialize the cyclic refresh parameters. If spatial layers are used + // (i.e., ss_number_layers > 1), these need to be updated per spatial + // layer. + // Cyclic refresh is only applied on base temporal layer. + if (oxcf->ss_number_layers > 1 && + tl == 0) { + size_t last_coded_q_map_size; + size_t consec_zero_mv_size; + lc->sb_index = 0; + lc->map = vpx_malloc(mi_rows * mi_cols * sizeof(signed char)); + memset(lc->map, 0, mi_rows * mi_cols); + last_coded_q_map_size = mi_rows * mi_cols * sizeof(uint8_t); + lc->last_coded_q_map = vpx_malloc(last_coded_q_map_size); + assert(MAXQ <= 255); + memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size); + consec_zero_mv_size = mi_rows * mi_cols * sizeof(uint8_t); + lc->consec_zero_mv = vpx_malloc(consec_zero_mv_size); + memset(lc->consec_zero_mv, 0, consec_zero_mv_size); + } } } @@ -113,8 +138,6 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { - spatial_layer_target = 0; - for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); svc->layer_context[layer].target_bandwidth = @@ -141,8 +164,8 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, lrc->maximum_buffer_size = (int64_t)(rc->maximum_buffer_size * bitrate_alloc); lrc->bits_off_target = - MIN(lrc->bits_off_target, lrc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); + VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size); lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl]; lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); 
lrc->max_frame_bandwidth = rc->max_frame_bandwidth; @@ -173,9 +196,9 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, (int64_t)(rc->optimal_buffer_level * bitrate_alloc); lrc->maximum_buffer_size = (int64_t)(rc->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, - lrc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); + lrc->bits_off_target = VPXMIN(lrc->bits_off_target, + lrc->maximum_buffer_size); + lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size); // Update framerate-related quantities. if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; @@ -258,6 +281,24 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->rc.frames_since_key = old_frame_since_key; cpi->rc.frames_to_key = old_frame_to_key; } + + // For spatial-svc, allow cyclic-refresh to be applied on the spatial layers, + // for the base temporal layer. + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cpi->svc.number_spatial_layers > 1 && + cpi->svc.temporal_layer_id == 0) { + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + signed char *temp = cr->map; + uint8_t *temp2 = cr->last_coded_q_map; + uint8_t *temp3 = cr->consec_zero_mv; + cr->map = lc->map; + lc->map = temp; + cr->last_coded_q_map = lc->last_coded_q_map; + lc->last_coded_q_map = temp2; + cr->consec_zero_mv = lc->consec_zero_mv; + lc->consec_zero_mv = temp3; + cr->sb_index = lc->sb_index; + } } void vp9_save_layer_context(VP9_COMP *const cpi) { @@ -268,6 +309,24 @@ void vp9_save_layer_context(VP9_COMP *const cpi) { lc->twopass = cpi->twopass; lc->target_bandwidth = (int)oxcf->target_bandwidth; lc->alt_ref_source = cpi->alt_ref_source; + + // For spatial-svc, allow cyclic-refresh to be applied on the spatial layers, + // for the base temporal layer. 
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cpi->svc.number_spatial_layers > 1 && + cpi->svc.temporal_layer_id == 0) { + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + signed char *temp = lc->map; + uint8_t *temp2 = lc->last_coded_q_map; + uint8_t *temp3 = lc->consec_zero_mv; + lc->map = cr->map; + cr->map = temp; + lc->last_coded_q_map = cr->last_coded_q_map; + cr->last_coded_q_map = temp2; + lc->consec_zero_mv = cr->consec_zero_mv; + cr->consec_zero_mv = temp3; + lc->sb_index = cr->sb_index; + } } void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { @@ -492,19 +551,35 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { set_flags_and_fb_idx_for_temporal_mode2(cpi); } else if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { - // VP9E_TEMPORAL_LAYERING_MODE_BYPASS : - // if the code goes here, it means the encoder will be relying on the - // flags from outside for layering. - // However, since when spatial+temporal layering is used, the buffer indices - // cannot be derived automatically, the bypass mode will only work when the - // number of spatial layers equals 1. - assert(cpi->svc.number_spatial_layers == 1); + // In the BYPASS/flexible mode, the encoder is relying on the application + // to specify, for each spatial layer, the flags and buffer indices for the + // layering. + // Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is + // needed to support the case where the frame flags may be passed in via + // vpx_codec_encode(), which can be used for the temporal-only svc case. 
+ if (cpi->ext_refresh_frame_flags_pending == 0) { + int sl; + cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + sl = cpi->svc.spatial_layer_id; + vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]); + cpi->lst_fb_idx = cpi->svc.ext_lst_fb_idx[sl]; + cpi->gld_fb_idx = cpi->svc.ext_gld_fb_idx[sl]; + cpi->alt_fb_idx = cpi->svc.ext_alt_fb_idx[sl]; + } } lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id]; + // Setting the worst/best_quality via the encoder control: SET_SVC_PARAMETERS, + // only for non-BYPASS mode for now. + if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + RATE_CONTROL *const lrc = &lc->rc; + lrc->worst_quality = vp9_quantizer_to_qindex(lc->max_q); + lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q); + } + get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height, lc->scaling_factor_num, lc->scaling_factor_den, &width, &height); @@ -643,3 +718,21 @@ struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, } return buf; } + +void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) { + int sl, tl; + SVC *const svc = &cpi->svc; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + if (lc->map) + vpx_free(lc->map); + if (lc->last_coded_q_map) + vpx_free(lc->last_coded_q_map); + if (lc->consec_zero_mv) + vpx_free(lc->consec_zero_mv); + } + } +} diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h index b6a5ea54..694b5abd 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -41,6 +41,11 @@ typedef struct { int has_alt_frame; size_t layer_size; struct vpx_psnr_pkt psnr_pkt; + // Cyclic refresh parameters (aq-mode=3), 
that need to be updated per-frame. + int sb_index; + signed char *map; + uint8_t *last_coded_q_map; + uint8_t *consec_zero_mv; } LAYER_CONTEXT; typedef struct { @@ -50,6 +55,7 @@ typedef struct { int number_temporal_layers; int spatial_layer_to_encode; + int first_spatial_layer_to_encode; // Workaround for multiple frame contexts enum { @@ -70,6 +76,12 @@ typedef struct { // Indicates what sort of temporal layering is used. // Currently, this only works for CBR mode. VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; + // Frame flags and buffer indexes for each spatial layer, set by the + // application (external settings). + int ext_frame_flags[VPX_MAX_LAYERS]; + int ext_lst_fb_idx[VPX_MAX_LAYERS]; + int ext_gld_fb_idx[VPX_MAX_LAYERS]; + int ext_alt_fb_idx[VPX_MAX_LAYERS]; } SVC; struct VP9_COMP; @@ -115,6 +127,8 @@ int vp9_svc_start_frame(struct VP9_COMP *const cpi); int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); +void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c index 439eac6b..16f9c857 100644 --- a/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -23,6 +23,7 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_temporal_filter.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" @@ -216,7 +217,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int stride) { MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; - const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + const SEARCH_METHODS old_search_method = mv_sf->search_method; int step_param; int sadpb = x->sadperbit16; int bestsme = INT_MAX; @@ -242,12 +244,13 @@ static int 
temporal_filter_find_matching_mb_c(VP9_COMP *cpi, xd->plane[0].pre[0].stride = stride; step_param = mv_sf->reduce_first_step_size; - step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2); + step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); - // Ignore mv costing by sending NULL pointer instead of cost arrays - vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1, - cond_cost_list(cpi, cost_list), - &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv); + mv_sf->search_method = HEX; + vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param, + sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, + ref_mv, 0, 0); + mv_sf->search_method = old_search_method; // Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step(x, ref_mv, @@ -718,7 +721,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { "Failed to reallocate alt_ref_buffer"); } frames[frame] = vp9_scale_if_required( - cm, frames[frame], &cpi->svc.scaled_frames[frame_used]); + cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0); ++frame_used; } } diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c index 85cb2fce..6076e2a6 100644 --- a/libvpx/vp9/encoder/vp9_tokenize.c +++ b/libvpx/vp9/encoder/vp9_tokenize.c @@ -66,14 +66,6 @@ const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE }; -static const vpx_tree_index cat1[2] = {0, 0}; -static const vpx_tree_index cat2[4] = {2, 2, 0, 0}; -static const vpx_tree_index cat3[6] = {2, 2, 4, 4, 0, 0}; -static const vpx_tree_index cat4[8] = {2, 2, 4, 4, 6, 6, 0, 0}; -static const vpx_tree_index cat5[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0}; -static const vpx_tree_index cat6[28] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, - 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 0, 0}; - static const int16_t zero_cost[] = {0}; static const int16_t one_cost[] = {255, 257}; static const int16_t 
two_cost[] = {255, 257}; @@ -366,68 +358,49 @@ const int16_t vp9_cat6_high12_high_cost[2048] = { }; #endif -#if CONFIG_VP9_HIGHBITDEPTH -static const vpx_tree_index cat1_high10[2] = {0, 0}; -static const vpx_tree_index cat2_high10[4] = {2, 2, 0, 0}; -static const vpx_tree_index cat3_high10[6] = {2, 2, 4, 4, 0, 0}; -static const vpx_tree_index cat4_high10[8] = {2, 2, 4, 4, 6, 6, 0, 0}; -static const vpx_tree_index cat5_high10[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0}; -static const vpx_tree_index cat6_high10[32] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10, - 12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28, - 30, 30, 0, 0}; -static const vpx_tree_index cat1_high12[2] = {0, 0}; -static const vpx_tree_index cat2_high12[4] = {2, 2, 0, 0}; -static const vpx_tree_index cat3_high12[6] = {2, 2, 4, 4, 0, 0}; -static const vpx_tree_index cat4_high12[8] = {2, 2, 4, 4, 6, 6, 0, 0}; -static const vpx_tree_index cat5_high12[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0}; -static const vpx_tree_index cat6_high12[36] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10, - 12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28, - 30, 30, 32, 32, 34, 34, 0, 0}; -#endif - const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = { - {0, 0, 0, 0, zero_cost}, // ZERO_TOKEN - {0, 0, 0, 1, one_cost}, // ONE_TOKEN - {0, 0, 0, 2, two_cost}, // TWO_TOKEN - {0, 0, 0, 3, three_cost}, // THREE_TOKEN - {0, 0, 0, 4, four_cost}, // FOUR_TOKEN - {cat1, vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CATEGORY1_TOKEN - {cat2, vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CATEGORY2_TOKEN - {cat3, vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CATEGORY3_TOKEN - {cat4, vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CATEGORY4_TOKEN - {cat5, vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CATEGORY5_TOKEN - {cat6, vp9_cat6_prob, 14, CAT6_MIN_VAL, 0}, // CATEGORY6_TOKEN - {0, 0, 0, 0, zero_cost} // EOB_TOKEN + {0, 0, 0, zero_cost}, // ZERO_TOKEN + {0, 0, 1, one_cost}, // ONE_TOKEN + {0, 0, 2, two_cost}, // TWO_TOKEN + 
{0, 0, 3, three_cost}, // THREE_TOKEN + {0, 0, 4, four_cost}, // FOUR_TOKEN + {vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CATEGORY1_TOKEN + {vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CATEGORY2_TOKEN + {vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CATEGORY3_TOKEN + {vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CATEGORY4_TOKEN + {vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CATEGORY5_TOKEN + {vp9_cat6_prob, 14, CAT6_MIN_VAL, 0}, // CATEGORY6_TOKEN + {0, 0, 0, zero_cost} // EOB_TOKEN }; #if CONFIG_VP9_HIGHBITDEPTH const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS] = { - {0, 0, 0, 0, zero_cost}, // ZERO - {0, 0, 0, 1, one_cost}, // ONE - {0, 0, 0, 2, two_cost}, // TWO - {0, 0, 0, 3, three_cost}, // THREE - {0, 0, 0, 4, four_cost}, // FOUR - {cat1_high10, vp9_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1 - {cat2_high10, vp9_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2 - {cat3_high10, vp9_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3 - {cat4_high10, vp9_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4 - {cat5_high10, vp9_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5 - {cat6_high10, vp9_cat6_prob_high10, 16, CAT6_MIN_VAL, 0}, // CAT6 - {0, 0, 0, 0, zero_cost} // EOB + {0, 0, 0, zero_cost}, // ZERO + {0, 0, 1, one_cost}, // ONE + {0, 0, 2, two_cost}, // TWO + {0, 0, 3, three_cost}, // THREE + {0, 0, 4, four_cost}, // FOUR + {vp9_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1 + {vp9_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2 + {vp9_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3 + {vp9_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4 + {vp9_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5 + {vp9_cat6_prob_high10, 16, CAT6_MIN_VAL, 0}, // CAT6 + {0, 0, 0, zero_cost} // EOB }; const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS] = { - {0, 0, 0, 0, zero_cost}, // ZERO - {0, 0, 0, 1, one_cost}, // ONE - {0, 0, 0, 2, two_cost}, // TWO - {0, 0, 0, 3, three_cost}, // THREE 
- {0, 0, 0, 4, four_cost}, // FOUR - {cat1_high12, vp9_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1 - {cat2_high12, vp9_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2 - {cat3_high12, vp9_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3 - {cat4_high12, vp9_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4 - {cat5_high12, vp9_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5 - {cat6_high12, vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6 - {0, 0, 0, 0, zero_cost} // EOB + {0, 0, 0, zero_cost}, // ZERO + {0, 0, 1, one_cost}, // ONE + {0, 0, 2, two_cost}, // TWO + {0, 0, 3, three_cost}, // THREE + {0, 0, 4, four_cost}, // FOUR + {vp9_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1 + {vp9_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2 + {vp9_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3 + {vp9_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4 + {vp9_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5 + {vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6 + {0, 0, 0, zero_cost} // EOB }; #endif @@ -503,7 +476,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, int c; TOKENEXTRA *t = *tp; /* store tokens starting here */ int eob = p->eobs[block]; - const PLANE_TYPE type = pd->plane_type; + const PLANE_TYPE type = get_plane_type(plane); const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); const int segment_id = mbmi->segment_id; const int16_t *scan, *nb; diff --git a/libvpx/vp9/encoder/vp9_tokenize.h b/libvpx/vp9/encoder/vp9_tokenize.h index 11b78ba3..c0f09c7b 100644 --- a/libvpx/vp9/encoder/vp9_tokenize.h +++ b/libvpx/vp9/encoder/vp9_tokenize.h @@ -54,6 +54,20 @@ struct ThreadData; void vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); +typedef struct { + const vpx_prob *prob; + int len; + int base_val; + const int16_t *cost; +} vp9_extra_bit; + +// indexed by token value +extern const vp9_extra_bit 
vp9_extra_bits[ENTROPY_TOKENS]; +#if CONFIG_VP9_HIGHBITDEPTH +extern const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS]; +extern const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS]; +#endif // CONFIG_VP9_HIGHBITDEPTH + extern const int16_t *vp9_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to * improve cache locality, since it's needed for costing when the rest of the diff --git a/libvpx/vp9/encoder/x86/vp9_highbd_error_avx.asm b/libvpx/vp9/encoder/x86/vp9_highbd_error_avx.asm new file mode 100644 index 00000000..e476323e --- /dev/null +++ b/libvpx/vp9/encoder/x86/vp9_highbd_error_avx.asm @@ -0,0 +1,261 @@ +; +; Copyright (c) 2015 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%define private_prefix vp9 + +%include "third_party/x86inc/x86inc.asm" + +SECTION .text +ALIGN 16 + +; +; int64_t vp9_highbd_block_error_8bit(int32_t *coeff, int32_t *dqcoeff, +; intptr_t block_size, int64_t *ssz) +; + +INIT_XMM avx +cglobal highbd_block_error_8bit, 4, 5, 8, uqc, dqc, size, ssz + vzeroupper + + ; If only one iteration is required, then handle this as a special case. + ; It is the most frequent case, so we can have a significant gain here + ; by not setting up a loop and accumulators. 
+ cmp sizeq, 16 + jne .generic + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Common case of size == 16 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Load input vectors + mova xm0, [dqcq] + packssdw xm0, [dqcq+16] + mova xm2, [uqcq] + packssdw xm2, [uqcq+16] + + mova xm1, [dqcq+32] + packssdw xm1, [dqcq+48] + mova xm3, [uqcq+32] + packssdw xm3, [uqcq+48] + + ; Compute the errors. + psubw xm0, xm2 + psubw xm1, xm3 + + ; Individual errors are max 15bit+sign, so squares are 30bit, and + ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit). + pmaddwd xm2, xm2 + pmaddwd xm3, xm3 + + pmaddwd xm0, xm0 + pmaddwd xm1, xm1 + + ; Squares are always positive, so we can use unsigned arithmetic after + ; squaring. As mentioned earlier 2 sums fit in 31 bits, so 4 sums will + ; fit in 32bits + paddd xm2, xm3 + paddd xm0, xm1 + + ; Accumulate horizontally in 64 bits, there is no chance of overflow here + pxor xm5, xm5 + + pblendw xm3, xm5, xm2, 0x33 ; Zero extended low of a pair of 32 bits + psrlq xm2, 32 ; Zero extended high of a pair of 32 bits + + pblendw xm1, xm5, xm0, 0x33 ; Zero extended low of a pair of 32 bits + psrlq xm0, 32 ; Zero extended high of a pair of 32 bits + + paddq xm2, xm3 + paddq xm0, xm1 + + psrldq xm3, xm2, 8 + psrldq xm1, xm0, 8 + + paddq xm2, xm3 + paddq xm0, xm1 + + ; Store the return value +%if ARCH_X86_64 + movq rax, xm0 + movq [sszq], xm2 +%else + movd eax, xm0 + pextrd edx, xm0, 1 + movq [sszd], xm2 +%endif + RET + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Generic case of size != 16, speculative low precision + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ALIGN 16 +.generic: + pxor xm4, xm4 ; sse accumulator + pxor xm5, xm5 ; overflow detection register for xm4 + pxor xm6, xm6 ; ssz accumulator + pxor xm7, xm7 ; overflow detection register for xm6 + lea uqcq, [uqcq+sizeq*4] + lea 
dqcq, [dqcq+sizeq*4] + neg sizeq + + ; Push the negative size as the high precision code might need it + push sizeq + +.loop: + ; Load input vectors + mova xm0, [dqcq+sizeq*4] + packssdw xm0, [dqcq+sizeq*4+16] + mova xm2, [uqcq+sizeq*4] + packssdw xm2, [uqcq+sizeq*4+16] + + mova xm1, [dqcq+sizeq*4+32] + packssdw xm1, [dqcq+sizeq*4+48] + mova xm3, [uqcq+sizeq*4+32] + packssdw xm3, [uqcq+sizeq*4+48] + + add sizeq, 16 + + ; Compute the squared errors. + ; Individual errors are max 15bit+sign, so squares are 30bit, and + ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit). + psubw xm0, xm2 + pmaddwd xm2, xm2 + pmaddwd xm0, xm0 + + psubw xm1, xm3 + pmaddwd xm3, xm3 + pmaddwd xm1, xm1 + + ; Squares are always positive, so we can use unsigned arithmetic after + ; squaring. As mentioned earlier 2 sums fit in 31 bits, so 4 sums will + ; fit in 32bits + paddd xm2, xm3 + paddd xm0, xm1 + + ; We accumulate using 32 bit arithmetic, but detect potential overflow + ; by checking if the MSB of the accumulators have ever been a set bit. + ; If yes, we redo the whole compute at the end on higher precision, but + ; this happens extremely rarely, so we still achieve a net gain. + paddd xm4, xm0 + paddd xm6, xm2 + por xm5, xm4 ; OR in the accumulator for overflow detection + por xm7, xm6 ; OR in the accumulator for overflow detection + + jnz .loop + + ; Add pairs horizontally (still only on 32 bits) + phaddd xm4, xm4 + por xm5, xm4 ; OR in the accumulator for overflow detection + phaddd xm6, xm6 + por xm7, xm6 ; OR in the accumulator for overflow detection + + ; Check for possibility of overflow by testing if bit 32 of each dword lane + ; have ever been set. If they were not, then there was no overflow and the + ; final sum will fit in 32 bits. If overflow happened, then + ; we redo the whole computation on higher precision. 
+ por xm7, xm5 + pmovmskb r4, xm7 + test r4, 0x8888 + jnz .highprec + + phaddd xm4, xm4 + phaddd xm6, xm6 + pmovzxdq xm4, xm4 + pmovzxdq xm6, xm6 + + ; Restore stack + pop sizeq + + ; Store the return value +%if ARCH_X86_64 + movq rax, xm4 + movq [sszq], xm6 +%else + movd eax, xm4 + pextrd edx, xm4, 1 + movq [sszd], xm6 +%endif + RET + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Generic case of size != 16, high precision case + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.highprec: + pxor xm4, xm4 ; sse accumulator + pxor xm5, xm5 ; dedicated zero register + pxor xm6, xm6 ; ssz accumulator + pop sizeq + +.loophp: + mova xm0, [dqcq+sizeq*4] + packssdw xm0, [dqcq+sizeq*4+16] + mova xm2, [uqcq+sizeq*4] + packssdw xm2, [uqcq+sizeq*4+16] + + mova xm1, [dqcq+sizeq*4+32] + packssdw xm1, [dqcq+sizeq*4+48] + mova xm3, [uqcq+sizeq*4+32] + packssdw xm3, [uqcq+sizeq*4+48] + + add sizeq, 16 + + ; individual errors are max. 15bit+sign, so squares are 30bit, and + ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit) + + psubw xm0, xm2 + pmaddwd xm2, xm2 + pmaddwd xm0, xm0 + + psubw xm1, xm3 + pmaddwd xm3, xm3 + pmaddwd xm1, xm1 + + ; accumulate in 64bit + punpckldq xm7, xm0, xm5 + punpckhdq xm0, xm5 + paddq xm4, xm7 + + punpckldq xm7, xm2, xm5 + punpckhdq xm2, xm5 + paddq xm6, xm7 + + punpckldq xm7, xm1, xm5 + punpckhdq xm1, xm5 + paddq xm4, xm7 + + punpckldq xm7, xm3, xm5 + punpckhdq xm3, xm5 + paddq xm6, xm7 + + paddq xm4, xm0 + paddq xm4, xm1 + paddq xm6, xm2 + paddq xm6, xm3 + + jnz .loophp + + ; Accumulate horizontally + movhlps xm5, xm4 + movhlps xm7, xm6 + paddq xm4, xm5 + paddq xm6, xm7 + + ; Store the return value +%if ARCH_X86_64 + movq rax, xm4 + movq [sszq], xm6 +%else + movd eax, xm4 + pextrd edx, xm4, 1 + movq [sszd], xm6 +%endif + RET + +END diff --git a/libvpx/vp9/encoder/x86/vp9_highbd_error_sse2.asm b/libvpx/vp9/encoder/x86/vp9_highbd_error_sse2.asm new file mode 100644 
index 00000000..f3b8f019 --- /dev/null +++ b/libvpx/vp9/encoder/x86/vp9_highbd_error_sse2.asm @@ -0,0 +1,98 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%define private_prefix vp9 + +%include "third_party/x86inc/x86inc.asm" + +SECTION .text +ALIGN 16 + +; +; int64_t vp9_highbd_block_error_8bit(int32_t *coeff, int32_t *dqcoeff, +; intptr_t block_size, int64_t *ssz) +; + +INIT_XMM sse2 +cglobal highbd_block_error_8bit, 3, 3, 8, uqc, dqc, size, ssz + pxor m4, m4 ; sse accumulator + pxor m6, m6 ; ssz accumulator + pxor m5, m5 ; dedicated zero register + lea uqcq, [uqcq+sizeq*4] + lea dqcq, [dqcq+sizeq*4] + neg sizeq + + ALIGN 16 + +.loop: + mova m0, [dqcq+sizeq*4] + packssdw m0, [dqcq+sizeq*4+mmsize] + mova m2, [uqcq+sizeq*4] + packssdw m2, [uqcq+sizeq*4+mmsize] + + mova m1, [dqcq+sizeq*4+mmsize*2] + packssdw m1, [dqcq+sizeq*4+mmsize*3] + mova m3, [uqcq+sizeq*4+mmsize*2] + packssdw m3, [uqcq+sizeq*4+mmsize*3] + + add sizeq, mmsize + + ; individual errors are max. 
15bit+sign, so squares are 30bit, and + ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit) + + psubw m0, m2 + pmaddwd m2, m2 + pmaddwd m0, m0 + + psubw m1, m3 + pmaddwd m3, m3 + pmaddwd m1, m1 + + ; accumulate in 64bit + punpckldq m7, m0, m5 + punpckhdq m0, m5 + paddq m4, m7 + + punpckldq m7, m2, m5 + punpckhdq m2, m5 + paddq m6, m7 + + punpckldq m7, m1, m5 + punpckhdq m1, m5 + paddq m4, m7 + + punpckldq m7, m3, m5 + punpckhdq m3, m5 + paddq m6, m7 + + paddq m4, m0 + paddq m4, m1 + paddq m6, m2 + paddq m6, m3 + + jnz .loop + + ; accumulate horizontally and store in return value + movhlps m5, m4 + movhlps m7, m6 + paddq m4, m5 + paddq m6, m7 + +%if ARCH_X86_64 + movq rax, m4 + movq [sszq], m6 +%else + mov eax, sszm + pshufd m5, m4, 0x1 + movq [eax], m6 + movd eax, m4 + movd edx, m5 +%endif + RET diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c index f155b9ae..6ccba0f8 100644 --- a/libvpx/vp9/vp9_cx_iface.c +++ b/libvpx/vp9/vp9_cx_iface.c @@ -45,6 +45,9 @@ struct vp9_extracfg { vpx_bit_depth_t bit_depth; vp9e_tune_content content; vpx_color_space_t color_space; + vpx_color_range_t color_range; + int render_width; + int render_height; }; static struct vp9_extracfg default_extra_cfg = { @@ -71,6 +74,9 @@ static struct vp9_extracfg default_extra_cfg = { VPX_BITS_8, // Bit depth VP9E_CONTENT_DEFAULT, // content VPX_CS_UNKNOWN, // color space + 0, // color range + 0, // render width + 0, // render height }; struct vpx_codec_alg_priv { @@ -321,6 +327,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, ERROR("Codec bit-depth 8 not supported in profile > 1"); } RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB); + RANGE_CHECK(extra_cfg, color_range, + VPX_CR_STUDIO_RANGE, VPX_CR_FULL_RANGE); return VPX_CODEC_OK; } @@ -465,6 +473,9 @@ static vpx_codec_err_t set_encoder_config( #endif oxcf->color_space = extra_cfg->color_space; + oxcf->color_range = extra_cfg->color_range; + oxcf->render_width = 
extra_cfg->render_width; + oxcf->render_height = extra_cfg->render_height; oxcf->arnr_max_frames = extra_cfg->arnr_max_frames; oxcf->arnr_strength = extra_cfg->arnr_strength; oxcf->min_gf_interval = extra_cfg->min_gf_interval; @@ -1256,30 +1267,6 @@ static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { } } -static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx, - va_list args) { - const int update = va_arg(args, int); - - vp9_update_entropy(ctx->cpi, update); - return VPX_CODEC_OK; -} - -static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx, - va_list args) { - const int ref_frame_flags = va_arg(args, int); - - vp9_update_reference(ctx->cpi, ref_frame_flags); - return VPX_CODEC_OK; -} - -static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx, - va_list args) { - const int reference_flag = va_arg(args, int); - - vp9_use_as_reference(ctx->cpi, reference_flag); - return VPX_CODEC_OK; -} - static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, va_list args) { (void)ctx; @@ -1362,17 +1349,21 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; - svc->spatial_layer_id = data->spatial_layer_id; + svc->first_spatial_layer_to_encode = data->spatial_layer_id; + svc->spatial_layer_to_encode = data->spatial_layer_id; svc->temporal_layer_id = data->temporal_layer_id; // Checks on valid layer_id input. if (svc->temporal_layer_id < 0 || svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { return VPX_CODEC_INVALID_PARAM; } - if (svc->spatial_layer_id < 0 || - svc->spatial_layer_id >= (int)ctx->cfg.ss_number_layers) { + if (svc->first_spatial_layer_to_encode < 0 || + svc->first_spatial_layer_to_encode >= (int)ctx->cfg.ss_number_layers) { return VPX_CODEC_INVALID_PARAM; } + // First spatial layer to encode not implemented for two-pass. 
+ if (is_two_pass_svc(cpi) && svc->first_spatial_layer_to_encode > 0) + return VPX_CODEC_INVALID_PARAM; return VPX_CODEC_OK; } @@ -1412,6 +1403,20 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *); + int sl; + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + cpi->svc.ext_frame_flags[sl] = data->frame_flags[sl]; + cpi->svc.ext_lst_fb_idx[sl] = data->lst_fb_idx[sl]; + cpi->svc.ext_gld_fb_idx[sl] = data->gld_fb_idx[sl]; + cpi->svc.ext_alt_fb_idx[sl] = data->alt_fb_idx[sl]; + } + return VPX_CODEC_OK; +} + static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp = @@ -1436,11 +1441,24 @@ static vpx_codec_err_t ctrl_set_color_space(vpx_codec_alg_priv_t *ctx, return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_color_range(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.color_range = CAST(VP9E_SET_COLOR_RANGE, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + int *const render_size = va_arg(args, int *); + extra_cfg.render_width = render_size[0]; + extra_cfg.render_height = render_size[1]; + return update_extra_cfg(ctx, &extra_cfg); +} + static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP8_COPY_REFERENCE, ctrl_copy_reference}, - {VP8E_UPD_ENTROPY, ctrl_update_entropy}, - {VP8E_UPD_REFERENCE, ctrl_update_reference}, - {VP8E_USE_REFERENCE, ctrl_use_reference}, // Setters {VP8_SET_REFERENCE, ctrl_set_reference}, @@ -1472,9 +1490,12 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { 
{VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id}, {VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content}, {VP9E_SET_COLOR_SPACE, ctrl_set_color_space}, + {VP9E_SET_COLOR_RANGE, ctrl_set_color_range}, {VP9E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity}, {VP9E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval}, {VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval}, + {VP9E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config}, + {VP9E_SET_RENDER_SIZE, ctrl_set_render_size}, // Getters {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer}, diff --git a/libvpx/vp9/vp9_dx_iface.c b/libvpx/vp9/vp9_dx_iface.c index 96ede3c4..be5d1600 100644 --- a/libvpx/vp9/vp9_dx_iface.c +++ b/libvpx/vp9/vp9_dx_iface.c @@ -18,67 +18,19 @@ #include "vpx/vp8dx.h" #include "vpx/vpx_decoder.h" #include "vpx_dsp/bitreader_buffer.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_util/vpx_thread.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_frame_buffers.h" -#include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_decodeframe.h" +#include "vp9/vp9_dx_iface.h" #include "vp9/vp9_iface_common.h" #define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) -typedef vpx_codec_stream_info_t vp9_stream_info_t; - -// This limit is due to framebuffer numbers. -// TODO(hkuang): Remove this limit after implementing ondemand framebuffers. -#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames. - -typedef struct cache_frame { - int fb_idx; - vpx_image_t img; -} cache_frame; - -struct vpx_codec_alg_priv { - vpx_codec_priv_t base; - vpx_codec_dec_cfg_t cfg; - vp9_stream_info_t si; - int postproc_cfg_set; - vp8_postproc_cfg_t postproc_cfg; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - vpx_image_t img; - int img_avail; - int flushed; - int invert_tile_order; - int last_show_frame; // Index of last output frame. - int byte_alignment; - int skip_loop_filter; - - // Frame parallel related. - int frame_parallel_decode; // frame-based threading. 
- VPxWorker *frame_workers; - int num_frame_workers; - int next_submit_worker_id; - int last_submit_worker_id; - int next_output_worker_id; - int available_threads; - cache_frame frame_cache[FRAME_CACHE_SIZE]; - int frame_cache_write; - int frame_cache_read; - int num_cache_frames; - int need_resync; // wait for key/intra-only frame - // BufferPool that holds all reference frames. Shared by all the FrameWorkers. - BufferPool *buffer_pool; - - // External frame buffer info to save for VP9 common. - void *ext_priv; // Private data associated with the external frame buffers. - vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; -}; - static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data) { // This function only allocates space for the vpx_codec_alg_priv_t @@ -87,7 +39,8 @@ static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, (void)data; if (!ctx->priv) { - vpx_codec_alg_priv_t *const priv = vpx_calloc(1, sizeof(*priv)); + vpx_codec_alg_priv_t *const priv = + (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); if (priv == NULL) return VPX_CODEC_MEM_ERROR; @@ -183,7 +136,7 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, si->w = si->h = 0; if (decrypt_cb) { - data_sz = MIN(sizeof(clear_buffer), data_sz); + data_sz = VPXMIN(sizeof(clear_buffer), data_sz); decrypt_cb(decrypt_state, data, clear_buffer, data_sz); data = clear_buffer; } @@ -977,9 +930,9 @@ static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; } -static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *const display_size = va_arg(args, int *); +static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const render_size = va_arg(args, int *); // Only support this function in serial decode. 
if (ctx->frame_parallel_decode) { @@ -987,14 +940,14 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INCAPABLE; } - if (display_size) { + if (render_size) { if (ctx->frame_workers) { VPxWorker *const worker = ctx->frame_workers; FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; const VP9_COMMON *const cm = &frame_worker_data->pbi->common; - display_size[0] = cm->display_width; - display_size[1] = cm->display_height; + render_size[0] = cm->render_width; + render_size[1] = cm->render_height; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; @@ -1093,7 +1046,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, {VP9_GET_REFERENCE, ctrl_get_reference}, - {VP9D_GET_DISPLAY_SIZE, ctrl_get_display_size}, + {VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size}, {VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth}, {VP9D_GET_FRAME_SIZE, ctrl_get_frame_size}, diff --git a/libvpx/vp9/vp9_dx_iface.h b/libvpx/vp9/vp9_dx_iface.h new file mode 100644 index 00000000..e0e948e1 --- /dev/null +++ b/libvpx/vp9/vp9_dx_iface.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_VP9_DX_IFACE_H_ +#define VP9_VP9_DX_IFACE_H_ + +#include "vp9/decoder/vp9_decoder.h" + +typedef vpx_codec_stream_info_t vp9_stream_info_t; + +// This limit is due to framebuffer numbers. +// TODO(hkuang): Remove this limit after implementing ondemand framebuffers. +#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames. 
+ +typedef struct cache_frame { + int fb_idx; + vpx_image_t img; +} cache_frame; + +struct vpx_codec_alg_priv { + vpx_codec_priv_t base; + vpx_codec_dec_cfg_t cfg; + vp9_stream_info_t si; + int postproc_cfg_set; + vp8_postproc_cfg_t postproc_cfg; + vpx_decrypt_cb decrypt_cb; + void *decrypt_state; + vpx_image_t img; + int img_avail; + int flushed; + int invert_tile_order; + int last_show_frame; // Index of last output frame. + int byte_alignment; + int skip_loop_filter; + + // Frame parallel related. + int frame_parallel_decode; // frame-based threading. + VPxWorker *frame_workers; + int num_frame_workers; + int next_submit_worker_id; + int last_submit_worker_id; + int next_output_worker_id; + int available_threads; + cache_frame frame_cache[FRAME_CACHE_SIZE]; + int frame_cache_write; + int frame_cache_read; + int num_cache_frames; + int need_resync; // wait for key/intra-only frame + // BufferPool that holds all reference frames. Shared by all the FrameWorkers. + BufferPool *buffer_pool; + + // External frame buffer info to save for VP9 common. + void *ext_priv; // Private data associated with the external frame buffers. 
+ vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; +}; + +#endif // VP9_VP9_DX_IFACE_H_ diff --git a/libvpx/vp9/vp9_iface_common.h b/libvpx/vp9/vp9_iface_common.h index 58bb7d5d..938d4224 100644 --- a/libvpx/vp9/vp9_iface_common.h +++ b/libvpx/vp9/vp9_iface_common.h @@ -37,11 +37,14 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, } } img->cs = yv12->color_space; + img->range = yv12->color_range; img->bit_depth = 8; img->w = yv12->y_stride; img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); img->d_w = yv12->y_crop_width; img->d_h = yv12->y_crop_height; + img->r_w = yv12->render_width; + img->r_h = yv12->render_height; img->x_chroma_shift = yv12->subsampling_x; img->y_chroma_shift = yv12->subsampling_y; img->planes[VPX_PLANE_Y] = yv12->y_buffer; @@ -56,7 +59,7 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) { // vpx_image_t uses byte strides and a pointer to the first byte // of the image. 
- img->fmt |= VPX_IMG_FMT_HIGHBITDEPTH; + img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); img->bit_depth = yv12->bit_depth; img->planes[VPX_PLANE_Y] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->y_buffer); img->planes[VPX_PLANE_U] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->u_buffer); @@ -83,6 +86,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, yv12->y_crop_width = img->d_w; yv12->y_crop_height = img->d_h; + yv12->render_width = img->r_w; + yv12->render_height = img->r_h; yv12->y_width = img->d_w; yv12->y_height = img->d_h; @@ -96,6 +101,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, yv12->y_stride = img->stride[VPX_PLANE_Y]; yv12->uv_stride = img->stride[VPX_PLANE_U]; yv12->color_space = img->cs; + yv12->color_range = img->range; #if CONFIG_VP9_HIGHBITDEPTH if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk index 84b12d78..25a176f8 100644 --- a/libvpx/vp9/vp9cx.mk +++ b/libvpx/vp9/vp9cx.mk @@ -100,8 +100,13 @@ endif ifeq ($(CONFIG_USE_X86INC),yes) VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm +ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm +VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm +else VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm endif +endif ifeq ($(ARCH_X86_64),yes) ifeq ($(CONFIG_USE_X86INC),yes) diff --git a/libvpx/vp9/vp9dx.mk b/libvpx/vp9/vp9dx.mk index 0e9cf161..4c6fd007 100644 --- a/libvpx/vp9/vp9dx.mk +++ b/libvpx/vp9/vp9dx.mk @@ -16,6 +16,7 @@ VP9_DX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) VP9_DX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_DX_SRCS-yes += vp9_dx_iface.c +VP9_DX_SRCS-yes += vp9_dx_iface.h VP9_DX_SRCS-yes += decoder/vp9_decodemv.c VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c diff --git a/libvpx/vpx/src/svc_encodeframe.c b/libvpx/vpx/src/svc_encodeframe.c index 9844ace5..ff600830 100644 --- a/libvpx/vpx/src/svc_encodeframe.c +++ 
b/libvpx/vpx/src/svc_encodeframe.c @@ -339,7 +339,8 @@ void assign_layer_bitrates(const SvcContext *svc_ctx, (spatial_layer_target >> 1) + (spatial_layer_target >> 2); enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] = spatial_layer_target; - } else if (svc_ctx->temporal_layering_mode == 2) { + } else if (svc_ctx->temporal_layering_mode == 2 || + svc_ctx->temporal_layering_mode == 1) { enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = spatial_layer_target * 2 / 3; enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = @@ -417,7 +418,8 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode; if (svc_ctx->temporal_layering_mode == 3) { svc_ctx->temporal_layers = 3; - } else if (svc_ctx->temporal_layering_mode == 2) { + } else if (svc_ctx->temporal_layering_mode == 2 || + svc_ctx->temporal_layering_mode == 1) { svc_ctx->temporal_layers = 2; } @@ -477,10 +479,10 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, if (enc_cfg->rc_end_usage == VPX_CBR) { enc_cfg->rc_resize_allowed = 0; enc_cfg->rc_min_quantizer = 2; - enc_cfg->rc_max_quantizer = 63; + enc_cfg->rc_max_quantizer = 56; enc_cfg->rc_undershoot_pct = 50; enc_cfg->rc_overshoot_pct = 50; - enc_cfg->rc_buf_initial_sz = 20; + enc_cfg->rc_buf_initial_sz = 500; enc_cfg->rc_buf_optimal_sz = 600; enc_cfg->rc_buf_sz = 1000; } @@ -494,10 +496,10 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n"); return res; } - - vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1); - vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &si->svc_params); - + if (svc_ctx->spatial_layers > 1 || svc_ctx->temporal_layers > 1) { + vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1); + vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &si->svc_params); + } return VPX_CODEC_OK; } diff --git 
a/libvpx/vpx/svc_context.h b/libvpx/vpx/svc_context.h index a09651cc..5bc25189 100644 --- a/libvpx/vpx/svc_context.h +++ b/libvpx/vpx/svc_context.h @@ -40,6 +40,7 @@ typedef struct { int output_rc_stat; // for outputting rc stats int speed; // speed setting for codec int threads; + int aqmode; // turns on aq-mode=3 (cyclic_refresh): 0=off, 1=on. // private storage for vpx_svc_encode void *internal; } SvcContext; diff --git a/libvpx/vpx/vp8.h b/libvpx/vpx/vp8.h index 2a31af6d..8a035f97 100644 --- a/libvpx/vpx/vp8.h +++ b/libvpx/vpx/vp8.h @@ -116,19 +116,29 @@ typedef struct vp9_ref_frame { vpx_image_t img; /**< img structure to populate (output) */ } vp9_ref_frame_t; +/*!\cond */ /*!\brief vp8 decoder control function parameter type * * defines the data type for each of VP8 decoder control function requires */ VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *) +#define VPX_CTRL_VP8_SET_REFERENCE VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *) +#define VPX_CTRL_VP8_COPY_REFERENCE VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *) +#define VPX_CTRL_VP8_SET_POSTPROC VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int) +#define VPX_CTRL_VP8_SET_DBG_COLOR_REF_FRAME VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int) +#define VPX_CTRL_VP8_SET_DBG_COLOR_MB_MODES VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int) +#define VPX_CTRL_VP8_SET_DBG_COLOR_B_MODES VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int) +#define VPX_CTRL_VP8_SET_DBG_DISPLAY_MV VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *) +#define VPX_CTRL_VP9_GET_REFERENCE +/*!\endcond */ /*! @} - end defgroup vp8 */ #ifdef __cplusplus diff --git a/libvpx/vpx/vp8cx.h b/libvpx/vpx/vp8cx.h index 31120df2..bd99c6dc 100644 --- a/libvpx/vpx/vp8cx.h +++ b/libvpx/vpx/vp8cx.h @@ -141,29 +141,11 @@ extern vpx_codec_iface_t *vpx_codec_vp10_cx(void); * \sa #vpx_codec_control */ enum vp8e_enc_control_id { - /*!\brief Codec control function to set mode of entropy update in encoder. 
- * - * Supported in codecs: VP8, VP9 - */ - VP8E_UPD_ENTROPY = 5, - - /*!\brief Codec control function to set reference update mode in encoder. - * - * Supported in codecs: VP8, VP9 - */ - VP8E_UPD_REFERENCE, - - /*!\brief Codec control function to set which reference frame encoder can use. - * - * Supported in codecs: VP8, VP9 - */ - VP8E_USE_REFERENCE, - /*!\brief Codec control function to pass an ROI map to encoder. * * Supported in codecs: VP8, VP9 */ - VP8E_SET_ROI_MAP, + VP8E_SET_ROI_MAP = 8, /*!\brief Codec control function to pass an Active map to encoder. * @@ -547,6 +529,31 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_GET_ACTIVEMAP, + + /*!\brief Codec control function to set color range bit. + * \note Valid ranges: 0..1, default is 0 + * 0 = Limited range (16..235 or HBD equivalent) + * 1 = Full range (0..255 or HBD equivalent) + * + * Supported in codecs: VP9 + */ + VP9E_SET_COLOR_RANGE, + + /*!\brief Codec control function to set the frame flags and buffer indices + * for spatial layers. The frame flags and buffer indices are set using the + * struct #vpx_svc_ref_frame_config defined below. + * + * Supported in codecs: VP9 + */ + VP9E_SET_SVC_REF_FRAME_CONFIG, + + /*!\brief Codec control function to set intended rendering image size. + * + * By default, this is identical to the image size in pixels. + * + * Supported in codecs: VP9 + */ + VP9E_SET_RENDER_SIZE, }; /*!\brief vpx 1-D scaling mode @@ -673,6 +680,22 @@ typedef struct vpx_svc_layer_id { int temporal_layer_id; /**< Temporal layer id number. */ } vpx_svc_layer_id_t; +/*!\brief vp9 svc frame flag parameters. + * + * This defines the frame flags and buffer indices for each spatial layer for + * svc encoding. + * This is used with the #VP9E_SET_SVC_REF_FRAME_CONFIG control to set frame + * flags and buffer indices for each spatial layer for the current (super)frame. + * + */ +typedef struct vpx_svc_ref_frame_config { + int frame_flags[VPX_TS_MAX_LAYERS]; /**< Frame flags. 
*/ + int lst_fb_idx[VPX_TS_MAX_LAYERS]; /**< Last buffer index. */ + int gld_fb_idx[VPX_TS_MAX_LAYERS]; /**< Golden buffer index. */ + int alt_fb_idx[VPX_TS_MAX_LAYERS]; /**< Altref buffer index. */ +} vpx_svc_ref_frame_config_t; + +/*!\cond */ /*!\brief VP8 encoder control function parameter type * * Defines the data types that VP8E control functions take. Note that @@ -680,83 +703,113 @@ typedef struct vpx_svc_layer_id { * */ - -/* These controls have been deprecated in favor of the flags parameter to - * vpx_codec_encode(). See the definition of VP8_EFLAG_* above. - */ -VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_UPD_ENTROPY, int) -VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_UPD_REFERENCE, int) -VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_USE_REFERENCE, int) - VPX_CTRL_USE_TYPE(VP8E_SET_FRAME_FLAGS, int) +#define VPX_CTRL_VP8E_SET_FRAME_FLAGS VPX_CTRL_USE_TYPE(VP8E_SET_TEMPORAL_LAYER_ID, int) +#define VPX_CTRL_VP8E_SET_TEMPORAL_LAYER_ID VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *) +#define VPX_CTRL_VP8E_SET_ROI_MAP VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *) +#define VPX_CTRL_VP8E_SET_ACTIVEMAP VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) +#define VPX_CTRL_VP8E_SET_SCALEMODE VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int) +#define VPX_CTRL_VP9E_SET_SVC VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, void *) +#define VPX_CTRL_VP9E_SET_SVC_PARAMETERS VPX_CTRL_USE_TYPE(VP9E_REGISTER_CX_CALLBACK, void *) +#define VPX_CTRL_VP9E_REGISTER_CX_CALLBACK VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID, vpx_svc_layer_id_t *) +#define VPX_CTRL_VP9E_SET_SVC_LAYER_ID VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int) +#define VPX_CTRL_VP8E_SET_CPUUSED VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) +#define VPX_CTRL_VP8E_SET_ENABLEAUTOALTREF VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int) +#define VPX_CTRL_VP8E_SET_NOISE_SENSITIVITY VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int) +#define VPX_CTRL_VP8E_SET_SHARPNESS VPX_CTRL_USE_TYPE(VP8E_SET_STATIC_THRESHOLD, unsigned int) 
+#define VPX_CTRL_VP8E_SET_STATIC_THRESHOLD VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */ +#define VPX_CTRL_VP8E_SET_TOKEN_PARTITIONS VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int) +#define VPX_CTRL_VP8E_SET_ARNR_MAXFRAMES VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH, unsigned int) +#define VPX_CTRL_VP8E_SET_ARNR_STRENGTH VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_SET_ARNR_TYPE, unsigned int) +#define VPX_CTRL_VP8E_SET_ARNR_TYPE VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */ +#define VPX_CTRL_VP8E_SET_TUNING VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL, unsigned int) +#define VPX_CTRL_VP8E_SET_CQ_LEVEL VPX_CTRL_USE_TYPE(VP9E_SET_TILE_COLUMNS, int) +#define VPX_CTRL_VP9E_SET_TILE_COLUMNS VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int) +#define VPX_CTRL_VP9E_SET_TILE_ROWS VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) +#define VPX_CTRL_VP8E_GET_LAST_QUANTIZER VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) +#define VPX_CTRL_VP8E_GET_LAST_QUANTIZER_64 VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *) +#define VPX_CTRL_VP9E_GET_SVC_LAYER_ID VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int) +#define VPX_CTRL_VP8E_SET_MAX_INTRA_BITRATE_PCT VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int) +#define VPX_CTRL_VP8E_SET_MAX_INTER_BITRATE_PCT VPX_CTRL_USE_TYPE(VP8E_SET_SCREEN_CONTENT_MODE, unsigned int) +#define VPX_CTRL_VP8E_SET_SCREEN_CONTENT_MODE VPX_CTRL_USE_TYPE(VP9E_SET_GF_CBR_BOOST_PCT, unsigned int) +#define VPX_CTRL_VP9E_SET_GF_CBR_BOOST_PCT VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int) +#define VPX_CTRL_VP9E_SET_LOSSLESS VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int) +#define VPX_CTRL_VP9E_SET_FRAME_PARALLEL_DECODING VPX_CTRL_USE_TYPE(VP9E_SET_AQ_MODE, unsigned int) +#define VPX_CTRL_VP9E_SET_AQ_MODE VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PERIODIC_BOOST, unsigned int) +#define VPX_CTRL_VP9E_SET_FRAME_PERIODIC_BOOST VPX_CTRL_USE_TYPE(VP9E_SET_NOISE_SENSITIVITY, unsigned int) 
+#define VPX_CTRL_VP9E_SET_NOISE_SENSITIVITY VPX_CTRL_USE_TYPE(VP9E_SET_TUNE_CONTENT, int) /* vp9e_tune_content */ +#define VPX_CTRL_VP9E_SET_TUNE_CONTENT VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_SPACE, int) +#define VPX_CTRL_VP9E_SET_COLOR_SPACE VPX_CTRL_USE_TYPE(VP9E_SET_MIN_GF_INTERVAL, unsigned int) - -/*!\brief - * - * TODO(debargha) : add support of the control in ffmpeg - */ #define VPX_CTRL_VP9E_SET_MIN_GF_INTERVAL - VPX_CTRL_USE_TYPE(VP9E_SET_MAX_GF_INTERVAL, unsigned int) -/*!\brief - * - * TODO(debargha) : add support of the control in ffmpeg - */ #define VPX_CTRL_VP9E_SET_MAX_GF_INTERVAL VPX_CTRL_USE_TYPE(VP9E_GET_ACTIVEMAP, vpx_active_map_t *) +#define VPX_CTRL_VP9E_GET_ACTIVEMAP + +VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_RANGE, int) +#define VPX_CTRL_VP9E_SET_COLOR_RANGE + +VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *) +#define VPX_CTRL_VP9E_SET_SVC_REF_FRAME_CONFIG + +VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *) +#define VPX_CTRL_VP9E_SET_RENDER_SIZE + +/*!\endcond */ /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vpx/vp8dx.h b/libvpx/vpx/vp8dx.h index 27b9f780..1f02fd59 100644 --- a/libvpx/vpx/vp8dx.h +++ b/libvpx/vpx/vp8dx.h @@ -147,6 +147,7 @@ typedef struct vpx_decrypt_init { typedef vpx_decrypt_init vp8_decrypt_init; +/*!\cond */ /*!\brief VP8 decoder control function parameter type * * Defines the data types that VP8D control functions take. 
Note that @@ -156,15 +157,25 @@ typedef vpx_decrypt_init vp8_decrypt_init; VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *) +#define VPX_CTRL_VP8D_GET_LAST_REF_UPDATES VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *) +#define VPX_CTRL_VP8D_GET_FRAME_CORRUPTED VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *) +#define VPX_CTRL_VP8D_GET_LAST_REF_USED VPX_CTRL_USE_TYPE(VPXD_SET_DECRYPTOR, vpx_decrypt_init *) +#define VPX_CTRL_VPXD_SET_DECRYPTOR VPX_CTRL_USE_TYPE(VP8D_SET_DECRYPTOR, vpx_decrypt_init *) +#define VPX_CTRL_VP8D_SET_DECRYPTOR VPX_CTRL_USE_TYPE(VP9D_GET_DISPLAY_SIZE, int *) +#define VPX_CTRL_VP9D_GET_DISPLAY_SIZE VPX_CTRL_USE_TYPE(VP9D_GET_BIT_DEPTH, unsigned int *) +#define VPX_CTRL_VP9D_GET_BIT_DEPTH VPX_CTRL_USE_TYPE(VP9D_GET_FRAME_SIZE, int *) +#define VPX_CTRL_VP9D_GET_FRAME_SIZE VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int) +#define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER +/*!\endcond */ /*! @} - end defgroup vp8_decoder */ #ifdef __cplusplus diff --git a/libvpx/vpx/vpx_encoder.h b/libvpx/vpx/vpx_encoder.h index 2b17f98a..955e8735 100644 --- a/libvpx/vpx/vpx_encoder.h +++ b/libvpx/vpx/vpx_encoder.h @@ -150,7 +150,7 @@ extern "C" { partitions can be decoded even though earlier partitions have been lost. Note that intra - predicition is still done over + prediction is still done over the partition boundary. 
*/ /*!\brief Encoder output packet variants diff --git a/libvpx/vpx/vpx_image.h b/libvpx/vpx/vpx_image.h index c06d3510..e9e952c4 100644 --- a/libvpx/vpx/vpx_image.h +++ b/libvpx/vpx/vpx_image.h @@ -78,10 +78,17 @@ extern "C" { VPX_CS_SRGB = 7 /**< sRGB */ } vpx_color_space_t; /**< alias for enum vpx_color_space */ + /*!\brief List of supported color range */ + typedef enum vpx_color_range { + VPX_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */ + VPX_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */ + } vpx_color_range_t; /**< alias for enum vpx_color_range */ + /**\brief Image Descriptor */ typedef struct vpx_image { vpx_img_fmt_t fmt; /**< Image Format */ vpx_color_space_t cs; /**< Color Space */ + vpx_color_range_t range; /**< Color Range */ /* Image storage dimensions */ unsigned int w; /**< Stored image width */ @@ -92,6 +99,10 @@ extern "C" { unsigned int d_w; /**< Displayed image width */ unsigned int d_h; /**< Displayed image height */ + /* Image intended rendering dimensions */ + unsigned int r_w; /**< Intended rendering image width */ + unsigned int r_h; /**< Intended rendering image height */ + /* Chroma subsampling info */ unsigned int x_chroma_shift; /**< subsampling order, X */ unsigned int y_chroma_shift; /**< subsampling order, Y */ diff --git a/libvpx/vpx_dsp/bitreader.c b/libvpx/vpx_dsp/bitreader.c index 4420fade..6ad806ac 100644 --- a/libvpx/vpx_dsp/bitreader.c +++ b/libvpx/vpx_dsp/bitreader.c @@ -13,6 +13,7 @@ #include "vpx_dsp/bitreader.h" #include "vpx_dsp/prob.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #include "vpx_mem/vpx_mem.h" #include "vpx_util/endian_inl.h" @@ -48,7 +49,7 @@ void vpx_reader_fill(vpx_reader *r) { int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); if (r->decrypt_cb) { - size_t n = MIN(sizeof(r->clear_buffer), bytes_left); + size_t n = VPXMIN(sizeof(r->clear_buffer), bytes_left); r->decrypt_cb(r->decrypt_state, buffer, r->clear_buffer, (int)n); buffer = r->clear_buffer; buffer_start = 
r->clear_buffer; diff --git a/libvpx/vpx_dsp/bitreader_buffer.c b/libvpx/vpx_dsp/bitreader_buffer.c index fb04ee63..bb917263 100644 --- a/libvpx/vpx_dsp/bitreader_buffer.c +++ b/libvpx/vpx_dsp/bitreader_buffer.c @@ -7,6 +7,7 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ +#include "./vpx_config.h" #include "./bitreader_buffer.h" size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb) { @@ -39,3 +40,14 @@ int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, const int value = vpx_rb_read_literal(rb, bits); return vpx_rb_read_bit(rb) ? -value : value; } + +int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, + int bits) { +#if CONFIG_MISC_FIXES + const int nbits = sizeof(unsigned) * 8 - bits - 1; + const unsigned value = vpx_rb_read_literal(rb, bits + 1) << nbits; + return ((int) value) >> nbits; +#else + return vpx_rb_read_signed_literal(rb, bits); +#endif +} diff --git a/libvpx/vpx_dsp/bitreader_buffer.h b/libvpx/vpx_dsp/bitreader_buffer.h index 03b156ba..8a48a95e 100644 --- a/libvpx/vpx_dsp/bitreader_buffer.h +++ b/libvpx/vpx_dsp/bitreader_buffer.h @@ -38,6 +38,8 @@ int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits); int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, int bits); +int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vpx_dsp/bitwriter_buffer.c b/libvpx/vpx_dsp/bitwriter_buffer.c index 0dfb859d..6182a722 100644 --- a/libvpx/vpx_dsp/bitwriter_buffer.c +++ b/libvpx/vpx_dsp/bitwriter_buffer.c @@ -9,7 +9,9 @@ */ #include <limits.h> +#include <stdlib.h> +#include "./vpx_config.h" #include "./bitwriter_buffer.h" size_t vpx_wb_bytes_written(const struct vpx_write_bit_buffer *wb) { @@ -34,3 +36,13 @@ void vpx_wb_write_literal(struct vpx_write_bit_buffer *wb, int data, int bits) { for (bit = bits - 1; bit >= 0; bit--) vpx_wb_write_bit(wb, 
(data >> bit) & 1); } + +void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb, + int data, int bits) { +#if CONFIG_MISC_FIXES + vpx_wb_write_literal(wb, data, bits + 1); +#else + vpx_wb_write_literal(wb, abs(data), bits); + vpx_wb_write_bit(wb, data < 0); +#endif +} diff --git a/libvpx/vpx_dsp/bitwriter_buffer.h b/libvpx/vpx_dsp/bitwriter_buffer.h index 9397668e..a123a2fe 100644 --- a/libvpx/vpx_dsp/bitwriter_buffer.h +++ b/libvpx/vpx_dsp/bitwriter_buffer.h @@ -28,6 +28,8 @@ void vpx_wb_write_bit(struct vpx_write_bit_buffer *wb, int bit); void vpx_wb_write_literal(struct vpx_write_bit_buffer *wb, int data, int bits); +void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb, int data, + int bits); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vpx_dsp/intrapred.c b/libvpx/vpx_dsp/intrapred.c index 9ba0f644..a9669e51 100644 --- a/libvpx/vpx_dsp/intrapred.c +++ b/libvpx/vpx_dsp/intrapred.c @@ -44,6 +44,21 @@ static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; } +static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + (void) above; + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], + left[(c >> 1) + r + 2]) + : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); + } + dst += stride; + } +} + static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; @@ -61,6 +76,20 @@ static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, } } +static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + (void) left; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = r & 1 ? 
AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], + above[(r >> 1) + c + 2]) + : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); + } + dst += stride; + } +} + static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { const uint8_t above_right = above[bs - 1]; @@ -80,6 +109,19 @@ static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, } } +static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + (void) left; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = AVG3(above[r + c], above[r + c + 1], + above[r + c + 1 + (r + c + 2 < bs * 2)]); + } + dst += stride; + } +} + static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; @@ -247,6 +289,38 @@ static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, } } +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int H = above[-1]; + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + + memset(dst + stride * 0, AVG3(H, I, J), 4); + memset(dst + stride * 1, AVG3(I, J, K), 4); + memset(dst + stride * 2, AVG3(J, K, L), 4); + memset(dst + stride * 3, AVG3(K, L, L), 4); +} + +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int H = above[-1]; + const int I = above[0]; + const int J = above[1]; + const int K = above[2]; + const int L = above[3]; + const int M = above[4]; + + dst[0] = AVG3(H, I, J); + dst[1] = AVG3(I, J, K); + dst[2] = AVG3(J, K, L); + dst[3] = AVG3(K, L, M); + memcpy(dst + stride * 1, dst, 4); + memcpy(dst + stride * 2, dst, 4); + memcpy(dst + stride * 3, dst, 4); +} + void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const 
int I = left[0]; @@ -287,6 +361,30 @@ void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, DST(3, 3) = AVG3(E, F, G); // differs from vp8 } +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + const int H = above[7]; + (void)left; + DST(0, 0) = AVG2(A, B); + DST(1, 0) = DST(0, 2) = AVG2(B, C); + DST(2, 0) = DST(1, 2) = AVG2(C, D); + DST(3, 0) = DST(2, 2) = AVG2(D, E); + DST(3, 2) = AVG3(E, F, G); + + DST(0, 1) = AVG3(A, B, C); + DST(1, 1) = DST(0, 3) = AVG3(B, C, D); + DST(2, 1) = DST(1, 3) = AVG3(C, D, E); + DST(3, 1) = DST(2, 3) = AVG3(D, E, F); + DST(3, 3) = AVG3(F, G, H); +} + void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int A = above[0]; @@ -308,6 +406,27 @@ void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, DST(3, 3) = H; // differs from vp8 } +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + const int H = above[7]; + (void)stride; + (void)left; + DST(0, 0) = AVG3(A, B, C); + DST(1, 0) = DST(0, 1) = AVG3(B, C, D); + DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); + DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); + DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); + DST(3, 2) = DST(2, 3) = AVG3(F, G, H); + DST(3, 3) = AVG3(G, H, H); +} + void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int I = left[0]; @@ -409,6 +528,23 @@ static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, } } +static INLINE void 
highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) above; + (void) bd; + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], + left[(c >> 1) + r + 2]) + : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); + } + dst += stride; + } +} + static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { @@ -425,6 +561,8 @@ static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, } } +#define highbd_d63e_predictor highbd_d63_predictor + static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { @@ -441,6 +579,21 @@ static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, } } +static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) left; + (void) bd; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = AVG3(above[r + c], above[r + c + 1], + above[r + c + 1 + (r + c + 2 < bs * 2)]); + } + dst += stride; + } +} + static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { @@ -679,6 +832,11 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, intra_pred_no_4x4(d207) intra_pred_no_4x4(d63) intra_pred_no_4x4(d45) +#if CONFIG_MISC_FIXES +intra_pred_allsizes(d207e) +intra_pred_allsizes(d63e) +intra_pred_no_4x4(d45e) +#endif intra_pred_no_4x4(d117) intra_pred_no_4x4(d135) intra_pred_no_4x4(d153) diff --git a/libvpx/vpx_dsp/inv_txfm.c b/libvpx/vpx_dsp/inv_txfm.c index 3afa8cdc..5f3cfddb 100644 --- a/libvpx/vpx_dsp/inv_txfm.c +++ b/libvpx/vpx_dsp/inv_txfm.c @@ -170,16 +170,25 @@ void idct8_c(const tran_low_t *input, 
tran_low_t *output) { step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - // stage 2 & stage 3 - even half - idct4_c(step1, step1); - - // stage 2 - odd half + // stage 2 + temp1 = (step1[0] + step1[2]) * cospi_16_64; + temp2 = (step1[0] - step1[2]) * cospi_16_64; + step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); + temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); + step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); step2[4] = WRAPLOW(step1[4] + step1[5], 8); step2[5] = WRAPLOW(step1[4] - step1[5], 8); step2[6] = WRAPLOW(-step1[6] + step1[7], 8); step2[7] = WRAPLOW(step1[6] + step1[7], 8); - // stage 3 -odd half + // stage 3 + step1[0] = WRAPLOW(step2[0] + step2[3], 8); + step1[1] = WRAPLOW(step2[1] + step2[2], 8); + step1[2] = WRAPLOW(step2[1] - step2[2], 8); + step1[3] = WRAPLOW(step2[0] - step2[3], 8); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; diff --git a/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h b/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h index e82dfb7e..2c964afa 100644 --- a/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h +++ b/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h @@ -355,7 +355,7 @@ static INLINE void flatmask5(uint32_t p4, uint32_t p3, /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */ "and %[flat1], %[flat3], %[flat1] \n\t" - : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), + : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1), [flat3] "=&r" (flat3) : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1), diff --git a/libvpx/vpx_dsp/prob.h b/libvpx/vpx_dsp/prob.h index 729f90a5..c3cb103f 100644 --- a/libvpx/vpx_dsp/prob.h +++ 
b/libvpx/vpx_dsp/prob.h @@ -65,7 +65,7 @@ static INLINE vpx_prob merge_probs(vpx_prob pre_prob, unsigned int count_sat, unsigned int max_update_factor) { const vpx_prob prob = get_binary_prob(ct[0], ct[1]); - const unsigned int count = MIN(ct[0] + ct[1], count_sat); + const unsigned int count = VPXMIN(ct[0] + ct[1], count_sat); const unsigned int factor = max_update_factor * count / count_sat; return weighted_prob(pre_prob, prob, factor); } @@ -82,7 +82,7 @@ static INLINE vpx_prob mode_mv_merge_probs(vpx_prob pre_prob, if (den == 0) { return pre_prob; } else { - const unsigned int count = MIN(den, MODE_MV_COUNT_SAT); + const unsigned int count = VPXMIN(den, MODE_MV_COUNT_SAT); const unsigned int factor = count_to_update_factor[count]; const vpx_prob prob = clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den); diff --git a/libvpx/vpx_dsp/psnrhvs.c b/libvpx/vpx_dsp/psnrhvs.c index 2de77c05..30017057 100644 --- a/libvpx/vpx_dsp/psnrhvs.c +++ b/libvpx/vpx_dsp/psnrhvs.c @@ -191,7 +191,7 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride, for (i = 0; i < 8; i++) { for (j = 0; j < 8; j++) { float err; - err = fabs(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]); + err = fabs((float)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j])); if (i != 0 || j != 0) err = err < s_mask / mask[i][j] ? 
0 : err - s_mask / mask[i][j]; ret += (err * _csf[i][j]) * (err * _csf[i][j]); diff --git a/libvpx/vpx_dsp/vpx_dsp.mk b/libvpx/vpx_dsp/vpx_dsp.mk index 1959c4d8..9620eaa0 100644 --- a/libvpx/vpx_dsp/vpx_dsp.mk +++ b/libvpx/vpx_dsp/vpx_dsp.mk @@ -36,13 +36,13 @@ DSP_SRCS-yes += bitreader_buffer.h endif # intra predictions -ifneq ($(filter yes,$(CONFIG_VP9) $(CONFIG_VP10)),) DSP_SRCS-yes += intrapred.c ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSE) += x86/intrapred_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm +DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_ssse3.asm endif # CONFIG_USE_X86INC ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) @@ -58,7 +58,6 @@ DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred4_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred8_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred16_dspr2.c -endif # CONFIG_VP9 || CONFIG_VP10 DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.c @@ -249,7 +248,8 @@ DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c endif ifeq ($(ARCH_X86_64),yes) ifeq ($(CONFIG_USE_X86INC),yes) -DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm +DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm +DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm endif endif endif # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER @@ -308,6 +308,8 @@ DSP_SRCS-$(HAVE_MMX) += x86/variance_mmx.c DSP_SRCS-$(HAVE_MMX) += x86/variance_impl_mmx.asm DSP_SRCS-$(HAVE_SSE) += x86/variance_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3 +DSP_SRCS-$(HAVE_SSE2) += x86/halfpix_variance_sse2.c +DSP_SRCS-$(HAVE_SSE2) += x86/halfpix_variance_impl_sse2.asm DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c DSP_SRCS-$(HAVE_AVX2) += x86/variance_impl_avx2.c diff --git a/libvpx/vpx_dsp/vpx_dsp_common.h b/libvpx/vpx_dsp/vpx_dsp_common.h index ccb81895..a9e180e7 100644 --- 
a/libvpx/vpx_dsp/vpx_dsp_common.h +++ b/libvpx/vpx_dsp/vpx_dsp_common.h @@ -13,14 +13,15 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) -#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) +#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y)) #if CONFIG_VP9_HIGHBITDEPTH // Note: diff --git a/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl index 1e56d534..b369b054 100644 --- a/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -54,322 +54,401 @@ if ($opts{arch} eq "x86_64") { # Intra prediction # -if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) { - add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc"; +add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207e_predictor_4x4/; + +add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc"; + +add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45e_predictor_4x4/; + +add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, 
const uint8_t *left"; +specialize qw/vpx_d63e_predictor_4x4/; + +add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63f_predictor_4x4/; + +add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc"; + +add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_he_predictor_4x4/; + +add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_4x4/; + +add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d135_predictor_4x4 neon/; + +add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc"; + +add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_ve_predictor_4x4/; - add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc"; +add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc"; - add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc"; 
+add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc"; - add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc"; +add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc"; - add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d117_predictor_4x4/; +add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc"; - add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d135_predictor_4x4 neon/; +add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc"; - add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc"; +add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc"; - add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207e_predictor_8x8/; 
- add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc"; +add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc"; - add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc"; +add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45e_predictor_8x8/; - add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc"; +add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc"; - add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc"; +add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63e_predictor_8x8/; - add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc"; +add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc"; - add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize 
qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc"; +add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_8x8/; - add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc"; +add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d135_predictor_8x8/; - add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc"; +add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc"; - add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc"; +add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d117_predictor_8x8/; +add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; - add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d135_predictor_8x8/; +add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc"; - 
add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc"; +add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc"; - add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_d207e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207e_predictor_16x16/; - add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - 
specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc"; - add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_d45e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45e_predictor_16x16/; - add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc"; +add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc"; - add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc"; +add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63e_predictor_16x16/; - add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc"; +add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc"; - add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc"; +add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_16x16/; - add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d117_predictor_16x16/; +add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d135_predictor_16x16/; - add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d135_predictor_16x16/; +add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc"; - add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc"; +add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t 
*above, const uint8_t *left"; - specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc"; - add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc"; +add_proto qw/void vpx_d207e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207e_predictor_32x32/; - add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d45_predictor_32x32/, 
"$ssse3_x86inc"; +add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc"; - add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc"; +add_proto qw/void vpx_d45e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45e_predictor_32x32/; - add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc"; +add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc"; - add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d117_predictor_32x32/; +add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63e_predictor_32x32/; - add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d135_predictor_32x32/; +add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc"; - add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc"; +add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_32x32/; - add_proto qw/void 
vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d135_predictor_32x32/; - add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc"; +add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc"; - add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; +add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc"; +add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc"; - add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t 
*left"; - specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc"; + +add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; + +add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; # High bitdepth functions - if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_4x4/; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207_predictor_4x4/; - add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d45_predictor_4x4/; + add_proto qw/void vpx_highbd_d207e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207e_predictor_4x4/; - add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_4x4/; + add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45_predictor_4x4/; - add_proto qw/void vpx_highbd_h_predictor_4x4/, 
"uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_h_predictor_4x4/; + add_proto qw/void vpx_highbd_d45e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45e_predictor_4x4/; - add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_4x4/; + add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63_predictor_4x4/; - add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d135_predictor_4x4/; + add_proto qw/void vpx_highbd_d63e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63e_predictor_4x4/; - add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_4x4/; + add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_h_predictor_4x4/; - add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc"; + add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d117_predictor_4x4/; - add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize 
qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc"; + add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d135_predictor_4x4/; - add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc"; + add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d153_predictor_4x4/; - add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_top_predictor_4x4/; + add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc"; - add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_left_predictor_4x4/; + add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc"; - add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_128_predictor_4x4/; + add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc"; - add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize 
qw/vpx_highbd_d207_predictor_8x8/; + add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_top_predictor_4x4/; - add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d45_predictor_8x8/; + add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_left_predictor_4x4/; - add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_8x8/; + add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_128_predictor_4x4/; - add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_h_predictor_8x8/; + add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207_predictor_8x8/; - add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_8x8/; + add_proto qw/void vpx_highbd_d207e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207e_predictor_8x8/; - add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d135_predictor_8x8/; + add_proto qw/void vpx_highbd_d45_predictor_8x8/, 
"uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45_predictor_8x8/; - add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_8x8/; + add_proto qw/void vpx_highbd_d45e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45e_predictor_8x8/; - add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63_predictor_8x8/; - add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_d63e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63e_predictor_8x8/; - add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";; + add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_h_predictor_8x8/; - add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_top_predictor_8x8/; + add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const 
uint16_t *left, int bd"; + specialize qw/vpx_highbd_d117_predictor_8x8/; - add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_left_predictor_8x8/; + add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d135_predictor_8x8/; - add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_128_predictor_8x8/; + add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d153_predictor_8x8/; - add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_16x16/; + add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d45_predictor_16x16/; + add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_16x16/; + add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize 
qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";; - add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_h_predictor_16x16/; + add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_top_predictor_8x8/; - add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_16x16/; + add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_left_predictor_8x8/; - add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d135_predictor_16x16/; + add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_128_predictor_8x8/; - add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_16x16/; + add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207_predictor_16x16/; - add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_d207e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207e_predictor_16x16/; - 
add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc"; + add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45_predictor_16x16/; - add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_d45e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45e_predictor_16x16/; - add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_top_predictor_16x16/; + add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63_predictor_16x16/; - add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_left_predictor_16x16/; + add_proto qw/void vpx_highbd_d63e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63e_predictor_16x16/; - add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_128_predictor_16x16/; + add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_h_predictor_16x16/; - add_proto qw/void 
vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_32x32/; + add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d117_predictor_16x16/; - add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d45_predictor_32x32/; + add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d135_predictor_16x16/; - add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_32x32/; + add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d153_predictor_16x16/; - add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_h_predictor_32x32/; + add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_32x32/; + add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc"; - add_proto qw/void vpx_highbd_d135_predictor_32x32/, 
"uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d135_predictor_32x32/; + add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_32x32/; + add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_top_predictor_16x16/; - add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_left_predictor_16x16/; - add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc"; + add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_128_predictor_16x16/; - add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc"; + add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207_predictor_32x32/; - add_proto qw/void 
vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_top_predictor_32x32/; + add_proto qw/void vpx_highbd_d207e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207e_predictor_32x32/; - add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_left_predictor_32x32/; + add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45_predictor_32x32/; - add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_128_predictor_32x32/; - } # CONFIG_VP9_HIGHBITDEPTH -} # CONFIG_VP9 || CONFIG_VP10 + add_proto qw/void vpx_highbd_d45e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45e_predictor_32x32/; + + add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63_predictor_32x32/; + + add_proto qw/void vpx_highbd_d63e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63e_predictor_32x32/; + + add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_h_predictor_32x32/; + + add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d117_predictor_32x32/; + + 
add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d135_predictor_32x32/; + + add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d153_predictor_32x32/; + + add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc"; + + add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc"; + + add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc"; + + add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_top_predictor_32x32/; + + add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_left_predictor_32x32/; + + add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_128_predictor_32x32/; +} # CONFIG_VP9_HIGHBITDEPTH # # Sub Pixel Filters @@ -421,10 +500,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Sub Pixel Filters # add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int 
bps"; - specialize qw/vpx_highbd_convolve_copy/; + specialize qw/vpx_highbd_convolve_copy/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; - specialize qw/vpx_highbd_convolve_avg/; + specialize qw/vpx_highbd_convolve_avg/, "$sse2_x86inc"; add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64"; @@ -616,39 +695,6 @@ if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Note as optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. 
- add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct4x4_1_add/; - - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct4x4_16_add/; - - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_1_add/; - - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_64_add/; - - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_12_add/; - - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct16x16_1_add/; - - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct16x16_256_add/; - - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct16x16_10_add/; - - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_1024_add/; - - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_34_add/; - - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_1_add/; - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_iwht4x4_1_add/; @@ -681,6 +727,39 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_16_add/; 
+ + add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_1_add/; + + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_64_add/; + + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_12_add/; + + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_1_add/; + + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_256_add/; + + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_10_add/; + + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_1_add/; + + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1024_add/; + + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_34_add/; + + add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1_add/; + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; specialize qw/vpx_highbd_idct4x4_16_add/; @@ -696,6 +775,39 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; specialize qw/vpx_highbd_idct16x16_10_add/; } else { + add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_16_add sse2/; + + add_proto qw/void vpx_idct4x4_1_add/, 
"const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct4x4_1_add sse2/; + + add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_64_add sse2/; + + add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_12_add sse2/; + + add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct8x8_1_add sse2/; + + add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_256_add sse2/; + + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_10_add sse2/; + + add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct16x16_1_add sse2/; + + add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1024_add sse2/; + + add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_34_add sse2/; + + add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; + specialize qw/vpx_idct32x32_1_add sse2/; + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; specialize qw/vpx_highbd_idct4x4_16_add sse2/; @@ -801,25 +913,19 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Quantization # if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) { -if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t 
*quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b/; + specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b_32x32/; - - add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_highbd_quantize_b sse2/; + specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; - add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_highbd_quantize_b_32x32 sse2/; -} else { - add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b sse2/, 
"$ssse3_x86_64_x86inc"; + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vpx_highbd_quantize_b sse2/; - add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc"; -} # CONFIG_VP9_HIGHBITDEPTH + add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vpx_highbd_quantize_b_32x32 sse2/; + } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER if (vpx_config("CONFIG_ENCODERS") eq "yes") { @@ -1373,13 +1479,13 @@ add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, i # Specialty Subpixel # add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_variance_halfpixvar16x16_h mmx media/; + specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/; add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const 
unsigned char *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_variance_halfpixvar16x16_v mmx media/; + specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/; add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_variance_halfpixvar16x16_hv mmx media/; + specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; diff --git a/libvpx/vpx_dsp/x86/halfpix_variance_impl_sse2.asm b/libvpx/vpx_dsp/x86/halfpix_variance_impl_sse2.asm new file mode 100644 index 00000000..cc26bb61 --- /dev/null +++ b/libvpx/vpx_dsp/x86/halfpix_variance_impl_sse2.asm @@ -0,0 +1,346 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + +%include "vpx_ports/x86_abi_support.asm" + +;void vpx_half_horiz_vert_variance16x_h_sse2(unsigned char *ref, +; int ref_stride, +; unsigned char *src, +; int src_stride, +; unsigned int height, +; int *sum, +; unsigned int *sumsquared) +global sym(vpx_half_horiz_vert_variance16x_h_sse2) PRIVATE +sym(vpx_half_horiz_vert_variance16x_h_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + pxor xmm6, xmm6 ; error accumulator + pxor xmm7, xmm7 ; sse eaccumulator + mov rsi, arg(0) ;ref + + mov rdi, arg(2) ;src + movsxd rcx, dword ptr arg(4) ;height + movsxd rax, dword ptr arg(1) ;ref_stride + movsxd rdx, dword ptr arg(3) ;src_stride + + pxor xmm0, xmm0 ; + + movdqu xmm5, XMMWORD PTR [rsi] + movdqu xmm3, XMMWORD PTR [rsi+1] + pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1 + + lea rsi, [rsi + rax] + +vpx_half_horiz_vert_variance16x_h_1: + movdqu xmm1, XMMWORD PTR [rsi] ; + movdqu xmm2, XMMWORD PTR [rsi+1] ; + pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1 + + pavgb xmm5, xmm1 ; xmm = vertical average of the above + + movdqa xmm4, xmm5 + punpcklbw xmm5, xmm0 ; xmm5 = words of above + punpckhbw xmm4, xmm0 + + movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7 + punpcklbw xmm3, xmm0 ; xmm3 = words of above + psubw xmm5, xmm3 ; xmm5 -= xmm3 + + movq xmm3, QWORD PTR [rdi+8] + punpcklbw xmm3, xmm0 + psubw xmm4, xmm3 + + paddw xmm6, xmm5 ; xmm6 += accumulated column differences + paddw xmm6, xmm4 + pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 + pmaddwd xmm4, xmm4 + paddd xmm7, xmm5 ; xmm7 += accumulated square column differences + paddd xmm7, xmm4 + + movdqa xmm5, xmm1 ; save xmm1 for use on the next row + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + + sub rcx, 1 ; + jnz vpx_half_horiz_vert_variance16x_h_1 ; + + pxor xmm1, xmm1 + pxor xmm5, xmm5 + + punpcklwd xmm0, xmm6 + punpckhwd xmm1, xmm6 + psrad xmm0, 16 + psrad xmm1, 16 + paddd xmm0, xmm1 + movdqa xmm1, xmm0 + + movdqa xmm6, xmm7 + 
punpckldq xmm6, xmm5 + punpckhdq xmm7, xmm5 + paddd xmm6, xmm7 + + punpckldq xmm0, xmm5 + punpckhdq xmm1, xmm5 + paddd xmm0, xmm1 + + movdqa xmm7, xmm6 + movdqa xmm1, xmm0 + + psrldq xmm7, 8 + psrldq xmm1, 8 + + paddd xmm6, xmm7 + paddd xmm0, xmm1 + + mov rsi, arg(5) ;[Sum] + mov rdi, arg(6) ;[SSE] + + movd [rsi], xmm0 + movd [rdi], xmm6 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vpx_half_vert_variance16x_h_sse2(unsigned char *ref, +; int ref_stride, +; unsigned char *src, +; int src_stride, +; unsigned int height, +; int *sum, +; unsigned int *sumsquared) +global sym(vpx_half_vert_variance16x_h_sse2) PRIVATE +sym(vpx_half_vert_variance16x_h_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + pxor xmm6, xmm6 ; error accumulator + pxor xmm7, xmm7 ; sse eaccumulator + mov rsi, arg(0) ;ref + + mov rdi, arg(2) ;src + movsxd rcx, dword ptr arg(4) ;height + movsxd rax, dword ptr arg(1) ;ref_stride + movsxd rdx, dword ptr arg(3) ;src_stride + + movdqu xmm5, XMMWORD PTR [rsi] + lea rsi, [rsi + rax ] + pxor xmm0, xmm0 + +vpx_half_vert_variance16x_h_1: + movdqu xmm3, XMMWORD PTR [rsi] + + pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) + movdqa xmm4, xmm5 + punpcklbw xmm5, xmm0 + punpckhbw xmm4, xmm0 + + movq xmm2, QWORD PTR [rdi] + punpcklbw xmm2, xmm0 + psubw xmm5, xmm2 + movq xmm2, QWORD PTR [rdi+8] + punpcklbw xmm2, xmm0 + psubw xmm4, xmm2 + + paddw xmm6, xmm5 ; xmm6 += accumulated column differences + paddw xmm6, xmm4 + pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 + pmaddwd xmm4, xmm4 + paddd xmm7, xmm5 ; xmm7 += accumulated square column differences + paddd xmm7, xmm4 + + movdqa xmm5, xmm3 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + + sub rcx, 1 + jnz vpx_half_vert_variance16x_h_1 + + pxor xmm1, xmm1 + pxor xmm5, xmm5 + + punpcklwd xmm0, xmm6 + punpckhwd xmm1, xmm6 + psrad xmm0, 16 + psrad xmm1, 16 + paddd xmm0, xmm1 + movdqa xmm1, xmm0 + + 
movdqa xmm6, xmm7 + punpckldq xmm6, xmm5 + punpckhdq xmm7, xmm5 + paddd xmm6, xmm7 + + punpckldq xmm0, xmm5 + punpckhdq xmm1, xmm5 + paddd xmm0, xmm1 + + movdqa xmm7, xmm6 + movdqa xmm1, xmm0 + + psrldq xmm7, 8 + psrldq xmm1, 8 + + paddd xmm6, xmm7 + paddd xmm0, xmm1 + + mov rsi, arg(5) ;[Sum] + mov rdi, arg(6) ;[SSE] + + movd [rsi], xmm0 + movd [rdi], xmm6 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vpx_half_horiz_variance16x_h_sse2(unsigned char *ref, +; int ref_stride +; unsigned char *src, +; int src_stride, +; unsigned int height, +; int *sum, +; unsigned int *sumsquared) +global sym(vpx_half_horiz_variance16x_h_sse2) PRIVATE +sym(vpx_half_horiz_variance16x_h_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + pxor xmm6, xmm6 ; error accumulator + pxor xmm7, xmm7 ; sse eaccumulator + mov rsi, arg(0) ;ref + + mov rdi, arg(2) ;src + movsxd rcx, dword ptr arg(4) ;height + movsxd rax, dword ptr arg(1) ;ref_stride + movsxd rdx, dword ptr arg(3) ;src_stride + + pxor xmm0, xmm0 ; + +vpx_half_horiz_variance16x_h_1: + movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15 + movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16 + + pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) + movdqa xmm1, xmm5 + punpcklbw xmm5, xmm0 ; xmm5 = words of above + punpckhbw xmm1, xmm0 + + movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7 + punpcklbw xmm3, xmm0 ; xmm3 = words of above + movq xmm2, QWORD PTR [rdi+8] + punpcklbw xmm2, xmm0 + + psubw xmm5, xmm3 ; xmm5 -= xmm3 + psubw xmm1, xmm2 + paddw xmm6, xmm5 ; xmm6 += accumulated column differences + paddw xmm6, xmm1 + pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 + pmaddwd xmm1, xmm1 + paddd xmm7, xmm5 ; xmm7 += accumulated square column differences + paddd xmm7, xmm1 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + + sub rcx, 1 ; + jnz vpx_half_horiz_variance16x_h_1 ; + + pxor xmm1, xmm1 + pxor xmm5, xmm5 + + 
punpcklwd xmm0, xmm6 + punpckhwd xmm1, xmm6 + psrad xmm0, 16 + psrad xmm1, 16 + paddd xmm0, xmm1 + movdqa xmm1, xmm0 + + movdqa xmm6, xmm7 + punpckldq xmm6, xmm5 + punpckhdq xmm7, xmm5 + paddd xmm6, xmm7 + + punpckldq xmm0, xmm5 + punpckhdq xmm1, xmm5 + paddd xmm0, xmm1 + + movdqa xmm7, xmm6 + movdqa xmm1, xmm0 + + psrldq xmm7, 8 + psrldq xmm1, 8 + + paddd xmm6, xmm7 + paddd xmm0, xmm1 + + mov rsi, arg(5) ;[Sum] + mov rdi, arg(6) ;[SSE] + + movd [rsi], xmm0 + movd [rdi], xmm6 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +; short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64}; +align 16 +xmm_bi_rd: + times 8 dw 64 +align 16 +vpx_bilinear_filters_sse2: + dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 + dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 + dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 + dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 + dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 + dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 + dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 + dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 diff --git a/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c b/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c new file mode 100644 index 00000000..5782155b --- /dev/null +++ b/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" + +void vpx_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref, + int ref_stride, + const unsigned char *src, + int src_stride, + unsigned int height, + int *sum, + unsigned int *sumsquared); +void vpx_half_horiz_variance16x_h_sse2(const unsigned char *ref, int ref_stride, + const unsigned char *src, int src_stride, + unsigned int height, int *sum, + unsigned int *sumsquared); +void vpx_half_vert_variance16x_h_sse2(const unsigned char *ref, int ref_stride, + const unsigned char *src, int src_stride, + unsigned int height, int *sum, + unsigned int *sumsquared); + +uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src, + int src_stride, + const unsigned char *dst, + int dst_stride, + uint32_t *sse) { + int xsum0; + unsigned int xxsum0; + + vpx_half_horiz_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16, + &xsum0, &xxsum0); + + *sse = xxsum0; + return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8)); +} + +uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src, + int src_stride, + const unsigned char *dst, + int dst_stride, + uint32_t *sse) { + int xsum0; + unsigned int xxsum0; + vpx_half_vert_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16, + &xsum0, &xxsum0); + + *sse = xxsum0; + return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8)); +} + + +uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src, + int src_stride, + const unsigned char *dst, + int dst_stride, + uint32_t *sse) { + int xsum0; + unsigned int xxsum0; + + vpx_half_horiz_vert_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16, + &xsum0, &xxsum0); + + *sse = xxsum0; + return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8)); +} diff --git a/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/libvpx/vpx_dsp/x86/inv_txfm_sse2.c index f3af68f0..ae907fd0 100644 --- a/libvpx/vpx_dsp/x86/inv_txfm_sse2.c +++ b/libvpx/vpx_dsp/x86/inv_txfm_sse2.c @@ -21,7 +21,8 @@ *(int 
*)(dest) = _mm_cvtsi128_si32(d0); \ } -void vpx_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { +void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i eight = _mm_set1_epi16(8); const __m128i cst = _mm_setr_epi16( @@ -32,8 +33,8 @@ void vpx_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i input0, input1, input2, input3; // Rows - input0 = _mm_load_si128((const __m128i *)input); - input2 = _mm_load_si128((const __m128i *)(input + 8)); + input0 = load_input_data(input); + input2 = load_input_data(input + 8); // Construct i3, i1, i3, i1, i2, i0, i2, i0 input0 = _mm_shufflelo_epi16(input0, 0xd8); @@ -151,7 +152,8 @@ void vpx_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { } } -void vpx_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { +void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); int a; @@ -449,7 +451,8 @@ void iadst4_sse2(__m128i *in) { out7 = _mm_subs_epi16(stp1_0, stp2_7); \ } -void vpx_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { +void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1 << 4); @@ -469,14 +472,14 @@ void vpx_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { int i; // Load input data. 
- in0 = _mm_load_si128((const __m128i *)input); - in1 = _mm_load_si128((const __m128i *)(input + 8 * 1)); - in2 = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in3 = _mm_load_si128((const __m128i *)(input + 8 * 3)); - in4 = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in5 = _mm_load_si128((const __m128i *)(input + 8 * 5)); - in6 = _mm_load_si128((const __m128i *)(input + 8 * 6)); - in7 = _mm_load_si128((const __m128i *)(input + 8 * 7)); + in0 = load_input_data(input); + in1 = load_input_data(input + 8 * 1); + in2 = load_input_data(input + 8 * 2); + in3 = load_input_data(input + 8 * 3); + in4 = load_input_data(input + 8 * 4); + in5 = load_input_data(input + 8 * 5); + in6 = load_input_data(input + 8 * 6); + in7 = load_input_data(input + 8 * 7); // 2-D for (i = 0; i < 2; i++) { @@ -518,7 +521,8 @@ void vpx_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { RECON_AND_STORE(dest + 7 * stride, in7); } -void vpx_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { +void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); int a; @@ -792,7 +796,8 @@ void iadst8_sse2(__m128i *in) { in[7] = _mm_sub_epi16(k__const_0, s1); } -void vpx_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { +void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1 << 4); @@ -812,10 +817,10 @@ void vpx_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // Rows. Load 4-row input data. 
- in0 = _mm_load_si128((const __m128i *)input); - in1 = _mm_load_si128((const __m128i *)(input + 8 * 1)); - in2 = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in3 = _mm_load_si128((const __m128i *)(input + 8 * 3)); + in0 = load_input_data(input); + in1 = load_input_data(input + 8 * 1); + in2 = load_input_data(input + 8 * 2); + in3 = load_input_data(input + 8 * 3); // 8x4 Transpose TRANSPOSE_8X8_10(in0, in1, in2, in3, in0, in1); @@ -1169,7 +1174,7 @@ void vpx_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { stp2_10, stp2_13, stp2_11, stp2_12) \ } -void vpx_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, +void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1 << 5); @@ -1214,22 +1219,22 @@ void vpx_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, // 1-D idct // Load input data. - in[0] = _mm_load_si128((const __m128i *)input); - in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1)); - in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3)); - in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5)); - in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); - in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7)); - in[4] = _mm_load_si128((const __m128i *)(input + 8 * 8)); - in[12] = _mm_load_si128((const __m128i *)(input + 8 * 9)); - in[5] = _mm_load_si128((const __m128i *)(input + 8 * 10)); - in[13] = _mm_load_si128((const __m128i *)(input + 8 * 11)); - in[6] = _mm_load_si128((const __m128i *)(input + 8 * 12)); - in[14] = _mm_load_si128((const __m128i *)(input + 8 * 13)); - in[7] = _mm_load_si128((const __m128i *)(input + 8 * 14)); - in[15] = _mm_load_si128((const __m128i *)(input + 8 * 15)); + in[0] = load_input_data(input); + in[8] = 
load_input_data(input + 8 * 1); + in[1] = load_input_data(input + 8 * 2); + in[9] = load_input_data(input + 8 * 3); + in[2] = load_input_data(input + 8 * 4); + in[10] = load_input_data(input + 8 * 5); + in[3] = load_input_data(input + 8 * 6); + in[11] = load_input_data(input + 8 * 7); + in[4] = load_input_data(input + 8 * 8); + in[12] = load_input_data(input + 8 * 9); + in[5] = load_input_data(input + 8 * 10); + in[13] = load_input_data(input + 8 * 11); + in[6] = load_input_data(input + 8 * 12); + in[14] = load_input_data(input + 8 * 13); + in[7] = load_input_data(input + 8 * 14); + in[15] = load_input_data(input + 8 * 15); array_transpose_8x8(in, in); array_transpose_8x8(in + 8, in + 8); @@ -1294,7 +1299,8 @@ void vpx_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, } } -void vpx_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { +void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); int a, i; @@ -2152,7 +2158,7 @@ void iadst16_sse2(__m128i *in0, __m128i *in1) { iadst16_8col(in1); } -void vpx_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, +void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1 << 5); @@ -2184,10 +2190,10 @@ void vpx_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, int i; // First 1-D inverse DCT // Load input data. 
- in[0] = _mm_load_si128((const __m128i *)input); - in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); + in[0] = load_input_data(input); + in[1] = load_input_data(input + 8 * 2); + in[2] = load_input_data(input + 8 * 4); + in[3] = load_input_data(input + 8 * 6); TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]); @@ -2391,7 +2397,7 @@ void vpx_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, #define LOAD_DQCOEFF(reg, input) \ { \ - reg = _mm_load_si128((const __m128i *) input); \ + reg = load_input_data(input); \ input += 8; \ } \ @@ -3029,7 +3035,7 @@ void vpx_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, } // Only upper-left 8x8 has non-zero coeff -void vpx_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, +void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); @@ -3081,14 +3087,14 @@ void vpx_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, int i; // Load input data. Only need to load the top left 8x8 block. 
- in[0] = _mm_load_si128((const __m128i *)input); - in[1] = _mm_load_si128((const __m128i *)(input + 32)); - in[2] = _mm_load_si128((const __m128i *)(input + 64)); - in[3] = _mm_load_si128((const __m128i *)(input + 96)); - in[4] = _mm_load_si128((const __m128i *)(input + 128)); - in[5] = _mm_load_si128((const __m128i *)(input + 160)); - in[6] = _mm_load_si128((const __m128i *)(input + 192)); - in[7] = _mm_load_si128((const __m128i *)(input + 224)); + in[0] = load_input_data(input); + in[1] = load_input_data(input + 32); + in[2] = load_input_data(input + 64); + in[3] = load_input_data(input + 96); + in[4] = load_input_data(input + 128); + in[5] = load_input_data(input + 160); + in[6] = load_input_data(input + 192); + in[7] = load_input_data(input + 224); for (i = 8; i < 32; ++i) { in[i] = _mm_setzero_si128(); @@ -3188,7 +3194,7 @@ void vpx_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, } } -void vpx_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, +void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1 << 5); @@ -3464,10 +3470,11 @@ void vpx_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, } } -void vpx_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { +void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); - int a, i; + int a, j; a = dct_const_round_shift(input[0] * cospi_16_64); a = dct_const_round_shift(a * cospi_16_64); @@ -3475,12 +3482,11 @@ void vpx_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { dc_value = _mm_set1_epi16(a); - for (i = 0; i < 4; ++i) { - int j; - for (j = 0; j < 32; ++j) { - RECON_AND_STORE(dest + j * stride, dc_value); - } - dest += 8; + for (j = 0; j < 32; ++j) { + RECON_AND_STORE(dest + 0 + j * stride, dc_value); + 
RECON_AND_STORE(dest + 8 + j * stride, dc_value); + RECON_AND_STORE(dest + 16 + j * stride, dc_value); + RECON_AND_STORE(dest + 24 + j * stride, dc_value); } } diff --git a/libvpx/vpx_dsp/x86/inv_txfm_sse2.h b/libvpx/vpx_dsp/x86/inv_txfm_sse2.h index 658a9148..bd520c18 100644 --- a/libvpx/vpx_dsp/x86/inv_txfm_sse2.h +++ b/libvpx/vpx_dsp/x86/inv_txfm_sse2.h @@ -15,6 +15,7 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/inv_txfm.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" // perform 8x8 transpose static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { @@ -89,24 +90,35 @@ static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) { res0[15] = tbuf[7]; } -static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) { - in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16)); - in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16)); - in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16)); - in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16)); - in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16)); - in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16)); - in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16)); - in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16)); - - in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16)); - in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16)); - in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16)); - in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16)); - in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16)); - in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16)); - in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16)); - in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16)); +// Function to allow 8 bit optimisations to be used when profile 0 is used with +// highbitdepth enabled +static INLINE __m128i load_input_data(const tran_low_t *data) { +#if CONFIG_VP9_HIGHBITDEPTH + return 
octa_set_epi16(data[0], data[1], data[2], data[3], data[4], data[5], + data[6], data[7]); +#else + return _mm_load_si128((const __m128i *)data); +#endif +} + +static INLINE void load_buffer_8x16(const tran_low_t *input, __m128i *in) { + in[0] = load_input_data(input + 0 * 16); + in[1] = load_input_data(input + 1 * 16); + in[2] = load_input_data(input + 2 * 16); + in[3] = load_input_data(input + 3 * 16); + in[4] = load_input_data(input + 4 * 16); + in[5] = load_input_data(input + 5 * 16); + in[6] = load_input_data(input + 6 * 16); + in[7] = load_input_data(input + 7 * 16); + + in[8] = load_input_data(input + 8 * 16); + in[9] = load_input_data(input + 9 * 16); + in[10] = load_input_data(input + 10 * 16); + in[11] = load_input_data(input + 11 * 16); + in[12] = load_input_data(input + 12 * 16); + in[13] = load_input_data(input + 13 * 16); + in[14] = load_input_data(input + 14 * 16); + in[15] = load_input_data(input + 15 * 16); } #define RECON_AND_STORE(dest, in_x) \ diff --git a/libvpx/vpx_dsp/x86/quantize_avx_x86_64.asm b/libvpx/vpx_dsp/x86/quantize_avx_x86_64.asm new file mode 100644 index 00000000..01c41291 --- /dev/null +++ b/libvpx/vpx_dsp/x86/quantize_avx_x86_64.asm @@ -0,0 +1,544 @@ +; +; Copyright (c) 2015 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + +%include "third_party/x86inc/x86inc.asm" + +SECTION .text + +%macro QUANTIZE_FN 2 +cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ + shift, qcoeff, dqcoeff, dequant, \ + eob, scan, iscan + + vzeroupper + + ; If we can skip this block, then just zero the output + cmp skipmp, 0 + jne .blank + +%ifnidn %1, b_32x32 + + ; Special case for ncoeff == 16, as it is frequent and we can save on + ; not setting up a loop. + cmp ncoeffmp, 16 + jne .generic + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Special case of ncoeff == 16 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +.single: + + movifnidn coeffq, coeffmp + movifnidn zbinq, zbinmp + mova m0, [zbinq] ; m0 = zbin + + ; Get DC and first 15 AC coeffs - in this special case, that is all. +%if CONFIG_VP9_HIGHBITDEPTH + ; coeff stored as 32bit numbers but we process them as 16 bit numbers + mova m9, [coeffq] + packssdw m9, [coeffq+16] ; m9 = c[i] + mova m10, [coeffq+32] + packssdw m10, [coeffq+48] ; m10 = c[i] +%else + mova m9, [coeffq] ; m9 = c[i] + mova m10, [coeffq+16] ; m10 = c[i] +%endif + + mov r0, eobmp ; Output pointer + mov r1, qcoeffmp ; Output pointer + mov r2, dqcoeffmp ; Output pointer + + pxor m5, m5 ; m5 = dedicated zero + + pcmpeqw m4, m4 ; All word lanes -1 + paddw m0, m4 ; m0 = zbin - 1 + + pabsw m6, m9 ; m6 = abs(m9) + pabsw m11, m10 ; m11 = abs(m10) + pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin + punpckhqdq m0, m0 + pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin + + ; Check if all coeffs are less than zbin. If yes, we just write zeros + ; to the outputs and we are done. 
+ por m14, m7, m12 + ptest m14, m14 + jnz .single_nonzero + +%if CONFIG_VP9_HIGHBITDEPTH + mova [r1 ], ymm5 + mova [r1+32], ymm5 + mova [r2 ], ymm5 + mova [r2+32], ymm5 +%else + mova [r1], ymm5 + mova [r2], ymm5 +%endif + mov [r0], word 0 + + vzeroupper + RET + +.single_nonzero: + + ; Actual quantization of size 16 block - setup pointers, rounders, etc. + movifnidn r4, roundmp + movifnidn r5, quantmp + mov r3, dequantmp + mov r6, shiftmp + mova m1, [r4] ; m1 = round + mova m2, [r5] ; m2 = quant + mova m3, [r3] ; m3 = dequant + mova m4, [r6] ; m4 = shift + + mov r3, iscanmp + + DEFINE_ARGS eob, qcoeff, dqcoeff, iscan + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + paddsw m6, m1 ; m6 += round + punpckhqdq m1, m1 + paddsw m11, m1 ; m11 += round + pmulhw m8, m6, m2 ; m8 = m6*q>>16 + punpckhqdq m2, m2 + pmulhw m13, m11, m2 ; m13 = m11*q>>16 + paddw m8, m6 ; m8 += m6 + paddw m13, m11 ; m13 += m11 + pmulhw m8, m4 ; m8 = m8*qsh>>16 + punpckhqdq m4, m4 + pmulhw m13, m4 ; m13 = m13*qsh>>16 + psignw m8, m9 ; m8 = reinsert sign + psignw m13, m10 ; m13 = reinsert sign + pand m8, m7 + pand m13, m12 + +%if CONFIG_VP9_HIGHBITDEPTH + ; Store 16bit numbers as 32bit numbers in array pointed to by qcoeff + pcmpgtw m6, m5, m8 + punpckhwd m6, m8, m6 + pmovsxwd m11, m8 + mova [qcoeffq ], m11 + mova [qcoeffq+16], m6 + pcmpgtw m6, m5, m13 + punpckhwd m6, m13, m6 + pmovsxwd m11, m13 + mova [qcoeffq+32], m11 + mova [qcoeffq+48], m6 +%else + mova [qcoeffq ], m8 + mova [qcoeffq+16], m13 +%endif + + pmullw m8, m3 ; dqc[i] = qc[i] * q + punpckhqdq m3, m3 + pmullw m13, m3 ; dqc[i] = qc[i] * q + +%if CONFIG_VP9_HIGHBITDEPTH + ; Store 16bit numbers as 32bit numbers in array pointed to by qcoeff + pcmpgtw m6, m5, m8 + punpckhwd m6, m8, m6 + pmovsxwd m11, m8 + mova [dqcoeffq ], m11 + mova [dqcoeffq+16], m6 + pcmpgtw m6, m5, m13 + punpckhwd m6, m13, m6 + pmovsxwd m11, m13 + mova [dqcoeffq+32], m11 + mova [dqcoeffq+48], m6 +%else + mova [dqcoeffq ], m8 + mova 
[dqcoeffq+16], m13 +%endif + + mova m6, [iscanq] ; m6 = scan[i] + mova m11, [iscanq+16] ; m11 = scan[i] + + pcmpeqw m8, m8, m5 ; m8 = c[i] == 0 + pcmpeqw m13, m13, m5 ; m13 = c[i] == 0 + psubw m6, m6, m7 ; m6 = scan[i] + 1 + psubw m11, m11, m12 ; m11 = scan[i] + 1 + pandn m8, m8, m6 ; m8 = max(eob) + pandn m13, m13, m11 ; m13 = max(eob) + pmaxsw m8, m8, m13 + + ; Horizontally accumulate/max eobs and write into [eob] memory pointer + pshufd m7, m8, 0xe + pmaxsw m8, m7 + pshuflw m7, m8, 0xe + pmaxsw m8, m7 + pshuflw m7, m8, 0x1 + pmaxsw m8, m7 + movq rax, m8 + mov [eobq], ax + + vzeroupper + RET + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Generic case of ncoeff != 16 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +.generic: + +%endif ; %ifnidn %1, b_32x32 + +DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \ + qcoeff, dqcoeff, dequant, eob, scan, iscan + + ; Actual quantization loop - setup pointers, rounders, etc. 
+ movifnidn coeffq, coeffmp + movifnidn ncoeffq, ncoeffmp + mov r2, dequantmp + movifnidn zbinq, zbinmp + movifnidn roundq, roundmp + movifnidn quantq, quantmp + mova m0, [zbinq] ; m0 = zbin + mova m1, [roundq] ; m1 = round + mova m2, [quantq] ; m2 = quant + mova m3, [r2] ; m3 = dequant + pcmpeqw m4, m4 ; All lanes -1 +%ifidn %1, b_32x32 + psubw m0, m4 + psubw m1, m4 + psrlw m0, 1 ; m0 = (m0 + 1) / 2 + psrlw m1, 1 ; m1 = (m1 + 1) / 2 +%endif + paddw m0, m4 ; m0 = m0 + 1 + + mov r2, shiftmp + mov r3, qcoeffmp + mova m4, [r2] ; m4 = shift + mov r4, dqcoeffmp + mov r5, iscanmp +%ifidn %1, b_32x32 + psllw m4, 1 +%endif + pxor m5, m5 ; m5 = dedicated zero + + DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob + +%if CONFIG_VP9_HIGHBITDEPTH + lea coeffq, [ coeffq+ncoeffq*4] + lea qcoeffq, [ qcoeffq+ncoeffq*4] + lea dqcoeffq, [dqcoeffq+ncoeffq*4] +%else + lea coeffq, [ coeffq+ncoeffq*2] + lea qcoeffq, [ qcoeffq+ncoeffq*2] + lea dqcoeffq, [dqcoeffq+ncoeffq*2] +%endif + lea iscanq, [ iscanq+ncoeffq*2] + neg ncoeffq + + ; get DC and first 15 AC coeffs +%if CONFIG_VP9_HIGHBITDEPTH + ; coeff stored as 32bit numbers & require 16bit numbers + mova m9, [coeffq+ncoeffq*4+ 0] + packssdw m9, [coeffq+ncoeffq*4+16] + mova m10, [coeffq+ncoeffq*4+32] + packssdw m10, [coeffq+ncoeffq*4+48] +%else + mova m9, [coeffq+ncoeffq*2+ 0] ; m9 = c[i] + mova m10, [coeffq+ncoeffq*2+16] ; m10 = c[i] +%endif + + pabsw m6, m9 ; m6 = abs(m9) + pabsw m11, m10 ; m11 = abs(m10) + pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin + punpckhqdq m0, m0 + pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin + + ; Check if all coeffs are less than zbin. If yes, skip forward quickly. 
+ por m14, m7, m12 + ptest m14, m14 + jnz .first_nonzero + +%if CONFIG_VP9_HIGHBITDEPTH + mova [qcoeffq+ncoeffq*4 ], ymm5 + mova [qcoeffq+ncoeffq*4+32], ymm5 + mova [dqcoeffq+ncoeffq*4 ], ymm5 + mova [dqcoeffq+ncoeffq*4+32], ymm5 +%else + mova [qcoeffq+ncoeffq*2], ymm5 + mova [dqcoeffq+ncoeffq*2], ymm5 +%endif + + add ncoeffq, mmsize + + punpckhqdq m1, m1 + punpckhqdq m2, m2 + punpckhqdq m3, m3 + punpckhqdq m4, m4 + pxor m8, m8 + + jmp .ac_only_loop + +.first_nonzero: + + paddsw m6, m1 ; m6 += round + punpckhqdq m1, m1 + paddsw m11, m1 ; m11 += round + pmulhw m8, m6, m2 ; m8 = m6*q>>16 + punpckhqdq m2, m2 + pmulhw m13, m11, m2 ; m13 = m11*q>>16 + paddw m8, m6 ; m8 += m6 + paddw m13, m11 ; m13 += m11 + pmulhw m8, m4 ; m8 = m8*qsh>>16 + punpckhqdq m4, m4 + pmulhw m13, m4 ; m13 = m13*qsh>>16 + psignw m8, m9 ; m8 = reinsert sign + psignw m13, m10 ; m13 = reinsert sign + pand m8, m7 + pand m13, m12 + +%if CONFIG_VP9_HIGHBITDEPTH + ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff + pcmpgtw m6, m5, m8 + punpckhwd m6, m8, m6 + pmovsxwd m11, m8 + mova [qcoeffq+ncoeffq*4+ 0], m11 + mova [qcoeffq+ncoeffq*4+16], m6 + pcmpgtw m6, m5, m13 + punpckhwd m6, m13, m6 + pmovsxwd m11, m13 + mova [qcoeffq+ncoeffq*4+32], m11 + mova [qcoeffq+ncoeffq*4+48], m6 +%else + mova [qcoeffq+ncoeffq*2+ 0], m8 + mova [qcoeffq+ncoeffq*2+16], m13 +%endif + +%ifidn %1, b_32x32 + pabsw m8, m8 + pabsw m13, m13 +%endif + pmullw m8, m3 ; dqc[i] = qc[i] * q + punpckhqdq m3, m3 + pmullw m13, m3 ; dqc[i] = qc[i] * q +%ifidn %1, b_32x32 + psrlw m8, 1 + psrlw m13, 1 + psignw m8, m9 + psignw m13, m10 +%endif + +%if CONFIG_VP9_HIGHBITDEPTH + ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff + pcmpgtw m6, m5, m8 + punpckhwd m6, m8, m6 + pmovsxwd m11, m8 + mova [dqcoeffq+ncoeffq*4+ 0], m11 + mova [dqcoeffq+ncoeffq*4+16], m6 + pcmpgtw m6, m5, m13 + punpckhwd m6, m13, m6 + pmovsxwd m11, m13 + mova [dqcoeffq+ncoeffq*4+32], m11 + mova [dqcoeffq+ncoeffq*4+48], m6 +%else + mova 
[dqcoeffq+ncoeffq*2+ 0], m8 + mova [dqcoeffq+ncoeffq*2+16], m13 +%endif + + pcmpeqw m8, m5 ; m8 = c[i] == 0 + pcmpeqw m13, m5 ; m13 = c[i] == 0 + mova m6, [iscanq+ncoeffq*2] ; m6 = scan[i] + mova m11, [iscanq+ncoeffq*2+16] ; m11 = scan[i] + psubw m6, m7 ; m6 = scan[i] + 1 + psubw m11, m12 ; m11 = scan[i] + 1 + pandn m8, m6 ; m8 = max(eob) + pandn m13, m11 ; m13 = max(eob) + pmaxsw m8, m13 + add ncoeffq, mmsize + +.ac_only_loop: + +%if CONFIG_VP9_HIGHBITDEPTH + ; pack coeff from 32bit to 16bit array + mova m9, [coeffq+ncoeffq*4+ 0] + packssdw m9, [coeffq+ncoeffq*4+16] + mova m10, [coeffq+ncoeffq*4+32] + packssdw m10, [coeffq+ncoeffq*4+48] +%else + mova m9, [coeffq+ncoeffq*2+ 0] ; m9 = c[i] + mova m10, [coeffq+ncoeffq*2+16] ; m10 = c[i] +%endif + + pabsw m6, m9 ; m6 = abs(m9) + pabsw m11, m10 ; m11 = abs(m10) + pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin + pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin + + ; Check if all coeffs are less than zbin. If yes, skip this itertion. + ; And just write zeros as the result would be. 
+ por m14, m7, m12 + ptest m14, m14 + jnz .rest_nonzero + +%if CONFIG_VP9_HIGHBITDEPTH + mova [qcoeffq+ncoeffq*4+ 0], ymm5 + mova [qcoeffq+ncoeffq*4+32], ymm5 + mova [dqcoeffq+ncoeffq*4+ 0], ymm5 + mova [dqcoeffq+ncoeffq*4+32], ymm5 +%else + mova [qcoeffq+ncoeffq*2+ 0], ymm5 + mova [dqcoeffq+ncoeffq*2+ 0], ymm5 +%endif + add ncoeffq, mmsize + jnz .ac_only_loop + + ; Horizontally accumulate/max eobs and write into [eob] memory pointer + mov r2, eobmp + pshufd m7, m8, 0xe + pmaxsw m8, m7 + pshuflw m7, m8, 0xe + pmaxsw m8, m7 + pshuflw m7, m8, 0x1 + pmaxsw m8, m7 + movq rax, m8 + mov [r2], ax + vzeroupper + RET + +.rest_nonzero: + paddsw m6, m1 ; m6 += round + paddsw m11, m1 ; m11 += round + pmulhw m14, m6, m2 ; m14 = m6*q>>16 + pmulhw m13, m11, m2 ; m13 = m11*q>>16 + paddw m14, m6 ; m14 += m6 + paddw m13, m11 ; m13 += m11 + pmulhw m14, m4 ; m14 = m14*qsh>>16 + pmulhw m13, m4 ; m13 = m13*qsh>>16 + psignw m14, m9 ; m14 = reinsert sign + psignw m13, m10 ; m13 = reinsert sign + pand m14, m7 + pand m13, m12 + +%if CONFIG_VP9_HIGHBITDEPTH + ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff + pcmpgtw m6, m5, m14 + punpckhwd m6, m14, m6 + pmovsxwd m11, m14 + mova [qcoeffq+ncoeffq*4+ 0], m11 + mova [qcoeffq+ncoeffq*4+16], m6 + pcmpgtw m6, m5, m13 + punpckhwd m6, m13, m6 + pmovsxwd m11, m13 + mova [qcoeffq+ncoeffq*4+32], m11 + mova [qcoeffq+ncoeffq*4+48], m6 +%else + mova [qcoeffq+ncoeffq*2+ 0], m14 + mova [qcoeffq+ncoeffq*2+16], m13 +%endif + +%ifidn %1, b_32x32 + pabsw m14, m14 + pabsw m13, m13 +%endif + pmullw m14, m3 ; dqc[i] = qc[i] * q + pmullw m13, m3 ; dqc[i] = qc[i] * q +%ifidn %1, b_32x32 + psrlw m14, 1 + psrlw m13, 1 + psignw m14, m9 + psignw m13, m10 +%endif + +%if CONFIG_VP9_HIGHBITDEPTH + ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff + pcmpgtw m6, m5, m14 + punpckhwd m6, m14, m6 + pmovsxwd m11, m14 + mova [dqcoeffq+ncoeffq*4+ 0], m11 + mova [dqcoeffq+ncoeffq*4+16], m6 + pcmpgtw m6, m5, m13 + punpckhwd m6, m13, m6 + 
pmovsxwd m11, m13 + mova [dqcoeffq+ncoeffq*4+32], m11 + mova [dqcoeffq+ncoeffq*4+48], m6 +%else + mova [dqcoeffq+ncoeffq*2+ 0], m14 + mova [dqcoeffq+ncoeffq*2+16], m13 +%endif + + pcmpeqw m14, m5 ; m14 = c[i] == 0 + pcmpeqw m13, m5 ; m13 = c[i] == 0 + mova m6, [iscanq+ncoeffq*2+ 0] ; m6 = scan[i] + mova m11, [iscanq+ncoeffq*2+16] ; m11 = scan[i] + psubw m6, m7 ; m6 = scan[i] + 1 + psubw m11, m12 ; m11 = scan[i] + 1 + pandn m14, m6 ; m14 = max(eob) + pandn m13, m11 ; m13 = max(eob) + pmaxsw m8, m14 + pmaxsw m8, m13 + add ncoeffq, mmsize + jnz .ac_only_loop + + ; Horizontally accumulate/max eobs and write into [eob] memory pointer + mov r2, eobmp + pshufd m7, m8, 0xe + pmaxsw m8, m7 + pshuflw m7, m8, 0xe + pmaxsw m8, m7 + pshuflw m7, m8, 0x1 + pmaxsw m8, m7 + movq rax, m8 + mov [r2], ax + vzeroupper + RET + + ; Skip-block, i.e. just write all zeroes +.blank: + +DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \ + qcoeff, dqcoeff, dequant, eob, scan, iscan + + mov r0, dqcoeffmp + movifnidn ncoeffq, ncoeffmp + mov r2, qcoeffmp + mov r3, eobmp + +DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob + +%if CONFIG_VP9_HIGHBITDEPTH + lea dqcoeffq, [dqcoeffq+ncoeffq*4] + lea qcoeffq, [ qcoeffq+ncoeffq*4] +%else + lea dqcoeffq, [dqcoeffq+ncoeffq*2] + lea qcoeffq, [ qcoeffq+ncoeffq*2] +%endif + + neg ncoeffq + pxor m7, m7 + +.blank_loop: +%if CONFIG_VP9_HIGHBITDEPTH + mova [dqcoeffq+ncoeffq*4+ 0], ymm7 + mova [dqcoeffq+ncoeffq*4+32], ymm7 + mova [qcoeffq+ncoeffq*4+ 0], ymm7 + mova [qcoeffq+ncoeffq*4+32], ymm7 +%else + mova [dqcoeffq+ncoeffq*2+ 0], ymm7 + mova [qcoeffq+ncoeffq*2+ 0], ymm7 +%endif + add ncoeffq, mmsize + jl .blank_loop + + mov [eobq], word 0 + + vzeroupper + RET +%endmacro + +INIT_XMM avx +QUANTIZE_FN b, 7 +QUANTIZE_FN b_32x32, 7 + +END diff --git a/libvpx/vpx_dsp/x86/quantize_sse2.c b/libvpx/vpx_dsp/x86/quantize_sse2.c index c2a804e1..8aa4568d 100644 --- a/libvpx/vpx_dsp/x86/quantize_sse2.c +++ b/libvpx/vpx_dsp/x86/quantize_sse2.c @@ -14,11 +14,36 @@ #include 
"./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" -void vpx_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, +static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) { +#if CONFIG_VP9_HIGHBITDEPTH + return _mm_setr_epi16((int16_t)coeff_ptr[0], (int16_t)coeff_ptr[1], + (int16_t)coeff_ptr[2], (int16_t)coeff_ptr[3], (int16_t)coeff_ptr[4], + (int16_t)coeff_ptr[5], (int16_t)coeff_ptr[6], (int16_t)coeff_ptr[7]); +#else + return _mm_load_si128((const __m128i *)coeff_ptr); +#endif +} + +static INLINE void store_coefficients(__m128i coeff_vals, + tran_low_t *coeff_ptr) { +#if CONFIG_VP9_HIGHBITDEPTH + __m128i one = _mm_set1_epi16(1); + __m128i coeff_vals_hi = _mm_mulhi_epi16(coeff_vals, one); + __m128i coeff_vals_lo = _mm_mullo_epi16(coeff_vals, one); + __m128i coeff_vals_1 = _mm_unpacklo_epi16(coeff_vals_lo, coeff_vals_hi); + __m128i coeff_vals_2 = _mm_unpackhi_epi16(coeff_vals_lo, coeff_vals_hi); + _mm_store_si128((__m128i*)(coeff_ptr), coeff_vals_1); + _mm_store_si128((__m128i*)(coeff_ptr + 4), coeff_vals_2); +#else + _mm_store_si128((__m128i*)(coeff_ptr), coeff_vals); +#endif +} + +void vpx_quantize_b_sse2(const tran_low_t* coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t* zbin_ptr, const int16_t* round_ptr, const int16_t* quant_ptr, - const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, - int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, + const int16_t* quant_shift_ptr, tran_low_t* qcoeff_ptr, + tran_low_t* dqcoeff_ptr, const int16_t* dequant_ptr, uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { @@ -56,8 +81,8 @@ void vpx_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, __m128i qtmp0, qtmp1; __m128i cmp_mask0, cmp_mask1; // Do DC and first 15 AC - coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs)); - coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1); + coeff0 = load_coefficients(coeff_ptr + n_coeffs); + coeff1 = load_coefficients(coeff_ptr + n_coeffs + 8); // Poor man's 
sign extract coeff0_sign = _mm_srai_epi16(coeff0, 15); @@ -92,15 +117,15 @@ void vpx_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); + store_coefficients(qcoeff0, qcoeff_ptr + n_coeffs); + store_coefficients(qcoeff1, qcoeff_ptr + n_coeffs + 8); coeff0 = _mm_mullo_epi16(qcoeff0, dequant); dequant = _mm_unpackhi_epi64(dequant, dequant); coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + store_coefficients(coeff0, dqcoeff_ptr + n_coeffs); + store_coefficients(coeff1, dqcoeff_ptr + n_coeffs + 8); } { @@ -134,8 +159,8 @@ void vpx_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, __m128i qtmp0, qtmp1; __m128i cmp_mask0, cmp_mask1; - coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs)); - coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1); + coeff0 = load_coefficients(coeff_ptr + n_coeffs); + coeff1 = load_coefficients(coeff_ptr + n_coeffs + 8); // Poor man's sign extract coeff0_sign = _mm_srai_epi16(coeff0, 15); @@ -166,14 +191,14 @@ void vpx_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); + store_coefficients(qcoeff0, qcoeff_ptr + n_coeffs); + store_coefficients(qcoeff1, qcoeff_ptr + n_coeffs + 8); coeff0 = _mm_mullo_epi16(qcoeff0, dequant); coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + store_coefficients(coeff0, dqcoeff_ptr 
+ n_coeffs); + store_coefficients(coeff1, dqcoeff_ptr + n_coeffs + 8); } { @@ -212,10 +237,10 @@ void vpx_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, } } else { do { - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); - _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); - _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); + store_coefficients(zero, dqcoeff_ptr + n_coeffs); + store_coefficients(zero, dqcoeff_ptr + n_coeffs + 8); + store_coefficients(zero, qcoeff_ptr + n_coeffs); + store_coefficients(zero, qcoeff_ptr + n_coeffs + 8); n_coeffs += 8 * 2; } while (n_coeffs < 0); *eob_ptr = 0; diff --git a/libvpx/vpx_dsp/x86/quantize_ssse3_x86_64.asm b/libvpx/vpx_dsp/x86/quantize_ssse3_x86_64.asm index 3784d9d2..ca215391 100644 --- a/libvpx/vpx_dsp/x86/quantize_ssse3_x86_64.asm +++ b/libvpx/vpx_dsp/x86/quantize_ssse3_x86_64.asm @@ -53,15 +53,29 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ %endif pxor m5, m5 ; m5 = dedicated zero DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob +%if CONFIG_VP9_HIGHBITDEPTH + lea coeffq, [ coeffq+ncoeffq*4] + lea qcoeffq, [ qcoeffq+ncoeffq*4] + lea dqcoeffq, [dqcoeffq+ncoeffq*4] +%else lea coeffq, [ coeffq+ncoeffq*2] - lea iscanq, [ iscanq+ncoeffq*2] lea qcoeffq, [ qcoeffq+ncoeffq*2] lea dqcoeffq, [dqcoeffq+ncoeffq*2] +%endif + lea iscanq, [ iscanq+ncoeffq*2] neg ncoeffq ; get DC and first 15 AC coeffs +%if CONFIG_VP9_HIGHBITDEPTH + ; coeff stored as 32bit numbers & require 16bit numbers + mova m9, [ coeffq+ncoeffq*4+ 0] + packssdw m9, [ coeffq+ncoeffq*4+16] + mova m10, [ coeffq+ncoeffq*4+32] + packssdw m10, [ coeffq+ncoeffq*4+48] +%else mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] +%endif pabsw m6, m9 ; m6 = abs(m9) pabsw m11, m10 ; m11 = abs(m10) pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin @@ -82,8 +96,28 @@ cglobal quantize_%1, 0, 
%2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psignw m13, m10 ; m13 = reinsert sign pand m8, m7 pand m13, m12 +%if CONFIG_VP9_HIGHBITDEPTH + ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff + mova m11, m8 + mova m6, m8 + pcmpgtw m5, m8 + punpcklwd m11, m5 + punpckhwd m6, m5 + mova [qcoeffq+ncoeffq*4+ 0], m11 + mova [qcoeffq+ncoeffq*4+16], m6 + pxor m5, m5 + mova m11, m13 + mova m6, m13 + pcmpgtw m5, m13 + punpcklwd m11, m5 + punpckhwd m6, m5 + mova [qcoeffq+ncoeffq*4+32], m11 + mova [qcoeffq+ncoeffq*4+48], m6 + pxor m5, m5 ; reset m5 to zero register +%else mova [qcoeffq+ncoeffq*2+ 0], m8 mova [qcoeffq+ncoeffq*2+16], m13 +%endif %ifidn %1, b_32x32 pabsw m8, m8 pabsw m13, m13 @@ -97,8 +131,28 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psignw m8, m9 psignw m13, m10 %endif +%if CONFIG_VP9_HIGHBITDEPTH + ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff + mova m11, m8 + mova m6, m8 + pcmpgtw m5, m8 + punpcklwd m11, m5 + punpckhwd m6, m5 + mova [dqcoeffq+ncoeffq*4+ 0], m11 + mova [dqcoeffq+ncoeffq*4+16], m6 + pxor m5, m5 + mova m11, m13 + mova m6, m13 + pcmpgtw m5, m13 + punpcklwd m11, m5 + punpckhwd m6, m5 + mova [dqcoeffq+ncoeffq*4+32], m11 + mova [dqcoeffq+ncoeffq*4+48], m6 + pxor m5, m5 ; reset m5 to zero register +%else mova [dqcoeffq+ncoeffq*2+ 0], m8 mova [dqcoeffq+ncoeffq*2+16], m13 +%endif pcmpeqw m8, m5 ; m8 = c[i] == 0 pcmpeqw m13, m5 ; m13 = c[i] == 0 mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] @@ -112,8 +166,16 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ jz .accumulate_eob .ac_only_loop: +%if CONFIG_VP9_HIGHBITDEPTH + ; pack coeff from 32bit to 16bit array + mova m9, [ coeffq+ncoeffq*4+ 0] + packssdw m9, [ coeffq+ncoeffq*4+16] + mova m10, [ coeffq+ncoeffq*4+32] + packssdw m10, [ coeffq+ncoeffq*4+48] +%else mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] +%endif pabsw m6, m9 ; m6 = abs(m9) pabsw m11, 
m10 ; m11 = abs(m10) pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin @@ -136,8 +198,29 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psignw m13, m10 ; m13 = reinsert sign pand m14, m7 pand m13, m12 +%if CONFIG_VP9_HIGHBITDEPTH + ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff + pxor m11, m11 + mova m11, m14 + mova m6, m14 + pcmpgtw m5, m14 + punpcklwd m11, m5 + punpckhwd m6, m5 + mova [qcoeffq+ncoeffq*4+ 0], m11 + mova [qcoeffq+ncoeffq*4+16], m6 + pxor m5, m5 + mova m11, m13 + mova m6, m13 + pcmpgtw m5, m13 + punpcklwd m11, m5 + punpckhwd m6, m5 + mova [qcoeffq+ncoeffq*4+32], m11 + mova [qcoeffq+ncoeffq*4+48], m6 + pxor m5, m5 ; reset m5 to zero register +%else mova [qcoeffq+ncoeffq*2+ 0], m14 mova [qcoeffq+ncoeffq*2+16], m13 +%endif %ifidn %1, b_32x32 pabsw m14, m14 pabsw m13, m13 @@ -150,8 +233,28 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psignw m14, m9 psignw m13, m10 %endif +%if CONFIG_VP9_HIGHBITDEPTH + ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff + mova m11, m14 + mova m6, m14 + pcmpgtw m5, m14 + punpcklwd m11, m5 + punpckhwd m6, m5 + mova [dqcoeffq+ncoeffq*4+ 0], m11 + mova [dqcoeffq+ncoeffq*4+16], m6 + pxor m5, m5 + mova m11, m13 + mova m6, m13 + pcmpgtw m5, m13 + punpcklwd m11, m5 + punpckhwd m6, m5 + mova [dqcoeffq+ncoeffq*4+32], m11 + mova [dqcoeffq+ncoeffq*4+48], m6 + pxor m5, m5 +%else mova [dqcoeffq+ncoeffq*2+ 0], m14 mova [dqcoeffq+ncoeffq*2+16], m13 +%endif pcmpeqw m14, m5 ; m14 = c[i] == 0 pcmpeqw m13, m5 ; m13 = c[i] == 0 mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] @@ -168,10 +271,21 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ %ifidn %1, b_32x32 jmp .accumulate_eob .skip_iter: +%if CONFIG_VP9_HIGHBITDEPTH + mova [qcoeffq+ncoeffq*4+ 0], m5 + mova [qcoeffq+ncoeffq*4+16], m5 + mova [qcoeffq+ncoeffq*4+32], m5 + mova [qcoeffq+ncoeffq*4+48], m5 + mova [dqcoeffq+ncoeffq*4+ 0], m5 + mova [dqcoeffq+ncoeffq*4+16], m5 + 
mova [dqcoeffq+ncoeffq*4+32], m5 + mova [dqcoeffq+ncoeffq*4+48], m5 +%else mova [qcoeffq+ncoeffq*2+ 0], m5 mova [qcoeffq+ncoeffq*2+16], m5 mova [dqcoeffq+ncoeffq*2+ 0], m5 mova [dqcoeffq+ncoeffq*2+16], m5 +%endif add ncoeffq, mmsize jl .ac_only_loop %endif @@ -196,15 +310,31 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mov r2, qcoeffmp mov r3, eobmp DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob +%if CONFIG_VP9_HIGHBITDEPTH + lea dqcoeffq, [dqcoeffq+ncoeffq*4] + lea qcoeffq, [ qcoeffq+ncoeffq*4] +%else lea dqcoeffq, [dqcoeffq+ncoeffq*2] lea qcoeffq, [ qcoeffq+ncoeffq*2] +%endif neg ncoeffq pxor m7, m7 .blank_loop: +%if CONFIG_VP9_HIGHBITDEPTH + mova [dqcoeffq+ncoeffq*4+ 0], m7 + mova [dqcoeffq+ncoeffq*4+16], m7 + mova [dqcoeffq+ncoeffq*4+32], m7 + mova [dqcoeffq+ncoeffq*4+48], m7 + mova [qcoeffq+ncoeffq*4+ 0], m7 + mova [qcoeffq+ncoeffq*4+16], m7 + mova [qcoeffq+ncoeffq*4+32], m7 + mova [qcoeffq+ncoeffq*4+48], m7 +%else mova [dqcoeffq+ncoeffq*2+ 0], m7 mova [dqcoeffq+ncoeffq*2+16], m7 mova [qcoeffq+ncoeffq*2+ 0], m7 mova [qcoeffq+ncoeffq*2+16], m7 +%endif add ncoeffq, mmsize jl .blank_loop mov word [eobq], 0 diff --git a/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm index b2638370..9c5b414b 100644 --- a/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm +++ b/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm @@ -12,21 +12,77 @@ SECTION .text -%macro convolve_fn 1 +%macro convolve_fn 1-2 INIT_XMM sse2 +%ifidn %2, highbd +%define pavg pavgw +cglobal %2_convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \ + fx, fxs, fy, fys, w, h, bd +%else +%define pavg pavgb cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \ fx, fxs, fy, fys, w, h +%endif mov r4d, dword wm +%ifidn %2, highbd + shl r4d, 1 + shl srcq, 1 + shl src_strideq, 1 + shl dstq, 1 + shl dst_strideq, 1 +%else cmp r4d, 4 je .w4 +%endif cmp r4d, 8 je .w8 cmp r4d, 16 je .w16 cmp r4d, 32 je .w32 +%ifidn %2, highbd + cmp r4d, 64 + 
je .w64 mov r4d, dword hm +.loop128: + movu m0, [srcq] + movu m1, [srcq+16] + movu m2, [srcq+32] + movu m3, [srcq+48] +%ifidn %1, avg + pavg m0, [dstq] + pavg m1, [dstq+16] + pavg m2, [dstq+32] + pavg m3, [dstq+48] +%endif + mova [dstq ], m0 + mova [dstq+16], m1 + mova [dstq+32], m2 + mova [dstq+48], m3 + movu m0, [srcq+64] + movu m1, [srcq+80] + movu m2, [srcq+96] + movu m3, [srcq+112] + add srcq, src_strideq +%ifidn %1, avg + pavg m0, [dstq+64] + pavg m1, [dstq+80] + pavg m2, [dstq+96] + pavg m3, [dstq+112] +%endif + mova [dstq+64], m0 + mova [dstq+80], m1 + mova [dstq+96], m2 + mova [dstq+112], m3 + add dstq, dst_strideq + dec r4d + jnz .loop128 + RET +%endif + +.w64 + mov r4d, dword hm .loop64: movu m0, [srcq] movu m1, [srcq+16] @@ -34,10 +90,10 @@ cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \ movu m3, [srcq+48] add srcq, src_strideq %ifidn %1, avg - pavgb m0, [dstq] - pavgb m1, [dstq+16] - pavgb m2, [dstq+32] - pavgb m3, [dstq+48] + pavg m0, [dstq] + pavg m1, [dstq+16] + pavg m2, [dstq+32] + pavg m3, [dstq+48] %endif mova [dstq ], m0 mova [dstq+16], m1 @@ -57,10 +113,10 @@ cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \ movu m3, [srcq+src_strideq+16] lea srcq, [srcq+src_strideq*2] %ifidn %1, avg - pavgb m0, [dstq] - pavgb m1, [dstq +16] - pavgb m2, [dstq+dst_strideq] - pavgb m3, [dstq+dst_strideq+16] + pavg m0, [dstq] + pavg m1, [dstq +16] + pavg m2, [dstq+dst_strideq] + pavg m3, [dstq+dst_strideq+16] %endif mova [dstq ], m0 mova [dstq +16], m1 @@ -82,10 +138,10 @@ cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \ movu m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg - pavgb m0, [dstq] - pavgb m1, [dstq+dst_strideq] - pavgb m2, [dstq+dst_strideq*2] - pavgb m3, [dstq+r6q] + pavg m0, [dstq] + pavg m1, [dstq+dst_strideq] + pavg m2, [dstq+dst_strideq*2] + pavg m3, [dstq+r6q] %endif mova [dstq ], m0 mova [dstq+dst_strideq ], m1 @@ -108,10 +164,10 @@ INIT_MMX sse movu m3, [srcq+r5q] lea srcq, 
[srcq+src_strideq*4] %ifidn %1, avg - pavgb m0, [dstq] - pavgb m1, [dstq+dst_strideq] - pavgb m2, [dstq+dst_strideq*2] - pavgb m3, [dstq+r6q] + pavg m0, [dstq] + pavg m1, [dstq+dst_strideq] + pavg m2, [dstq+dst_strideq*2] + pavg m3, [dstq+r6q] %endif mova [dstq ], m0 mova [dstq+dst_strideq ], m1 @@ -122,6 +178,7 @@ INIT_MMX sse jnz .loop8 RET +%ifnidn %2, highbd .w4: mov r4d, dword hm lea r5q, [src_strideq*3] @@ -137,10 +194,10 @@ INIT_MMX sse movh m5, [dstq+dst_strideq] movh m6, [dstq+dst_strideq*2] movh m7, [dstq+r6q] - pavgb m0, m4 - pavgb m1, m5 - pavgb m2, m6 - pavgb m3, m7 + pavg m0, m4 + pavg m1, m5 + pavg m2, m6 + pavg m3, m7 %endif movh [dstq ], m0 movh [dstq+dst_strideq ], m1 @@ -150,7 +207,12 @@ INIT_MMX sse sub r4d, 4 jnz .loop4 RET +%endif %endmacro convolve_fn copy convolve_fn avg +%if CONFIG_VP9_HIGHBITDEPTH +convolve_fn copy, highbd +convolve_fn avg, highbd +%endif diff --git a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c index 29ede19f..b7186785 100644 --- a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c +++ b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c @@ -41,7 +41,10 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { #if defined(__clang__) # if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \ - (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0) + (defined(__APPLE__) && \ + ((__clang_major__ == 4 && __clang_minor__ <= 2) || \ + (__clang_major__ == 5 && __clang_minor__ == 0))) + # define MM256_BROADCASTSI128_SI256(x) \ _mm_broadcastsi128_si256((__m128i const *)&(x)) # else // clang > 3.3, and not 5.0 on macosx. 
diff --git a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c index 772e01e8..6fd52087 100644 --- a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c +++ b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c @@ -203,123 +203,6 @@ void vpx_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr, } } -static void vpx_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pixels_per_line, - uint8_t *output_ptr, - ptrdiff_t output_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i addFilterReg64, filtersReg, srcReg1, srcReg2; - __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt1_1, srcRegFilt2_1, srcRegFilt2, srcRegFilt3; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. 
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 128 bit register - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 128 bit register - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 128 bit register - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 128 bit register - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - filt1Reg = _mm_load_si128((__m128i const *)filt1_global); - filt2Reg = _mm_load_si128((__m128i const *)filt2_global); - filt3Reg = _mm_load_si128((__m128i const *)filt3_global); - filt4Reg = _mm_load_si128((__m128i const *)filt4_global); - - for (i = 0; i < output_height; i++) { - srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); - - // filter the source buffer - srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt2Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - - // reading the next 16 bytes. 
- // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - // filter the source buffer - srcRegFilt2_1= _mm_shuffle_epi8(srcReg2, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt2Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, addFilterReg64); - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7); - srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1); - - src_ptr+=src_pixels_per_line; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1); - - output_ptr+=output_pitch; - } -} - void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr, @@ 
-408,141 +291,12 @@ void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, } } -static void vpx_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pitch, - uint8_t *output_ptr, - ptrdiff_t out_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt3; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8; - __m128i srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7; - __m128i srcReg8; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits in the filter - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits in the filter - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits in the filter - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - // load the first 7 rows of 16 bytes - srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr)); - srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)); - srcReg3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)); - srcReg4 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)); - srcReg5 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)); - srcReg6 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)); - srcReg7 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)); - - for (i = 0; i < output_height; 
i++) { - // load the last 16 bytes - srcReg8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); - - // merge the result together - srcRegFilt5 = _mm_unpacklo_epi8(srcReg1, srcReg2); - srcRegFilt6 = _mm_unpacklo_epi8(srcReg7, srcReg8); - srcRegFilt1 = _mm_unpackhi_epi8(srcReg1, srcReg2); - srcRegFilt3 = _mm_unpackhi_epi8(srcReg7, srcReg8); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, firstFilters); - srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, forthFilters); - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, srcRegFilt6); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); - - // merge the result together - srcRegFilt3 = _mm_unpacklo_epi8(srcReg3, srcReg4); - srcRegFilt6 = _mm_unpackhi_epi8(srcReg3, srcReg4); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, secondFilters); - - // merge the result together - srcRegFilt7 = _mm_unpacklo_epi8(srcReg5, srcReg6); - srcRegFilt8 = _mm_unpackhi_epi8(srcReg5, srcReg6); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, thirdFilters); - srcRegFilt8 = _mm_maddubs_epi16(srcRegFilt8, thirdFilters); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, - _mm_min_epi16(srcRegFilt3, srcRegFilt7)); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_min_epi16(srcRegFilt6, srcRegFilt8)); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, - _mm_max_epi16(srcRegFilt3, srcRegFilt7)); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_max_epi16(srcRegFilt6, srcRegFilt8)); - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, 
addFilterReg64); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt5 = _mm_srai_epi16(srcRegFilt5, 7); - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1 = _mm_packus_epi16(srcRegFilt5, srcRegFilt1); - - src_ptr+=src_pitch; - - // shift down a row - srcReg1 = srcReg2; - srcReg2 = srcReg3; - srcReg3 = srcReg4; - srcReg4 = srcReg5; - srcReg5 = srcReg6; - srcReg6 = srcReg7; - srcReg7 = srcReg8; - - // save 16 bytes convolve result - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); - - output_ptr+=out_pitch; - } -} - -#if ARCH_X86_64 -filter8_1dfunction vpx_filter_block1d16_v8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d16_h8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d4_v8_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; -#define vpx_filter_block1d16_v8_ssse3 vpx_filter_block1d16_v8_intrin_ssse3 -#define vpx_filter_block1d16_h8_ssse3 vpx_filter_block1d16_h8_intrin_ssse3 -#define vpx_filter_block1d8_v8_ssse3 vpx_filter_block1d8_v8_intrin_ssse3 -#define vpx_filter_block1d8_h8_ssse3 vpx_filter_block1d8_h8_intrin_ssse3 -#define vpx_filter_block1d4_h8_ssse3 vpx_filter_block1d4_h8_intrin_ssse3 -#else // ARCH_X86 filter8_1dfunction vpx_filter_block1d16_v8_ssse3; filter8_1dfunction vpx_filter_block1d16_h8_ssse3; filter8_1dfunction vpx_filter_block1d8_v8_ssse3; filter8_1dfunction vpx_filter_block1d8_h8_ssse3; filter8_1dfunction vpx_filter_block1d4_v8_ssse3; filter8_1dfunction vpx_filter_block1d4_h8_ssse3; -#endif // ARCH_X86_64 filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3; filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3; filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3; diff --git 
a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm index 68acc03c..3fbaa274 100644 --- a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm +++ b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm @@ -1,5 +1,5 @@ ; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; Copyright (c) 2015 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source @@ -8,1064 +8,662 @@ ; be found in the AUTHORS file in the root of the source tree. ; +%include "third_party/x86inc/x86inc.asm" -%include "vpx_ports/x86_abi_support.asm" - -%macro VERTx4 1 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm4, [rdx] ;load filters - movq xmm5, rcx - packsswb xmm4, xmm4 - pshuflw xmm0, xmm4, 0b ;k0_k1 - pshuflw xmm1, xmm4, 01010101b ;k2_k3 - pshuflw xmm2, xmm4, 10101010b ;k4_k5 - pshuflw xmm3, xmm4, 11111111b ;k6_k7 - - punpcklqdq xmm0, xmm0 - punpcklqdq xmm1, xmm1 - punpcklqdq xmm2, xmm2 - punpcklqdq xmm3, xmm3 - - movdqa k0k1, xmm0 - movdqa k2k3, xmm1 - pshufd xmm5, xmm5, 0 - movdqa k4k5, xmm2 - movdqa k6k7, xmm3 - movdqa krd, xmm5 - - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ;out_pitch -%endif - mov rax, rsi - movsxd rcx, DWORD PTR arg(4) ;output_height - add rax, rdx - - lea rbx, [rdx + rdx*4] - add rbx, rdx ;pitch * 6 - -.loop: - movd xmm0, [rsi] ;A - movd xmm1, [rsi + rdx] ;B - movd xmm2, [rsi + rdx * 2] ;C - movd xmm3, [rax + rdx * 2] ;D - movd xmm4, [rsi + rdx * 4] ;E - movd xmm5, [rax + rdx * 4] ;F - - punpcklbw xmm0, xmm1 ;A B - punpcklbw xmm2, xmm3 ;C D - punpcklbw xmm4, xmm5 ;E F - - movd xmm6, [rsi + rbx] ;G - movd xmm7, [rax + rbx] ;H - - pmaddubsw xmm0, k0k1 - pmaddubsw xmm2, k2k3 - punpcklbw xmm6, xmm7 ;G H - pmaddubsw xmm4, k4k5 - pmaddubsw xmm6, k6k7 - - movdqa xmm1, xmm2 - paddsw xmm0, 
xmm6 - pmaxsw xmm2, xmm4 - pminsw xmm4, xmm1 - paddsw xmm0, xmm4 - paddsw xmm0, xmm2 +SECTION_RODATA +pw_64: times 8 dw 64 - paddsw xmm0, krd - psraw xmm0, 7 - packuswb xmm0, xmm0 +; %define USE_PMULHRSW +; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss +; when using this instruction. - add rsi, rdx - add rax, rdx -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 +SECTION .text +%if ARCH_X86_64 + %define LOCAL_VARS_SIZE 16*4 +%else + %define LOCAL_VARS_SIZE 16*6 %endif - movd [rdi], xmm0 -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;out_pitch +%macro SETUP_LOCAL_VARS 0 + ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 + + ; pmaddubsw has a higher latency on some platforms, this might be eased by + ; interleaving the instructions. + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + packsswb m4, m4 + ; TODO(slavarnway): multiple pshufb instructions had a higher latency on + ; some platforms. 
+ pshuflw m0, m4, 0b ;k0_k1 + pshuflw m1, m4, 01010101b ;k2_k3 + pshuflw m2, m4, 10101010b ;k4_k5 + pshuflw m3, m4, 11111111b ;k6_k7 + punpcklqdq m0, m0 + punpcklqdq m1, m1 + punpcklqdq m2, m2 + punpcklqdq m3, m3 + mova k0k1, m0 + mova k2k3, m1 + mova k4k5, m2 + mova k6k7, m3 +%if ARCH_X86_64 + %define krd m12 + %define tmp m13 + mova krd, [GLOBAL(pw_64)] +%else + %define tmp [rsp + 16*4] + %define krd [rsp + 16*5] +%if CONFIG_PIC=0 + mova m6, [GLOBAL(pw_64)] %else - add rdi, r8 + ; build constants without accessing global memory + pcmpeqb m6, m6 ;all ones + psrlw m6, 15 + psllw m6, 6 ;aka pw_64 %endif - dec rcx - jnz .loop -%endm - -%macro VERTx8 1 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm4, [rdx] ;load filters - movq xmm5, rcx - packsswb xmm4, xmm4 - pshuflw xmm0, xmm4, 0b ;k0_k1 - pshuflw xmm1, xmm4, 01010101b ;k2_k3 - pshuflw xmm2, xmm4, 10101010b ;k4_k5 - pshuflw xmm3, xmm4, 11111111b ;k6_k7 - - punpcklqdq xmm0, xmm0 - punpcklqdq xmm1, xmm1 - punpcklqdq xmm2, xmm2 - punpcklqdq xmm3, xmm3 - - movdqa k0k1, xmm0 - movdqa k2k3, xmm1 - pshufd xmm5, xmm5, 0 - movdqa k4k5, xmm2 - movdqa k6k7, xmm3 - movdqa krd, xmm5 - - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ;out_pitch + mova krd, m6 %endif - mov rax, rsi - movsxd rcx, DWORD PTR arg(4) ;output_height - add rax, rdx - - lea rbx, [rdx + rdx*4] - add rbx, rdx ;pitch * 6 - -.loop: - movq xmm0, [rsi] ;A - movq xmm1, [rsi + rdx] ;B - movq xmm2, [rsi + rdx * 2] ;C - movq xmm3, [rax + rdx * 2] ;D - movq xmm4, [rsi + rdx * 4] ;E - movq xmm5, [rax + rdx * 4] ;F - - punpcklbw xmm0, xmm1 ;A B - punpcklbw xmm2, xmm3 ;C D - punpcklbw xmm4, xmm5 ;E F - - movq xmm6, [rsi + rbx] ;G - movq xmm7, [rax + rbx] ;H - - pmaddubsw xmm0, k0k1 - pmaddubsw xmm2, k2k3 - punpcklbw xmm6, xmm7 ;G H - pmaddubsw xmm4, k4k5 - pmaddubsw xmm6, k6k7 - - paddsw xmm0, xmm6 - movdqa xmm1, xmm2 - pmaxsw xmm2, xmm4 - 
pminsw xmm4, xmm1 - paddsw xmm0, xmm4 - paddsw xmm0, xmm2 - - paddsw xmm0, krd - psraw xmm0, 7 - packuswb xmm0, xmm0 +%endm - add rsi, rdx - add rax, rdx -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movq [rdi], xmm0 +%macro HORIZx4_ROW 2 + mova %2, %1 + punpcklbw %1, %1 + punpckhbw %2, %2 + + mova m3, %2 + palignr %2, %1, 1 + palignr m3, %1, 5 + + pmaddubsw %2, k0k1k4k5 + pmaddubsw m3, k2k3k6k7 + + mova m4, %2 + mova m5, m3 + psrldq %2, 8 + psrldq m3, 8 + mova m6, m5 + + paddsw m4, m3 + pmaxsw m5, %2 + pminsw %2, m6 + paddsw %2, m4 + paddsw %2, m5 + paddsw %2, krd + psraw %2, 7 + packuswb %2, %2 +%endm -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;out_pitch +;------------------------------------------------------------------------------- +%macro SUBPIX_HFILTER4 1 +cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + packsswb m4, m4 +%if ARCH_X86_64 + %define k0k1k4k5 m8 + %define k2k3k6k7 m9 + %define krd m10 + %define orig_height r7d + mova krd, [GLOBAL(pw_64)] + pshuflw k0k1k4k5, m4, 0b ;k0_k1 + pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5 + pshuflw k2k3k6k7, m4, 01010101b ;k2_k3 + pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7 %else - add rdi, r8 + %define k0k1k4k5 [rsp + 16*0] + %define k2k3k6k7 [rsp + 16*1] + %define krd [rsp + 16*2] + %define orig_height [rsp + 16*3] + pshuflw m6, m4, 0b ;k0_k1 + pshufhw m6, m6, 10101010b ;k0_k1_k4_k5 + pshuflw m7, m4, 01010101b ;k2_k3 + pshufhw m7, m7, 11111111b ;k2_k3_k6_k7 +%if CONFIG_PIC=0 + mova m1, [GLOBAL(pw_64)] +%else + ; build constants without accessing global memory + pcmpeqb m1, m1 ;all ones + psrlw m1, 15 + psllw m1, 6 ;aka pw_64 %endif - dec rcx - jnz .loop -%endm - - -%macro VERTx16 1 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm4, [rdx] ;load filters - movq xmm5, rcx - packsswb xmm4, xmm4 - pshuflw xmm0, xmm4, 0b ;k0_k1 - 
pshuflw xmm1, xmm4, 01010101b ;k2_k3 - pshuflw xmm2, xmm4, 10101010b ;k4_k5 - pshuflw xmm3, xmm4, 11111111b ;k6_k7 - - punpcklqdq xmm0, xmm0 - punpcklqdq xmm1, xmm1 - punpcklqdq xmm2, xmm2 - punpcklqdq xmm3, xmm3 - - movdqa k0k1, xmm0 - movdqa k2k3, xmm1 - pshufd xmm5, xmm5, 0 - movdqa k4k5, xmm2 - movdqa k6k7, xmm3 - movdqa krd, xmm5 - - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ;out_pitch + mova k0k1k4k5, m6 + mova k2k3k6k7, m7 + mova krd, m1 %endif - mov rax, rsi - movsxd rcx, DWORD PTR arg(4) ;output_height - add rax, rdx - - lea rbx, [rdx + rdx*4] - add rbx, rdx ;pitch * 6 - + mov orig_height, heightd + shr heightd, 1 .loop: - movq xmm0, [rsi] ;A - movq xmm1, [rsi + rdx] ;B - movq xmm2, [rsi + rdx * 2] ;C - movq xmm3, [rax + rdx * 2] ;D - movq xmm4, [rsi + rdx * 4] ;E - movq xmm5, [rax + rdx * 4] ;F - - punpcklbw xmm0, xmm1 ;A B - punpcklbw xmm2, xmm3 ;C D - punpcklbw xmm4, xmm5 ;E F - - movq xmm6, [rsi + rbx] ;G - movq xmm7, [rax + rbx] ;H - - pmaddubsw xmm0, k0k1 - pmaddubsw xmm2, k2k3 - punpcklbw xmm6, xmm7 ;G H - pmaddubsw xmm4, k4k5 - pmaddubsw xmm6, k6k7 - - paddsw xmm0, xmm6 - movdqa xmm1, xmm2 - pmaxsw xmm2, xmm4 - pminsw xmm4, xmm1 - paddsw xmm0, xmm4 - paddsw xmm0, xmm2 - - paddsw xmm0, krd - psraw xmm0, 7 - packuswb xmm0, xmm0 -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 + ;Do two rows at once + movh m0, [srcq - 3] + movh m1, [srcq + 5] + punpcklqdq m0, m1 + mova m1, m0 + movh m2, [srcq + sstrideq - 3] + movh m3, [srcq + sstrideq + 5] + punpcklqdq m2, m3 + mova m3, m2 + punpcklbw m0, m0 + punpckhbw m1, m1 + punpcklbw m2, m2 + punpckhbw m3, m3 + mova m4, m1 + palignr m4, m0, 1 + pmaddubsw m4, k0k1k4k5 + palignr m1, m0, 5 + pmaddubsw m1, k2k3k6k7 + mova m7, m3 + palignr m7, m2, 1 + pmaddubsw m7, k0k1k4k5 + palignr m3, m2, 5 + pmaddubsw m3, k2k3k6k7 + mova m0, m4 + mova m5, m1 + mova m2, m7 + psrldq m4, 8 + psrldq m1, 8 + mova m6, m5 + paddsw m0, m1 + mova m1, m3 + psrldq m7, 8 + psrldq m3, 8 + 
paddsw m2, m3 + mova m3, m1 + pmaxsw m5, m4 + pminsw m4, m6 + paddsw m4, m0 + paddsw m4, m5 + pmaxsw m1, m7 + pminsw m7, m3 + paddsw m7, m2 + paddsw m7, m1 + + paddsw m4, krd + psraw m4, 7 + packuswb m4, m4 + paddsw m7, krd + psraw m7, 7 + packuswb m7, m7 + +%ifidn %1, h8_avg + movd m0, [dstq] + pavgb m4, m0 + movd m2, [dstq + dstrideq] + pavgb m7, m2 %endif - movq [rdi], xmm0 - - movq xmm0, [rsi + 8] ;A - movq xmm1, [rsi + rdx + 8] ;B - movq xmm2, [rsi + rdx * 2 + 8] ;C - movq xmm3, [rax + rdx * 2 + 8] ;D - movq xmm4, [rsi + rdx * 4 + 8] ;E - movq xmm5, [rax + rdx * 4 + 8] ;F - - punpcklbw xmm0, xmm1 ;A B - punpcklbw xmm2, xmm3 ;C D - punpcklbw xmm4, xmm5 ;E F + movd [dstq], m4 + movd [dstq + dstrideq], m7 - movq xmm6, [rsi + rbx + 8] ;G - movq xmm7, [rax + rbx + 8] ;H - punpcklbw xmm6, xmm7 ;G H + lea srcq, [srcq + sstrideq ] + prefetcht0 [srcq + 4 * sstrideq - 3] + lea srcq, [srcq + sstrideq ] + lea dstq, [dstq + 2 * dstrideq ] + prefetcht0 [srcq + 2 * sstrideq - 3] - pmaddubsw xmm0, k0k1 - pmaddubsw xmm2, k2k3 - pmaddubsw xmm4, k4k5 - pmaddubsw xmm6, k6k7 + dec heightd + jnz .loop - paddsw xmm0, xmm6 - movdqa xmm1, xmm2 - pmaxsw xmm2, xmm4 - pminsw xmm4, xmm1 - paddsw xmm0, xmm4 - paddsw xmm0, xmm2 - - paddsw xmm0, krd - psraw xmm0, 7 - packuswb xmm0, xmm0 - - add rsi, rdx - add rax, rdx -%if %1 - movq xmm1, [rdi+8] - pavgb xmm0, xmm1 -%endif - - movq [rdi+8], xmm0 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;out_pitch -%else - add rdi, r8 + ; Do last row if output_height is odd + mov heightd, orig_height + and heightd, 1 + je .done + + movh m0, [srcq - 3] ; load src + movh m1, [srcq + 5] + punpcklqdq m0, m1 + + HORIZx4_ROW m0, m1 +%ifidn %1, h8_avg + movd m0, [dstq] + pavgb m1, m0 %endif - dec rcx - jnz .loop + movd [dstq], m1 +.done + RET %endm -;void vpx_filter_block1d8_v8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global 
sym(vpx_filter_block1d4_v8_ssse3) PRIVATE -sym(vpx_filter_block1d4_v8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - VERTx4 0 - - add rsp, 16*5 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d8_v8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d8_v8_ssse3) PRIVATE -sym(vpx_filter_block1d8_v8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - VERTx8 0 - - add rsp, 16*5 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d16_v8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d16_v8_ssse3) PRIVATE -sym(vpx_filter_block1d16_v8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - VERTx16 0 - - add rsp, 16*5 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 
- -global sym(vpx_filter_block1d4_v8_avg_ssse3) PRIVATE -sym(vpx_filter_block1d4_v8_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - VERTx4 1 - - add rsp, 16*5 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v8_avg_ssse3) PRIVATE -sym(vpx_filter_block1d8_v8_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - VERTx8 1 - - add rsp, 16*5 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v8_avg_ssse3) PRIVATE -sym(vpx_filter_block1d16_v8_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - VERTx16 1 - - add rsp, 16*5 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%macro HORIZx4_ROW 2 - movdqa %2, %1 - pshufb %1, [GLOBAL(shuf_t0t1)] - pshufb %2, [GLOBAL(shuf_t2t3)] - pmaddubsw %1, k0k1k4k5 - pmaddubsw %2, k2k3k6k7 - - movdqa xmm4, %1 - movdqa xmm5, %2 - psrldq %1, 8 - psrldq %2, 8 - movdqa xmm6, xmm5 - - paddsw xmm4, %2 - pmaxsw xmm5, %1 - pminsw %1, xmm6 - paddsw %1, xmm4 - paddsw %1, xmm5 - - paddsw %1, krd - psraw %1, 7 - 
packuswb %1, %1 +%macro HORIZx8_ROW 5 + mova %2, %1 + punpcklbw %1, %1 + punpckhbw %2, %2 + + mova %3, %2 + mova %4, %2 + mova %5, %2 + + palignr %2, %1, 1 + palignr %3, %1, 5 + palignr %4, %1, 9 + palignr %5, %1, 13 + + pmaddubsw %2, k0k1 + pmaddubsw %3, k2k3 + pmaddubsw %4, k4k5 + pmaddubsw %5, k6k7 + + paddsw %2, %5 + mova %1, %3 + pminsw %3, %4 + pmaxsw %1, %4 + paddsw %2, %3 + paddsw %1, %2 + paddsw %1, krd + psraw %1, 7 + packuswb %1, %1 %endm -%macro HORIZx4 1 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm4, [rdx] ;load filters - movq xmm5, rcx - packsswb xmm4, xmm4 - pshuflw xmm6, xmm4, 0b ;k0_k1 - pshufhw xmm6, xmm6, 10101010b ;k0_k1_k4_k5 - pshuflw xmm7, xmm4, 01010101b ;k2_k3 - pshufhw xmm7, xmm7, 11111111b ;k2_k3_k6_k7 - pshufd xmm5, xmm5, 0 ;rounding - - movdqa k0k1k4k5, xmm6 - movdqa k2k3k6k7, xmm7 - movdqa krd, xmm5 +;------------------------------------------------------------------------------- +%macro SUBPIX_HFILTER8 1 +cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + SETUP_LOCAL_VARS +%if ARCH_X86_64 + %define orig_height r7d +%else + %define orig_height heightmp +%endif + mov orig_height, heightd + shr heightd, 1 - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rdx, dword ptr arg(3) ;output_pitch - movsxd rcx, dword ptr arg(4) ;output_height - shr rcx, 1 .loop: - ;Do two rows once - movq xmm0, [rsi - 3] ;load src - movq xmm1, [rsi + 5] - movq xmm2, [rsi + rax - 3] - movq xmm3, [rsi + rax + 5] - punpcklqdq xmm0, xmm1 - punpcklqdq xmm2, xmm3 - - HORIZx4_ROW xmm0, xmm1 - HORIZx4_ROW xmm2, xmm3 -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 - movd xmm3, [rdi + rdx] - pavgb xmm2, xmm3 + movh m0, [srcq - 3] + movh m3, [srcq + 5] + movh m4, [srcq + sstrideq - 3] + movh m7, [srcq + sstrideq + 5] + punpcklqdq m0, m3 + mova m1, m0 + punpcklbw m0, m0 + punpckhbw m1, m1 + mova m5, 
m1 + palignr m5, m0, 13 + pmaddubsw m5, k6k7 + mova m2, m1 + mova m3, m1 + palignr m1, m0, 1 + pmaddubsw m1, k0k1 + punpcklqdq m4, m7 + mova m6, m4 + punpcklbw m4, m4 + palignr m2, m0, 5 + punpckhbw m6, m6 + palignr m3, m0, 9 + mova m7, m6 + pmaddubsw m2, k2k3 + pmaddubsw m3, k4k5 + + palignr m7, m4, 13 + paddsw m1, m5 + mova m5, m6 + mova m0, m2 + palignr m5, m4, 5 + pminsw m2, m3 + pmaddubsw m7, k6k7 + pmaxsw m3, m0 + paddsw m1, m2 + mova m0, m6 + palignr m6, m4, 1 + pmaddubsw m5, k2k3 + paddsw m1, m3 + pmaddubsw m6, k0k1 + palignr m0, m4, 9 + paddsw m1, krd + pmaddubsw m0, k4k5 + mova m4, m5 + psraw m1, 7 + pminsw m5, m0 + paddsw m6, m7 + packuswb m1, m1 + + paddsw m6, m5 + pmaxsw m0, m4 + paddsw m6, m0 + paddsw m6, krd + psraw m6, 7 + packuswb m6, m6 + +%ifidn %1, h8_avg + movh m0, [dstq] + movh m2, [dstq + dstrideq] + pavgb m1, m0 + pavgb m6, m2 %endif - movd [rdi], xmm0 - movd [rdi +rdx], xmm2 + movh [dstq], m1 + movh [dstq + dstrideq], m6 - lea rsi, [rsi + rax] - prefetcht0 [rsi + 4 * rax - 3] - lea rsi, [rsi + rax] - lea rdi, [rdi + 2 * rdx] - prefetcht0 [rsi + 2 * rax - 3] + lea srcq, [srcq + sstrideq ] + prefetcht0 [srcq + 4 * sstrideq - 3] + lea srcq, [srcq + sstrideq ] + lea dstq, [dstq + 2 * dstrideq ] + prefetcht0 [srcq + 2 * sstrideq - 3] + dec heightd + jnz .loop - dec rcx - jnz .loop + ;Do last row if output_height is odd + mov heightd, orig_height + and heightd, 1 + je .done - ; Do last row if output_height is odd - movsxd rcx, dword ptr arg(4) ;output_height - and rcx, 1 - je .done + movh m0, [srcq - 3] + movh m3, [srcq + 5] + punpcklqdq m0, m3 - movq xmm0, [rsi - 3] ; load src - movq xmm1, [rsi + 5] - punpcklqdq xmm0, xmm1 + HORIZx8_ROW m0, m1, m2, m3, m4 - HORIZx4_ROW xmm0, xmm1 -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 +%ifidn %1, h8_avg + movh m1, [dstq] + pavgb m0, m1 %endif - movd [rdi], xmm0 -.done + movh [dstq], m0 +.done: + RET %endm -%macro HORIZx8_ROW 4 - movdqa %2, %1 - movdqa %3, %1 - movdqa %4, %1 - - pshufb %1, 
[GLOBAL(shuf_t0t1)] - pshufb %2, [GLOBAL(shuf_t2t3)] - pshufb %3, [GLOBAL(shuf_t4t5)] - pshufb %4, [GLOBAL(shuf_t6t7)] - - pmaddubsw %1, k0k1 - pmaddubsw %2, k2k3 - pmaddubsw %3, k4k5 - pmaddubsw %4, k6k7 - - paddsw %1, %4 - movdqa %4, %2 - pmaxsw %2, %3 - pminsw %3, %4 - paddsw %1, %3 - paddsw %1, %2 - - paddsw %1, krd - psraw %1, 7 - packuswb %1, %1 +;------------------------------------------------------------------------------- +%macro SUBPIX_HFILTER16 1 +cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + SETUP_LOCAL_VARS +.loop: + prefetcht0 [srcq + 2 * sstrideq -3] + + movh m0, [srcq - 3] + movh m4, [srcq + 5] + movh m6, [srcq + 13] + punpcklqdq m0, m4 + mova m7, m0 + punpckhbw m0, m0 + mova m1, m0 + punpcklqdq m4, m6 + mova m3, m0 + punpcklbw m7, m7 + + palignr m3, m7, 13 + mova m2, m0 + pmaddubsw m3, k6k7 + palignr m0, m7, 1 + pmaddubsw m0, k0k1 + palignr m1, m7, 5 + pmaddubsw m1, k2k3 + palignr m2, m7, 9 + pmaddubsw m2, k4k5 + paddsw m0, m3 + mova m3, m4 + punpckhbw m4, m4 + mova m5, m4 + punpcklbw m3, m3 + mova m7, m4 + palignr m5, m3, 5 + mova m6, m4 + palignr m4, m3, 1 + pmaddubsw m4, k0k1 + pmaddubsw m5, k2k3 + palignr m6, m3, 9 + pmaddubsw m6, k4k5 + palignr m7, m3, 13 + pmaddubsw m7, k6k7 + + mova m3, m1 + pmaxsw m1, m2 + pminsw m2, m3 + paddsw m0, m2 + paddsw m0, m1 + paddsw m4, m7 + mova m7, m5 + pmaxsw m5, m6 + pminsw m6, m7 + paddsw m4, m6 + paddsw m4, m5 + paddsw m0, krd + paddsw m4, krd + psraw m0, 7 + psraw m4, 7 + packuswb m0, m4 +%ifidn %1, h8_avg + mova m1, [dstq] + pavgb m0, m1 +%endif + lea srcq, [srcq + sstrideq] + mova [dstq], m0 + lea dstq, [dstq + dstrideq] + dec heightd + jnz .loop + RET %endm -%macro HORIZx8 1 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm4, [rdx] ;load filters - movq xmm5, rcx - packsswb xmm4, xmm4 - pshuflw xmm0, xmm4, 0b ;k0_k1 - pshuflw xmm1, xmm4, 
01010101b ;k2_k3 - pshuflw xmm2, xmm4, 10101010b ;k4_k5 - pshuflw xmm3, xmm4, 11111111b ;k6_k7 - - punpcklqdq xmm0, xmm0 - punpcklqdq xmm1, xmm1 - punpcklqdq xmm2, xmm2 - punpcklqdq xmm3, xmm3 - - movdqa k0k1, xmm0 - movdqa k2k3, xmm1 - pshufd xmm5, xmm5, 0 - movdqa k4k5, xmm2 - movdqa k6k7, xmm3 - movdqa krd, xmm5 - - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rdx, dword ptr arg(3) ;output_pitch - movsxd rcx, dword ptr arg(4) ;output_height - shr rcx, 1 +INIT_XMM ssse3 +SUBPIX_HFILTER16 h8 +SUBPIX_HFILTER16 h8_avg +SUBPIX_HFILTER8 h8 +SUBPIX_HFILTER8 h8_avg +SUBPIX_HFILTER4 h8 +SUBPIX_HFILTER4 h8_avg + +;------------------------------------------------------------------------------- +%macro SUBPIX_VFILTER 2 +cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + SETUP_LOCAL_VARS +%if ARCH_X86_64 + %define src1q r7 + %define sstride6q r8 + %define dst_stride dstrideq +%else + %define src1q filterq + %define sstride6q dstrideq + %define dst_stride dstridemp +%endif + mov src1q, srcq + add src1q, sstrideq + lea sstride6q, [sstrideq + sstrideq * 4] + add sstride6q, sstrideq ;pitch * 6 +%ifidn %2, 8 + %define movx movh +%else + %define movx movd +%endif .loop: - movq xmm0, [rsi - 3] ;load src - movq xmm3, [rsi + 5] - movq xmm4, [rsi + rax - 3] - movq xmm7, [rsi + rax + 5] - punpcklqdq xmm0, xmm3 - punpcklqdq xmm4, xmm7 - - HORIZx8_ROW xmm0, xmm1, xmm2, xmm3 - HORIZx8_ROW xmm4, xmm5, xmm6, xmm7 -%if %1 - movq xmm1, [rdi] - movq xmm2, [rdi + rdx] - pavgb xmm0, xmm1 - pavgb xmm4, xmm2 + movx m0, [srcq ] ;A + movx m1, [srcq + sstrideq ] ;B + punpcklbw m0, m1 ;A B + movx m2, [srcq + sstrideq * 2 ] ;C + pmaddubsw m0, k0k1 + mova m6, m2 + movx m3, [src1q + sstrideq * 2] ;D + punpcklbw m2, m3 ;C D + pmaddubsw m2, k2k3 + movx m4, [srcq + sstrideq * 4 ] ;E + mova m7, m4 + movx m5, [src1q + sstrideq * 4] ;F + punpcklbw m4, m5 ;E F + pmaddubsw m4, k4k5 + punpcklbw m1, m6 ;A B 
next iter + movx m6, [srcq + sstride6q ] ;G + punpcklbw m5, m6 ;E F next iter + punpcklbw m3, m7 ;C D next iter + pmaddubsw m5, k4k5 + movx m7, [src1q + sstride6q ] ;H + punpcklbw m6, m7 ;G H + pmaddubsw m6, k6k7 + mova tmp, m2 + pmaddubsw m3, k2k3 + pmaddubsw m1, k0k1 + pmaxsw m2, m4 + paddsw m0, m6 + movx m6, [srcq + sstrideq * 8 ] ;H next iter + punpcklbw m7, m6 + pmaddubsw m7, k6k7 + pminsw m4, tmp + paddsw m0, m4 + mova m4, m3 + paddsw m0, m2 + pminsw m3, m5 + pmaxsw m5, m4 + paddsw m0, krd + psraw m0, 7 + paddsw m1, m7 + packuswb m0, m0 + + paddsw m1, m3 + paddsw m1, m5 + paddsw m1, krd + psraw m1, 7 + lea srcq, [srcq + sstrideq * 2 ] + lea src1q, [src1q + sstrideq * 2] + packuswb m1, m1 + +%ifidn %1, v8_avg + movx m2, [dstq] + pavgb m0, m2 %endif - movq [rdi], xmm0 - movq [rdi + rdx], xmm4 - - lea rsi, [rsi + rax] - prefetcht0 [rsi + 4 * rax - 3] - lea rsi, [rsi + rax] - lea rdi, [rdi + 2 * rdx] - prefetcht0 [rsi + 2 * rax - 3] - dec rcx - jnz .loop - - ;Do last row if output_height is odd - movsxd rcx, dword ptr arg(4) ;output_height - and rcx, 1 - je .done - - movq xmm0, [rsi - 3] - movq xmm3, [rsi + 5] - punpcklqdq xmm0, xmm3 - - HORIZx8_ROW xmm0, xmm1, xmm2, xmm3 -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 + movx [dstq], m0 + add dstq, dst_stride +%ifidn %1, v8_avg + movx m3, [dstq] + pavgb m1, m3 %endif - movq [rdi], xmm0 -.done + movx [dstq], m1 + add dstq, dst_stride + sub heightd, 2 + cmp heightd, 1 + jg .loop + + cmp heightd, 0 + je .done + + movx m0, [srcq ] ;A + movx m1, [srcq + sstrideq ] ;B + movx m6, [srcq + sstride6q ] ;G + punpcklbw m0, m1 ;A B + movx m7, [rax + sstride6q ] ;H + pmaddubsw m0, k0k1 + movx m2, [srcq + sstrideq * 2 ] ;C + punpcklbw m6, m7 ;G H + movx m3, [rax + sstrideq * 2 ] ;D + pmaddubsw m6, k6k7 + movx m4, [srcq + sstrideq * 4 ] ;E + punpcklbw m2, m3 ;C D + movx m5, [src1q + sstrideq * 4] ;F + punpcklbw m4, m5 ;E F + pmaddubsw m2, k2k3 + pmaddubsw m4, k4k5 + paddsw m0, m6 + mova m1, m2 + pmaxsw m2, m4 + pminsw m4, m1 + 
paddsw m0, m4 + paddsw m0, m2 + paddsw m0, krd + psraw m0, 7 + packuswb m0, m0 +%ifidn %1, v8_avg + movx m1, [dstq] + pavgb m0, m1 +%endif + movx [dstq], m0 +.done: + RET %endm -%macro HORIZx16 1 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm4, [rdx] ;load filters - movq xmm5, rcx - packsswb xmm4, xmm4 - pshuflw xmm0, xmm4, 0b ;k0_k1 - pshuflw xmm1, xmm4, 01010101b ;k2_k3 - pshuflw xmm2, xmm4, 10101010b ;k4_k5 - pshuflw xmm3, xmm4, 11111111b ;k6_k7 - - punpcklqdq xmm0, xmm0 - punpcklqdq xmm1, xmm1 - punpcklqdq xmm2, xmm2 - punpcklqdq xmm3, xmm3 - - movdqa k0k1, xmm0 - movdqa k2k3, xmm1 - pshufd xmm5, xmm5, 0 - movdqa k4k5, xmm2 - movdqa k6k7, xmm3 - movdqa krd, xmm5 - - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rdx, dword ptr arg(3) ;output_pitch - movsxd rcx, dword ptr arg(4) ;output_height +;------------------------------------------------------------------------------- +%macro SUBPIX_VFILTER16 1 +cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + + mova m4, [filterq] + SETUP_LOCAL_VARS +%if ARCH_X86_64 + %define src1q r7 + %define sstride6q r8 + %define dst_stride dstrideq +%else + %define src1q filterq + %define sstride6q dstrideq + %define dst_stride dstridemp +%endif + mov src1q, srcq + add src1q, sstrideq + lea sstride6q, [sstrideq + sstrideq * 4] + add sstride6q, sstrideq ;pitch * 6 .loop: - prefetcht0 [rsi + 2 * rax -3] - - movq xmm0, [rsi - 3] ;load src data - movq xmm4, [rsi + 5] - movq xmm6, [rsi + 13] - punpcklqdq xmm0, xmm4 - punpcklqdq xmm4, xmm6 - - movdqa xmm7, xmm0 - - punpcklbw xmm7, xmm7 - punpckhbw xmm0, xmm0 - movdqa xmm1, xmm0 - movdqa xmm2, xmm0 - movdqa xmm3, xmm0 - - palignr xmm0, xmm7, 1 - palignr xmm1, xmm7, 5 - pmaddubsw xmm0, k0k1 - palignr xmm2, xmm7, 9 - pmaddubsw xmm1, k2k3 - palignr xmm3, xmm7, 13 - - pmaddubsw xmm2, k4k5 - pmaddubsw xmm3, k6k7 - paddsw xmm0, xmm3 - - 
movdqa xmm3, xmm4 - punpcklbw xmm3, xmm3 - punpckhbw xmm4, xmm4 - - movdqa xmm5, xmm4 - movdqa xmm6, xmm4 - movdqa xmm7, xmm4 - - palignr xmm4, xmm3, 1 - palignr xmm5, xmm3, 5 - palignr xmm6, xmm3, 9 - palignr xmm7, xmm3, 13 - - movdqa xmm3, xmm1 - pmaddubsw xmm4, k0k1 - pmaxsw xmm1, xmm2 - pmaddubsw xmm5, k2k3 - pminsw xmm2, xmm3 - pmaddubsw xmm6, k4k5 - paddsw xmm0, xmm2 - pmaddubsw xmm7, k6k7 - paddsw xmm0, xmm1 - - paddsw xmm4, xmm7 - movdqa xmm7, xmm5 - pmaxsw xmm5, xmm6 - pminsw xmm6, xmm7 - paddsw xmm4, xmm6 - paddsw xmm4, xmm5 - - paddsw xmm0, krd - paddsw xmm4, krd - psraw xmm0, 7 - psraw xmm4, 7 - packuswb xmm0, xmm0 - packuswb xmm4, xmm4 - punpcklqdq xmm0, xmm4 -%if %1 - movdqa xmm1, [rdi] - pavgb xmm0, xmm1 + movh m0, [srcq ] ;A + movh m1, [srcq + sstrideq ] ;B + movh m2, [srcq + sstrideq * 2 ] ;C + movh m3, [src1q + sstrideq * 2] ;D + movh m4, [srcq + sstrideq * 4 ] ;E + movh m5, [src1q + sstrideq * 4] ;F + + punpcklbw m0, m1 ;A B + movh m6, [srcq + sstride6q] ;G + punpcklbw m2, m3 ;C D + movh m7, [src1q + sstride6q] ;H + punpcklbw m4, m5 ;E F + pmaddubsw m0, k0k1 + movh m3, [srcq + 8] ;A + pmaddubsw m2, k2k3 + punpcklbw m6, m7 ;G H + movh m5, [srcq + sstrideq + 8] ;B + pmaddubsw m4, k4k5 + punpcklbw m3, m5 ;A B + movh m7, [srcq + sstrideq * 2 + 8] ;C + pmaddubsw m6, k6k7 + mova m1, m2 + movh m5, [src1q + sstrideq * 2 + 8] ;D + pmaxsw m2, m4 + punpcklbw m7, m5 ;C D + pminsw m4, m1 + paddsw m0, m6 + pmaddubsw m3, k0k1 + movh m1, [srcq + sstrideq * 4 + 8] ;E + paddsw m0, m4 + pmaddubsw m7, k2k3 + movh m6, [src1q + sstrideq * 4 + 8] ;F + punpcklbw m1, m6 ;E F + paddsw m0, m2 + paddsw m0, krd + movh m2, [srcq + sstride6q + 8] ;G + pmaddubsw m1, k4k5 + movh m5, [src1q + sstride6q + 8] ;H + psraw m0, 7 + punpcklbw m2, m5 ;G H + packuswb m0, m0 + pmaddubsw m2, k6k7 +%ifidn %1, v8_avg + movh m4, [dstq] + pavgb m0, m4 %endif - - lea rsi, [rsi + rax] - movdqa [rdi], xmm0 - - lea rdi, [rdi + rdx] - dec rcx - jnz .loop + movh [dstq], m0 + mova m6, m7 + pmaxsw m7, 
m1 + pminsw m1, m6 + paddsw m3, m2 + paddsw m3, m1 + paddsw m3, m7 + paddsw m3, krd + psraw m3, 7 + packuswb m3, m3 + + add srcq, sstrideq + add src1q, sstrideq +%ifidn %1, v8_avg + movh m1, [dstq + 8] + pavgb m3, m1 +%endif + movh [dstq + 8], m3 + add dstq, dst_stride + dec heightd + jnz .loop + RET %endm -;void vpx_filter_block1d4_h8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d4_h8_ssse3) PRIVATE -sym(vpx_filter_block1d4_h8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 3 - %define k0k1k4k5 [rsp + 16 * 0] - %define k2k3k6k7 [rsp + 16 * 1] - %define krd [rsp + 16 * 2] - - HORIZx4 0 - - add rsp, 16 * 3 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d8_h8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d8_h8_ssse3) PRIVATE -sym(vpx_filter_block1d8_h8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - HORIZx8 0 - - add rsp, 16*5 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d16_h8_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global 
sym(vpx_filter_block1d16_h8_ssse3) PRIVATE -sym(vpx_filter_block1d16_h8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - HORIZx16 0 - - add rsp, 16*5 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h8_avg_ssse3) PRIVATE -sym(vpx_filter_block1d4_h8_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 3 - %define k0k1k4k5 [rsp + 16 * 0] - %define k2k3k6k7 [rsp + 16 * 1] - %define krd [rsp + 16 * 2] - - HORIZx4 1 - - add rsp, 16 * 3 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h8_avg_ssse3) PRIVATE -sym(vpx_filter_block1d8_h8_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - HORIZx8 1 - - add rsp, 16*5 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h8_avg_ssse3) PRIVATE -sym(vpx_filter_block1d16_h8_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16*5 - %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - %define krd [rsp + 16*4] - - HORIZx16 1 - - add rsp, 16*5 - pop rsp - - ; begin epilog - pop 
rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -SECTION_RODATA -align 16 -shuf_t0t1: - db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -align 16 -shuf_t2t3: - db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 -align 16 -shuf_t4t5: - db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 -align 16 -shuf_t6t7: - db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 +INIT_XMM ssse3 +SUBPIX_VFILTER16 v8 +SUBPIX_VFILTER16 v8_avg +SUBPIX_VFILTER v8, 8 +SUBPIX_VFILTER v8_avg, 8 +SUBPIX_VFILTER v8, 4 +SUBPIX_VFILTER v8_avg, 4 diff --git a/libvpx/vpx_mem/vpx_mem.c b/libvpx/vpx_mem/vpx_mem.c index c6f501a6..b98fe83c 100644 --- a/libvpx/vpx_mem/vpx_mem.c +++ b/libvpx/vpx_mem/vpx_mem.c @@ -93,11 +93,10 @@ void vpx_free(void *memblk) { #if CONFIG_VP9_HIGHBITDEPTH void *vpx_memset16(void *dest, int val, size_t length) { - int i; - void *orig = dest; - uint16_t *dest16 = dest; + size_t i; + uint16_t *dest16 = (uint16_t *)dest; for (i = 0; i < length; i++) *dest16++ = val; - return orig; + return dest; } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/libvpx/vpx_ports/bitops.h b/libvpx/vpx_ports/bitops.h index 0d3223e3..84ff3659 100644 --- a/libvpx/vpx_ports/bitops.h +++ b/libvpx/vpx_ports/bitops.h @@ -11,6 +11,8 @@ #ifndef VPX_PORTS_BITOPS_H_ #define VPX_PORTS_BITOPS_H_ +#include <assert.h> + #include "vpx_ports/msvc.h" #ifdef _MSC_VER @@ -25,10 +27,15 @@ extern "C" { #endif +// These versions of get_msb() are only valid when n != 0 because all +// of the optimized versions are undefined when n == 0: +// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html + // use GNU builtins where available. 
#if defined(__GNUC__) && \ ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) static INLINE int get_msb(unsigned int n) { + assert(n != 0); return 31 ^ __builtin_clz(n); } #elif defined(USE_MSC_INTRINSICS) @@ -36,6 +43,7 @@ static INLINE int get_msb(unsigned int n) { static INLINE int get_msb(unsigned int n) { unsigned long first_set_bit; + assert(n != 0); _BitScanReverse(&first_set_bit, n); return first_set_bit; } @@ -47,6 +55,8 @@ static INLINE int get_msb(unsigned int n) { unsigned int value = n; int i; + assert(n != 0); + for (i = 4; i >= 0; --i) { const int shift = (1 << i); const unsigned int x = value >> shift; diff --git a/libvpx/vpx_scale/yv12config.h b/libvpx/vpx_scale/yv12config.h index fd5d54ba..37b255d4 100644 --- a/libvpx/vpx_scale/yv12config.h +++ b/libvpx/vpx_scale/yv12config.h @@ -56,6 +56,9 @@ typedef struct yv12_buffer_config { int subsampling_y; unsigned int bit_depth; vpx_color_space_t color_space; + vpx_color_range_t color_range; + int render_width; + int render_height; int corrupted; int flags; diff --git a/libvpx/vpx_util/endian_inl.h b/libvpx/vpx_util/endian_inl.h index 6b177f17..37bdce1c 100644 --- a/libvpx/vpx_util/endian_inl.h +++ b/libvpx/vpx_util/endian_inl.h @@ -25,14 +25,10 @@ # define LOCAL_GCC_PREREQ(maj, min) 0 #endif -#ifdef __clang__ -# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) -# define LOCAL_CLANG_PREREQ(maj, min) \ - (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) -#else -# define LOCAL_CLANG_VERSION 0 -# define LOCAL_CLANG_PREREQ(maj, min) 0 -#endif // __clang__ +// handle clang compatibility +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif // some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) #if !defined(WORDS_BIGENDIAN) && \ @@ -53,14 +49,16 @@ #define HToBE32(X) BSwap32(X) #endif -// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64 -#if LOCAL_GCC_PREREQ(4, 3) || LOCAL_CLANG_PREREQ(3, 3) +#if LOCAL_GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) 
+#define HAVE_BUILTIN_BSWAP16 +#endif + +#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32) #define HAVE_BUILTIN_BSWAP32 -#define HAVE_BUILTIN_BSWAP64 #endif -// clang-3.3 and gcc-4.8 have a builtin function for swap16 -#if LOCAL_GCC_PREREQ(4, 8) || LOCAL_CLANG_PREREQ(3, 3) -#define HAVE_BUILTIN_BSWAP16 + +#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64) +#define HAVE_BUILTIN_BSWAP64 #endif #if HAVE_MIPS32 && defined(__mips__) && !defined(__mips64) && \ diff --git a/libvpx/vpxdec.c b/libvpx/vpxdec.c index 3c61bd92..285d58e1 100644 --- a/libvpx/vpxdec.c +++ b/libvpx/vpxdec.c @@ -562,7 +562,7 @@ static int main_loop(int argc, const char **argv_) { int opt_i420 = 0; vpx_codec_dec_cfg_t cfg = {0, 0, 0}; #if CONFIG_VP9_HIGHBITDEPTH - int output_bit_depth = 0; + unsigned int output_bit_depth = 0; #endif #if CONFIG_VP8_DECODER vp8_postproc_cfg_t vp8_pp_cfg = {0}; @@ -618,9 +618,6 @@ static int main_loop(int argc, const char **argv_) { use_y4m = 0; flipuv = 1; opt_yv12 = 1; -#if CONFIG_VP9_HIGHBITDEPTH - output_bit_depth = 8; // For yv12 8-bit depth output is assumed -#endif } else if (arg_match(&arg, &use_i420, argi)) { use_y4m = 0; flipuv = 0; @@ -956,22 +953,22 @@ static int main_loop(int argc, const char **argv_) { // these is set to 0, use the display size set in the first frame // header. If that is unavailable, use the raw decoded size of the // first decoded frame. - int display_width = vpx_input_ctx.width; - int display_height = vpx_input_ctx.height; - if (!display_width || !display_height) { - int display_size[2]; + int render_width = vpx_input_ctx.width; + int render_height = vpx_input_ctx.height; + if (!render_width || !render_height) { + int render_size[2]; if (vpx_codec_control(&decoder, VP9D_GET_DISPLAY_SIZE, - display_size)) { + render_size)) { // As last resort use size of first frame as display size. 
- display_width = img->d_w; - display_height = img->d_h; + render_width = img->d_w; + render_height = img->d_h; } else { - display_width = display_size[0]; - display_height = display_size[1]; + render_width = render_size[0]; + render_height = render_size[1]; } } - scaled_img = vpx_img_alloc(NULL, img->fmt, display_width, - display_height, 16); + scaled_img = vpx_img_alloc(NULL, img->fmt, render_width, + render_height, 16); scaled_img->bit_depth = img->bit_depth; } @@ -990,11 +987,11 @@ static int main_loop(int argc, const char **argv_) { } #if CONFIG_VP9_HIGHBITDEPTH // Default to codec bit depth if output bit depth not set - if (!output_bit_depth) { + if (!output_bit_depth && single_file && !do_md5) { output_bit_depth = img->bit_depth; } // Shift up or down if necessary - if (output_bit_depth != img->bit_depth) { + if (output_bit_depth != 0 && output_bit_depth != img->bit_depth) { const vpx_img_fmt_t shifted_fmt = output_bit_depth == 8 ? img->fmt ^ (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) : img->fmt | VPX_IMG_FMT_HIGHBITDEPTH; diff --git a/libvpx/vpxenc.c b/libvpx/vpxenc.c index 06604ea0..cb78226b 100644 --- a/libvpx/vpxenc.c +++ b/libvpx/vpxenc.c @@ -1996,7 +1996,7 @@ int main(int argc, const char **argv_) { usage_exit(); /* Decide if other chroma subsamplings than 4:2:0 are supported */ - if (global.codec->fourcc == VP9_FOURCC) + if (global.codec->fourcc == VP9_FOURCC || global.codec->fourcc == VP10_FOURCC) input.only_i420 = 0; for (pass = global.pass ? 
global.pass - 1 : 0; pass < global.passes; pass++) { diff --git a/libvpx/webmdec.cc b/libvpx/webmdec.cc index 1020d046..f541cfec 100644 --- a/libvpx/webmdec.cc +++ b/libvpx/webmdec.cc @@ -94,7 +94,7 @@ int file_is_webm(struct WebmInputContext *webm_ctx, } } - if (video_track == NULL) { + if (video_track == NULL || video_track->GetCodecId() == NULL) { rewind_and_reset(webm_ctx, vpx_ctx); return 0; } diff --git a/update_libvpx.sh b/update_libvpx.sh index 92e40ebe..4e41bf52 100755 --- a/update_libvpx.sh +++ b/update_libvpx.sh @@ -10,7 +10,7 @@ # Usage: # # $ ./update_libvpx.sh [branch | revision | file or url containing a revision] -# When specifying a branch it must be prefixed with origin/ +# When specifying a branch it may be necessary to prefix with origin/ # Tools required for running this tool: # @@ -113,7 +113,7 @@ rm -rf .git .gitignore .gitattributes # Add and remove files. echo "$add" | xargs -I {} git add {} -echo "$delete" | xargs -I {} git rm {} +echo "$delete" | xargs -I {} git rm --ignore-unmatch {} # Find empty directories and remove them. find . -type d -empty -exec git rm {} \; |
