diff options
Diffstat (limited to 'libavcodec/x86/vp8dsp_init.c')
-rw-r--r-- | libavcodec/x86/vp8dsp_init.c | 179 |
1 files changed, 92 insertions, 87 deletions
diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c index e7ac9d236c..ff743d62dc 100644 --- a/libavcodec/x86/vp8dsp_init.c +++ b/libavcodec/x86/vp8dsp_init.c @@ -315,44 +315,18 @@ DECLARE_LOOP_FILTER(sse4) c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT -av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c, int vp7) +av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c) { #if HAVE_YASM int cpu_flags = av_get_cpu_flags(); if (EXTERNAL_MMX(cpu_flags)) { - if (!vp7) { - c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx; - c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx; - } #if ARCH_X86_32 - if (!vp7) { - c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx; - c->vp8_idct_add = ff_vp8_idct_add_mmx; - c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx; - } c->put_vp8_epel_pixels_tab[0][0][0] = c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx; #endif c->put_vp8_epel_pixels_tab[1][0][0] = c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; - -#if ARCH_X86_32 - if (!vp7) { - c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; - c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; - - c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; - c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; - c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx; - c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx; - - c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx; - c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx; - c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx; - c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx; - } -#endif } /* note that 4-tap width=16 functions are missing because w=16 @@ -365,29 +339,10 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c, int vp7) VP8_MC_FUNC(1, 8, mmxext); VP8_BILINEAR_MC_FUNC(0, 16, mmxext); VP8_BILINEAR_MC_FUNC(1, 8, mmxext); - - if (!vp7) { - c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; - c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; - - c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; - c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; - c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; - c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; - - c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext; - c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; - c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; - c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; - } #endif } if (EXTERNAL_SSE(cpu_flags)) { - if (!vp7) { - c->vp8_idct_add = ff_vp8_idct_add_sse; - c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; - } c->put_vp8_epel_pixels_tab[0][0][0] = c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; } @@ -397,64 +352,114 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c, int vp7) VP8_MC_FUNC(1, 8, sse2); VP8_BILINEAR_MC_FUNC(0, 16, sse2); VP8_BILINEAR_MC_FUNC(1, 8, sse2); + } - if (!vp7) { - c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; + if (EXTERNAL_SSSE3(cpu_flags)) { + VP8_LUMA_MC_FUNC(0, 16, ssse3); + VP8_MC_FUNC(1, 8, ssse3); + VP8_MC_FUNC(2, 4, ssse3); + VP8_BILINEAR_MC_FUNC(0, 16, ssse3); + VP8_BILINEAR_MC_FUNC(1, 8, ssse3); + VP8_BILINEAR_MC_FUNC(2, 4, ssse3); + } +#endif /* HAVE_YASM */ +} - c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; - c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; +av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c) +{ +#if HAVE_YASM + int cpu_flags = av_get_cpu_flags(); - c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2; - c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2; - } + if (EXTERNAL_MMX(cpu_flags)) { + c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx; + c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx; +#if ARCH_X86_32 + c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx; + c->vp8_idct_add = ff_vp8_idct_add_mmx; + c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx; + + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx; +#endif } - if (EXTERNAL_SSE2(cpu_flags)) { - if (!vp7) { - c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2; + /* note that 4-tap width=16 functions are missing because w=16 + * is only used for luma, and luma is always a copy or sixtap. */ + if (EXTERNAL_MMXEXT(cpu_flags)) { +#if ARCH_X86_32 + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; +#endif + } - c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; + if (EXTERNAL_SSE(cpu_flags)) { + c->vp8_idct_add = ff_vp8_idct_add_sse; + c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; + } + + if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) { + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; - c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; - c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; - c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2; - c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2; - } + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2; } - if (EXTERNAL_SSSE3(cpu_flags)) { - VP8_LUMA_MC_FUNC(0, 16, ssse3); - VP8_MC_FUNC(1, 8, ssse3); - VP8_MC_FUNC(2, 4, ssse3); - VP8_BILINEAR_MC_FUNC(0, 16, ssse3); - VP8_BILINEAR_MC_FUNC(1, 8, ssse3); - VP8_BILINEAR_MC_FUNC(2, 4, ssse3); + if (EXTERNAL_SSE2(cpu_flags)) { + c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2; - if (!vp7) { - c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3; - c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; - c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3; - c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3; - c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3; - c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; - c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3; - c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3; - c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3; - c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3; - } + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2; + } + + if (EXTERNAL_SSSE3(cpu_flags)) { + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3; } if (EXTERNAL_SSE4(cpu_flags)) { - if (!vp7) { - c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4; + c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4; - c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4; - c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4; - c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4; - } + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4; } #endif /* HAVE_YASM */ } |