aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/vp8dsp_init.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/vp8dsp_init.c')
-rw-r--r--libavcodec/x86/vp8dsp_init.c179
1 files changed, 92 insertions, 87 deletions
diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c
index e7ac9d236c..ff743d62dc 100644
--- a/libavcodec/x86/vp8dsp_init.c
+++ b/libavcodec/x86/vp8dsp_init.c
@@ -315,44 +315,18 @@ DECLARE_LOOP_FILTER(sse4)
c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT
-av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c, int vp7)
+av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMX(cpu_flags)) {
- if (!vp7) {
- c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
- c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
- }
#if ARCH_X86_32
- if (!vp7) {
- c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
- c->vp8_idct_add = ff_vp8_idct_add_mmx;
- c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx;
- }
c->put_vp8_epel_pixels_tab[0][0][0] =
c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
#endif
c->put_vp8_epel_pixels_tab[1][0][0] =
c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
-
-#if ARCH_X86_32
- if (!vp7) {
- c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
- c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
-
- c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
- c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
- c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
- c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
-
- c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
- c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
- c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
- c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
- }
-#endif
}
/* note that 4-tap width=16 functions are missing because w=16
@@ -365,29 +339,10 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c, int vp7)
VP8_MC_FUNC(1, 8, mmxext);
VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
-
- if (!vp7) {
- c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
- c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
-
- c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
- c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
- c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
- c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
-
- c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
- c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
- c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
- c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
- }
#endif
}
if (EXTERNAL_SSE(cpu_flags)) {
- if (!vp7) {
- c->vp8_idct_add = ff_vp8_idct_add_sse;
- c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
- }
c->put_vp8_epel_pixels_tab[0][0][0] =
c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
}
@@ -397,64 +352,114 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c, int vp7)
VP8_MC_FUNC(1, 8, sse2);
VP8_BILINEAR_MC_FUNC(0, 16, sse2);
VP8_BILINEAR_MC_FUNC(1, 8, sse2);
+ }
- if (!vp7) {
- c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ VP8_LUMA_MC_FUNC(0, 16, ssse3);
+ VP8_MC_FUNC(1, 8, ssse3);
+ VP8_MC_FUNC(2, 4, ssse3);
+ VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
+ VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
+ VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
+ }
+#endif /* HAVE_YASM */
+}
- c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
- c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
+av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
+{
+#if HAVE_YASM
+ int cpu_flags = av_get_cpu_flags();
- c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
- c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
- }
+ if (EXTERNAL_MMX(cpu_flags)) {
+ c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
+ c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
+#if ARCH_X86_32
+ c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
+ c->vp8_idct_add = ff_vp8_idct_add_mmx;
+ c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx;
+
+ c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
+
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
+ c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
+ c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
+
+ c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
+ c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
+#endif
}
- if (EXTERNAL_SSE2(cpu_flags)) {
- if (!vp7) {
- c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
+ /* note that 4-tap width=16 functions are missing because w=16
+ * is only used for luma, and luma is always a copy or sixtap. */
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+#if ARCH_X86_32
+ c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
+
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
+ c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
+ c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
+
+ c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
+ c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
+#endif
+ }
- c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
+ if (EXTERNAL_SSE(cpu_flags)) {
+ c->vp8_idct_add = ff_vp8_idct_add_sse;
+ c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
+ }
+
+ if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+ c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
- c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
- c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
+ c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
- c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
- c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
- }
+ c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
+ c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
}
- if (EXTERNAL_SSSE3(cpu_flags)) {
- VP8_LUMA_MC_FUNC(0, 16, ssse3);
- VP8_MC_FUNC(1, 8, ssse3);
- VP8_MC_FUNC(2, 4, ssse3);
- VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
- VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
- VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
- if (!vp7) {
- c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
- c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
- c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
- c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
- c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
- c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
+ c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
- c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
- c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
- c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
- c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
- }
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
+ }
+
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
+
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
+ c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
+ c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
+
+ c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
+ c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
}
if (EXTERNAL_SSE4(cpu_flags)) {
- if (!vp7) {
- c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
+ c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
- c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
- c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4;
- c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4;
- }
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4;
}
#endif /* HAVE_YASM */
}