about summary refs log tree commit diff stats
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2013-10-26 08:24:09 -0400
committer: Michael Niedermayer <michaelni@gmx.at> 2013-10-27 15:02:48 +0100
commit: 960490c0b20dd5f9a6c329bd14023b9598082fda (patch)
tree: f83967da07421a901e5d45de8e56736026b275c9 /libavcodec/x86
parent: cd86eb265f36a79d2996f44ba7ec7e7acbc99f81 (diff)
download: android_external_ffmpeg-960490c0b20dd5f9a6c329bd14023b9598082fda.tar.gz
android_external_ffmpeg-960490c0b20dd5f9a6c329bd14023b9598082fda.tar.bz2
android_external_ffmpeg-960490c0b20dd5f9a6c329bd14023b9598082fda.zip
avcodec/x86/videodsp: Small speedups in ff_emulated_edge_mc x86 SIMD.
Don't use word-size multiplications if size == 2, and if we're using SIMD instructions (size >= 8), complete leftover 4-byte sets using movd, not mov. Both of these changes lead to minor speedups.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/videodsp.asm 34
1 files changed, 17 insertions, 17 deletions
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index 85a41b5b6d..1ac02574d6 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -344,10 +344,6 @@ VERTICAL_EXTEND 16, 22
; obviously not the same on both sides.
%macro READ_V_PIXEL 2
-%if %1 == 2
- movzx valw, byte %2
- imul valw, 0x0101
-%else
movzx vald, byte %2
imul vald, 0x01010101
%if %1 >= 8
@@ -356,13 +352,15 @@ VERTICAL_EXTEND 16, 22
pshufd m0, m0, q0000
%else
punpckldq m0, m0
-%endif
-%endif ; %1 >= 8
-%endif
+%endif ; mmsize == 16
+%endif ; %1 >= 8
%endmacro ; READ_V_PIXEL
%macro WRITE_V_PIXEL 2
%assign %%off 0
+
+%if %1 >= 8
+
%rep %1/mmsize
movu [%2+%%off], m0
%assign %%off %%off+mmsize
@@ -378,27 +376,29 @@ VERTICAL_EXTEND 16, 22
%assign %%off %%off+8
%endif
%endif ; %1-%%off >= 8
-%endif
+%endif ; mmsize == 16
%if %1-%%off >= 4
%if %1 > 8 && %1-%%off > 4
movq [%2+%1-8], m0
%assign %%off %1
-%elif %1 >= 8 && %1-%%off >= 4
- movd [%2+%%off], m0
-%assign %%off %%off+4
%else
- mov [%2+%%off], vald
+ movd [%2+%%off], m0
%assign %%off %%off+4
%endif
%endif ; %1-%%off >= 4
-%if %1-%%off >= 2
-%if %1 >= 8
- movd [%2+%1-4], m0
-%else
+%else ; %1 < 8
+
+%rep %1/4
+ mov [%2+%%off], vald
+%assign %%off %%off+4
+%endrep ; %1/4
+
+%endif ; %1 >=/< 8
+
+%if %1-%%off == 2
mov [%2+%%off], valw
-%endif
%endif ; (%1-%%off)/2
%endmacro ; WRITE_V_PIXEL