diff options
Diffstat (limited to 'vp8/encoder/x86/encodeopt.asm')
-rw-r--r-- | vp8/encoder/x86/encodeopt.asm | 41 |
1 files changed, 16 insertions, 25 deletions
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm index 1940471..413d74d 100644 --- a/vp8/encoder/x86/encodeopt.asm +++ b/vp8/encoder/x86/encodeopt.asm @@ -1,16 +1,16 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" - ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) global sym(vp8_block_error_xmm) sym(vp8_block_error_xmm): @@ -19,11 +19,9 @@ sym(vp8_block_error_xmm): SHADOW_ARGS_TO_STACK 2 push rsi push rdi - ; end prolog - + ; end prologue mov rsi, arg(0) ;coeff_ptr - pxor xmm7, xmm7 mov rdi, arg(1) ;dcoef_ptr movdqa xmm3, [rsi] @@ -32,31 +30,25 @@ sym(vp8_block_error_xmm): movdqa xmm5, [rsi+16] movdqa xmm6, [rdi+16] - pxor xmm1, xmm1 ; from movd xmm1, dc; dc=0 + psubw xmm3, xmm4 - movdqa xmm2, xmm7 psubw xmm5, xmm6 - - por xmm1, xmm2 + pmaddwd xmm3, xmm3 pmaddwd xmm5, xmm5 - pcmpeqw xmm1, xmm7 - psubw xmm3, xmm4 + paddd xmm3, xmm5 - pand xmm1, xmm3 - pmaddwd xmm1, xmm1 - - paddd xmm1, xmm5 - movdqa xmm0, xmm1 + pxor xmm7, xmm7 + movdqa xmm0, xmm3 punpckldq xmm0, xmm7 - punpckhdq xmm1, xmm7 + punpckhdq xmm3, xmm7 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 + paddd xmm0, xmm3 + movdqa xmm3, xmm0 psrldq xmm0, 8 - paddd xmm0, xmm1 + paddd xmm0, xmm3 movd rax, xmm0 @@ -67,7 +59,6 @@ sym(vp8_block_error_xmm): pop rbp ret - ;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr) global sym(vp8_block_error_mmx) sym(vp8_block_error_mmx): |