diff options
author | hkuang <hkuang@google.com> | 2013-09-16 15:09:58 -0700 |
---|---|---|
committer | Hangyu Kuang <hkuang@google.com> | 2013-09-17 22:05:28 +0000 |
commit | 1184aebb761cbeac9124c37189a80a1a58f04b6b (patch) | |
tree | b1ce6b3d29c43ffd22eb18999c5c3bad26513a48 /libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm | |
parent | f3bed9137f66ef693bd406e43b17e9a1114f1e14 (diff) | |
download | android_external_libvpx-1184aebb761cbeac9124c37189a80a1a58f04b6b.tar.gz android_external_libvpx-1184aebb761cbeac9124c37189a80a1a58f04b6b.tar.bz2 android_external_libvpx-1184aebb761cbeac9124c37189a80a1a58f04b6b.zip |
Roll latest libvpx into Android.
The latest libvpx has more NEON optimizations and many
algorithm optimizations, which make VP9 decoding much faster.
bug:10804666
Change-Id: I75eaacea57ecc7542a780be778f0e9e157978524
(cherry picked from commit 3df0563f1b24dac6c0bd122fc922a48211269061)
Diffstat (limited to 'libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm')
-rw-r--r-- | libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm | 28 |
1 files changed, 16 insertions, 12 deletions
diff --git a/libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm b/libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm index 60f7991..db30660 100644 --- a/libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm +++ b/libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm @@ -36,6 +36,14 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pshufd m4, m4, 0 mova m2, [quantq] ; m2 = quant paddw m0, m4 ; m0 = zbin + zbin_oq +%ifidn %1, b_32x32 + pcmpeqw m5, m5 + psrlw m5, 15 + paddw m0, m5 + paddw m1, m5 + psrlw m0, 1 ; m0 = (m0 + 1) / 2 + psrlw m1, 1 ; m1 = (m1 + 1) / 2 +%endif mova m3, [r2q] ; m3 = dequant psubw m0, [pw_1] mov r2, shiftmp @@ -43,6 +51,9 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mova m4, [r2] ; m4 = shift mov r4, dqcoeffmp mov r5, iscanmp +%ifidn %1, b_32x32 + psllw m4, 1 +%endif pxor m5, m5 ; m5 = dedicated zero DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob lea coeffq, [ coeffq+ncoeffq*2] @@ -56,16 +67,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] pabsw m6, m9 ; m6 = abs(m9) pabsw m11, m10 ; m11 = abs(m10) -%ifidn %1, b_32x32 - paddw m6, m6 - paddw m11, m11 -%endif pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin punpckhqdq m0, m0 pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin - paddw m6, m1 ; m6 += round + paddsw m6, m1 ; m6 += round punpckhqdq m1, m1 - paddw m11, m1 ; m11 += round + paddsw m11, m1 ; m11 += round pmulhw m8, m6, m2 ; m8 = m6*q>>16 punpckhqdq m2, m2 pmulhw m13, m11, m2 ; m13 = m11*q>>16 @@ -112,10 +119,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] pabsw m6, m9 ; m6 = abs(m9) pabsw m11, m10 ; m11 = abs(m10) -%ifidn %1, b_32x32 - paddw m6, m6 - paddw m11, m11 -%endif pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin %ifidn %1, b_32x32 @@ -124,8 +127,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, 
skip, zbin, round, quant, \ or r6, r2 jz .skip_iter %endif - paddw m6, m1 ; m6 += round - paddw m11, m1 ; m11 += round + paddsw m6, m1 ; m6 += round + paddsw m11, m1 ; m11 += round pmulhw m14, m6, m2 ; m14 = m6*q>>16 pmulhw m13, m11, m2 ; m13 = m11*q>>16 paddw m14, m6 ; m14 += m6 @@ -164,6 +167,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pmaxsw m8, m13 add ncoeffq, mmsize jl .ac_only_loop + %ifidn %1, b_32x32 jmp .accumulate_eob .skip_iter: |