diff options
author | Saurabh Sood <saurabh.sood@ittiam.com> | 2018-11-07 11:20:05 +0530 |
---|---|---|
committer | Tim Schumacher <timschumi@gmx.de> | 2019-08-20 00:16:09 +0200 |
commit | b4e88442cf6aea1cef011b13bf0dfbe411d99506 (patch) | |
tree | 3e62e910e0c86864d68298e90ee92f3ea6d18959 | |
parent | a9c65511181b612aca9edf8e95c185db7450742d (diff) | |
download | android_external_libmpeg2-cm-13.0.tar.gz android_external_libmpeg2-cm-13.0.tar.bz2 android_external_libmpeg2-cm-13.0.zip |
Add push-pop for Neon D8-D15 registersHEADreplicant-6.0-0004-transitionreplicant-6.0-0004-rc6replicant-6.0-0004-rc5-transitionreplicant-6.0-0004-rc5replicant-6.0-0004-rc4replicant-6.0-0004-rc3replicant-6.0-0004-rc2replicant-6.0-0004cm-13.0
According to ARM calling conventions, D8-D15 are callee saved
registers. Hence have to be pushed before being used as scratch.
Bug: 120644655
Test: vendor
Change-Id: Iad2ac726ac2712a88737f3eecc25a7f7f88c7bba
(cherry picked from commit 3714e256de08f40709fec095f7d52bae78c1a259)
-rw-r--r-- | common/arm/ideint_cac_a9.s | 2 | ||||
-rw-r--r-- | common/arm/impeg2_idct.s | 25 | ||||
-rw-r--r-- | common/arm/impeg2_inter_pred.s | 32 | ||||
-rw-r--r-- | common/arm/impeg2_mem_func.s | 4 |
4 files changed, 39 insertions, 24 deletions
diff --git a/common/arm/ideint_cac_a9.s b/common/arm/ideint_cac_a9.s index 964c5e6..9de9752 100644 --- a/common/arm/ideint_cac_a9.s +++ b/common/arm/ideint_cac_a9.s @@ -66,6 +66,7 @@ ideint_cac_8x8_a9: stmfd sp!, {r4-r10, lr} + vpush {d9} @ Load first row of top vld1.u8 d28, [r0], r2 @@ -210,4 +211,5 @@ ideint_cac_8x8_a9: vmov.u32 r0, d0[0] cmp r0, #0 movne r0, #1 + vpop {d9} ldmfd sp!, {r4-r10, pc} diff --git a/common/arm/impeg2_idct.s b/common/arm/impeg2_idct.s index 0b83b72..2f29f43 100644 --- a/common/arm/impeg2_idct.s +++ b/common/arm/impeg2_idct.s @@ -108,6 +108,7 @@ gai2_impeg2_idct_first_col_q11_addr2: .global impeg2_idct_recon_dc_a9q impeg2_idct_recon_dc_a9q: stmfd sp!, {r4, r6, r12, lr} + vpush {d8-d15} @//r0: pi2_src @//r1: pi2_tmp - not used, used as pred_strd @//r2: pu1_pred @@ -115,8 +116,8 @@ impeg2_idct_recon_dc_a9q: @//r4: used as scratch @//r5: - ldr r1, [sp, #20] @//pred_strd - ldr r6, [sp, #24] @//dst_strd + ldr r1, [sp, #84] @//pred_strd + ldr r6, [sp, #88] @//dst_strd ldr r14, gai2_impeg2_idct_q15_addr1 q15lbl1: @@ -188,6 +189,7 @@ q11lbl1: vst1.8 d7, [r3], r6 + vpop {d8-d15} ldmfd sp!, {r4, r6, r12, pc} @@ -196,9 +198,10 @@ q11lbl1: .global impeg2_idct_recon_dc_mismatch_a9q impeg2_idct_recon_dc_mismatch_a9q: stmfd sp!, {r4-r12, lr} + vpush {d8-d15} - ldr r1, [sp, #44] @//pred_strd - ldr r6, [sp, #48] @//dst_strd + ldr r1, [sp, #108] @//pred_strd + ldr r6, [sp, #112] @//dst_strd ldr r14, gai2_impeg2_idct_q15_addr2 q15lbl2: @@ -304,6 +307,7 @@ additive_lbl: vst1.8 d30, [r3], r6 + vpop {d8-d15} ldmfd sp!, {r4-r12, pc} @@ -406,12 +410,14 @@ impeg2_idct_recon_a9q: @// Copy the input pointer to another register @// Step 1 : load all constants stmfd sp!, {r4-r12, lr} + vpush {d8-d15} + + ldr r8, [sp, #108] @ prediction stride + ldr r7, [sp, #112] @ destination stride + ldr r6, [sp, #104] @ src stride + ldr r12, [sp, #116] + ldr r11, [sp, #120] - ldr r8, [sp, #44] @ prediction stride - ldr r7, [sp, #48] @ destination stride - ldr r6, [sp, #40] @ src stride - ldr r12, [sp, #52] - ldr r11, [sp, #56] mov r6, r6, lsl #1 @ x sizeof(word16) add r9, r0, r6, lsl #1 @ 2 rows @@ -1198,6 +1204,7 @@ pred_buff_addition: + vpop {d8-d15} ldmfd sp!, {r4-r12, pc} diff --git a/common/arm/impeg2_inter_pred.s b/common/arm/impeg2_inter_pred.s index f1b3dde..23beca1 100644 --- a/common/arm/impeg2_inter_pred.s +++ b/common/arm/impeg2_inter_pred.s @@ -100,7 +100,7 @@ impeg2_copy_mb_a9q: - stmfd r13!, {r4, r5, r14} + stmfd sp!, {r4, r5, r14} ldr r4, [r0] @src->y @@ -188,7 +188,7 @@ impeg2_copy_mb_a9q: vld1.8 {d0}, [r4], r2 @Load and increment src vst1.8 {d0}, [r5], r3 @Store and increment dst - ldmfd r13!, {r4, r5, pc} + ldmfd sp!, {r4, r5, pc} @@ -223,7 +223,8 @@ impeg2_copy_mb_a9q: impeg2_mc_fullx_halfy_8x8_a9q: - stmfd r13!, {r14} + stmfd sp!, {r14} + vpush {d8-d9} add r14, r1, r2 mov r2, r2, lsl #1 @@ -257,6 +258,7 @@ impeg2_mc_fullx_halfy_8x8_a9q: vst1.8 {d7}, [r14], r3 @// eighth row hence r8 = D7 vst1.8 {d5}, [r0], r3 @// seventh row hence r7 = D5 + vpop {d8-d9} ldmfd sp!, {pc} @@ -315,27 +317,27 @@ impeg2_mc_halfx_fully_8x8_a9q: vld1.8 {d6, d7}, [r14], r2 @row6 - vext.8 d8, d0, d1, #1 @Extract pixels (1-8) of row1 + vext.8 d24, d0, d1, #1 @Extract pixels (1-8) of row1 - vext.8 d12, d2, d3, #1 @Extract pixels (1-8) of row5 + vext.8 d28, d2, d3, #1 @Extract pixels (1-8) of row5 vext.8 d16, d4, d5, #1 @Extract pixels (1-8) of row2 vext.8 d20, d6, d7, #1 @Extract pixels (1-8) of row6 - vld1.8 {d9, d10}, [r1], r2 @load row3 + vld1.8 {d25, d26}, [r1], r2 @load row3 - vld1.8 {d13, d14}, [r14], r2 @load row7 + vld1.8 {d29, d30}, [r14], r2 @load row7 vld1.8 {d17, d18}, [r1], r2 @load row4 vld1.8 {d21, d22}, [r14], r2 @load row8 - vext.8 d1, d9, d10, #1 @Extract pixels (1-8) of row3 + vext.8 d1, d25, d26, #1 @Extract pixels (1-8) of row3 - vext.8 d3, d13, d14, #1 @Extract pixels (1-8) of row7 + vext.8 d3, d29, d30, #1 @Extract pixels (1-8) of row7 @@ -344,9 +346,9 @@ impeg2_mc_halfx_fully_8x8_a9q: vext.8 d7, d21, d22, #1 @Extract pixels (1-8) of row8 - vrhadd.u8 q0, q0, q4 @operate on row1 and row3 + vrhadd.u8 q0, q0, q12 @operate on row1 and row3 - vrhadd.u8 q1, q1, q6 @operate on row5 and row7 + vrhadd.u8 q1, q1, q14 @operate on row5 and row7 vrhadd.u8 q2, q2, q8 @operate on row2 and row4 @@ -415,6 +417,7 @@ impeg2_mc_halfx_fully_8x8_a9q: impeg2_mc_halfx_halfy_8x8_a9q: stmfd sp!, {r14} + vpush {d8-d15} add r14, r1, r2, lsl #2 @@ -548,6 +551,7 @@ impeg2_mc_halfx_halfy_8x8_a9q: + vpop {d8-d15} ldmfd sp!, {pc} @@ -663,7 +667,8 @@ impeg2_mc_fullx_fully_8x8_a9q: impeg2_interpolate_a9q: - stmfd r13!, {r4, r5, r7, r12, r14} + stmfd sp!, {r4, r5, r7, r12, r14} + vpush {d8-d15} ldr r4, [r0, #0] @ptr_y src1 @@ -793,7 +798,8 @@ interp_chromablocks_stride: bne interp_chromablocks_stride - ldmfd r13!, {r4, r5, r7, r12, pc} + vpop {d8-d15} + ldmfd sp!, {r4, r5, r7, r12, pc} diff --git a/common/arm/impeg2_mem_func.s b/common/arm/impeg2_mem_func.s index 869b7d7..ea34db4 100644 --- a/common/arm/impeg2_mem_func.s +++ b/common/arm/impeg2_mem_func.s @@ -146,7 +146,7 @@ impeg2_memset_8bit_8x8_block_a9q: impeg2_memset0_16bit_8x8_linear_block_a9q: - stmfd r13!, {r14} + stmfd sp!, {r14} vmov.i16 q0, #0 @@ -170,7 +170,7 @@ impeg2_memset0_16bit_8x8_linear_block_a9q: - ldmfd r13!, {pc} + ldmfd sp!, {pc} |