diff options
author | Rakesh Kumar <rakesh.kumar@ittiam.com> | 2017-10-09 17:14:42 +0530 |
---|---|---|
committer | MSe <mse1969@posteo.de> | 2018-01-10 20:56:47 +0100 |
commit | 53b4d3b3ff61f79a095bf762b460d674270597bc (patch) | |
tree | 0539daa1404cf5669fd2bd053a87460accf50d41 /common/arm/ihevc_sao_edge_offset_class3_chroma.s | |
parent | 6a73e1e1443309687799cff7c958ee2386ca19ae (diff) | |
download | android_external_libhevc-53b4d3b3ff61f79a095bf762b460d674270597bc.tar.gz android_external_libhevc-53b4d3b3ff61f79a095bf762b460d674270597bc.tar.bz2 android_external_libhevc-53b4d3b3ff61f79a095bf762b460d674270597bc.zip |
Add PUSH-POP of D registers in Arm Neon 32 bit functions
According to the ARM calling convention, D8-D15 are callee-saved
registers; hence they have to be pushed before being used as scratch.
Added Push Pop in inter_pred, intra_pred, deblk_luma, itrans,
itrans_recon, sao, weighted_pred ARM NEON 32 bit functions.
Bug: 68320413
Test: Tested hevcdec
Change-Id: I71f8868ac4205b0a3680d7ce5b82511653e9c747
(cherry picked from commit a47cb8865a33a87f163d87781f417884d30d46ed)
CVE-2017-13177
Diffstat (limited to 'common/arm/ihevc_sao_edge_offset_class3_chroma.s')
-rw-r--r-- | common/arm/ihevc_sao_edge_offset_class3_chroma.s | 127 |
1 file changed, 69 insertions, 58 deletions
diff --git a/common/arm/ihevc_sao_edge_offset_class3_chroma.s b/common/arm/ihevc_sao_edge_offset_class3_chroma.s index fe3b459..9f4eb62 100644 --- a/common/arm/ihevc_sao_edge_offset_class3_chroma.s +++ b/common/arm/ihevc_sao_edge_offset_class3_chroma.s @@ -60,6 +60,15 @@ @r7 => wd @r8=> ht +.equ pu1_src_top_left_offset, 328 +.equ pu1_src_top_right_offset, 332 +.equ pu1_src_bot_left_offset, 336 +.equ pu1_avail_offset, 340 +.equ pi1_sao_u_offset, 344 +.equ pi1_sao_v_offset, 348 +.equ wd_offset, 352 +.equ ht_offset, 356 + .text .syntax unified .p2align 2 @@ -86,21 +95,22 @@ ihevc_sao_edge_offset_class3_chroma_a9q: STMFD sp!,{r4-r12,r14} @stack stores the values of the arguments + vpush {d8 - d15} + SUB sp,sp,#224 @Decrement the stack pointer to store some temp arr values - LDR r7,[sp,#0x40] @Loads wd - LDR r8,[sp,#0x44] @Loads ht + LDR r7,[sp,#wd_offset] @Loads wd + LDR r8,[sp,#ht_offset] @Loads ht SUB r9,r7,#2 @wd - 2 - LDR r4,[sp,#0x28] @Loads pu1_src_top_left + LDR r4,[sp,#pu1_src_top_left_offset] @Loads pu1_src_top_left LDRH r10,[r3,r9] @pu1_src_top[wd - 2] MOV r9,r7 @Move width to r9 for loop count - LDR r5,[sp,#0x34] @Loads pu1_avail - LDR r6,[sp,#0x38] @Loads pi1_sao_offset_u + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail + LDR r6,[sp,#pi1_sao_u_offset] @Loads pi1_sao_offset_u - STR r3,[sp,#0x38] @Store pu1_src_top in sp - SUB sp,sp,#0xD4 @Decrement the stack pointer to store some temp arr values + STR r3,[sp,#220] @Store pu1_src_top in sp STRH r10,[sp] @u1_src_top_left_tmp = pu1_src_top[wd - 2] SUB r10,r8,#1 @ht-1 @@ -122,7 +132,7 @@ PU1_AVAIL_5_LOOP_U: LDRB r10,[r0,r11] @u1_pos_0_0_tmp_v = pu1_src[wd - 1] BEQ PU1_AVAIL_6_LOOP_U - LDR r11,[sp,#0x100] @Load pu1_src_top_right from sp + LDR r11,[sp,#pu1_src_top_right_offset] @Load pu1_src_top_right from sp LDRB r11,[r11] @pu1_src_top_right[0] SUB r12,r9,r11 @pu1_src[wd - 2] - pu1_src_top_right[0] CMP r12,#0 @@ -150,7 +160,7 @@ ulbl1: PU1_AVAIL_5_LOOP_V: - LDR r11,[sp,#0x100] @Load pu1_src_top_right from sp + 
LDR r11,[sp,#pu1_src_top_right_offset] @Load pu1_src_top_right from sp LDRB r11,[r11,#1] @pu1_src_top_right[1] SUB r12,r10,r11 @pu1_src[wd - 1] - pu1_src_top_right[1] CMP r12,#0 @@ -172,7 +182,7 @@ ulbl2: LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] CMP r12,#0 @0 != edge_idx BEQ PU1_AVAIL_6_LOOP_U - LDR r11,[sp,#0x110] @Loads pi1_sao_offset_v + LDR r11,[sp,#pi1_sao_v_offset] @Loads pi1_sao_offset_v LDRSB r11,[r11,r12] @pi1_sao_offset_v[edge_idx] ADD r10,r10,r11 @pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx] USAT r10,#8,r10 @u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) @@ -180,7 +190,7 @@ ulbl2: PU1_AVAIL_6_LOOP_U: STRB r9,[sp,#6] STRB r10,[sp,#7] - STR r0,[sp,#0x100] @Store pu1_src in sp + STR r0,[sp,#212] @Store pu1_src in sp LDRB r10,[r5,#6] @pu1_avail[6] CMP r10,#0 @@ -198,7 +208,7 @@ PU1_AVAIL_6_LOOP_U: MVNLT r11,#0 MOVGT r11,#1 @SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd]) - LDR r14,[sp,#0x104] @Load pu1_src_bot_left from sp + LDR r14,[sp,#pu1_src_bot_left_offset] @Load pu1_src_bot_left from sp LDRB r14,[r14] @Load pu1_src_bot_left[0] SUB r14,r10,r14 @pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0] CMP r14,#0 @@ -228,7 +238,7 @@ PU1_AVAIL_6_LOOP_V: MVNLT r11,#0 MOVGT r11,#1 @SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd]) - LDR r14,[sp,#0x104] @Load pu1_src_bot_left from sp + LDR r14,[sp,#pu1_src_bot_left_offset] @Load pu1_src_bot_left from sp LDRB r14,[r14,#1] @Load pu1_src_bot_left[1] SUB r14,r9,r14 @pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1] CMP r14,#0 @@ -244,7 +254,7 @@ ulbl4: LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] CMP r12,#0 BEQ PU1_AVAIL_3_LOOP - LDR r14,[sp,#0x110] @Loads pi1_sao_offset_v + LDR r14,[sp,#pi1_sao_v_offset] @Loads pi1_sao_offset_v LDRSB r11,[r14,r12] @pi1_sao_offset_v[edge_idx] ADD r9,r9,r11 @pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx] USAT r9,#8,r9 
@u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) @@ -252,7 +262,7 @@ ulbl4: PU1_AVAIL_3_LOOP: STRB r10,[sp,#8] STRB r9,[sp,#9] - STR r2,[sp,#0x104] @Store pu1_src_left in sp + STR r2,[sp,#216] @Store pu1_src_left in sp MOV r12,r8 @Move ht MOV r14,r2 @Move pu1_src_left to pu1_src_left_cpy @@ -276,7 +286,7 @@ PU1_AVAIL_2_LOOP_END: VMOV.I16 Q1,#0 @const_min_clip = vdupq_n_s16(0) VMOV.I16 Q2,#255 @const_max_clip = vdupq_n_u16((1 << bit_depth) - 1) VLD1.8 D6,[r6] @offset_tbl_u = vld1_s8(pi1_sao_offset_u) - LDR r6,[sp,#0x110] @Loads pi1_sao_offset_v + LDR r6,[sp,#pi1_sao_v_offset] @Loads pi1_sao_offset_v VLD1.8 D7,[r6] @offset_tbl_v = vld1_s8(pi1_sao_offset_v) LDR r2, gi1_table_edge_idx_addr_5 @table pointer ulbl5: @@ -291,9 +301,9 @@ ulbl5: BLE WD_16_HT_4_LOOP @If jump to WD_16_HT_4_LOOP WIDTH_LOOP_16: - LDR r7,[sp,#0x114] @Loads wd + LDR r7,[sp,#wd_offset] @Loads wd CMP r6,r7 @col == wd - LDR r5,[sp,#0x108] @Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail LDRBEQ r8,[r5] @pu1_avail[0] MOVNE r8,#-1 @@ -314,7 +324,7 @@ SKIP_AU1_MASK_VAL: VLD1.8 D12,[r0]! 
@pu1_cur_row = vld1q_u8(pu1_src) VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) SUB r0,#8 - ADD r5,sp,#0x4B @*au1_src_left_tmp + ADD r5,sp,#75 @*au1_src_left_tmp SUBEQ r8,r0,r1 @pu1_src - src_strd VMOV.I8 Q9,#0 @@ -326,15 +336,15 @@ SKIP_AU1_MASK_VAL: SUB r8,#8 ADD r3,r3,#16 - LDR r4,[sp,#0x118] @Loads ht + LDR r4,[sp,#ht_offset] @Loads ht VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) - LDR r7,[sp,#0x114] @Loads wd + LDR r7,[sp,#wd_offset] @Loads wd SUB r7,r7,r6 @(wd - col) VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) ADD r7,r7,#14 @15 + (wd - col) - LDR r8,[sp,#0x100] @Loads *pu1_src + LDR r8,[sp,#212] @Loads *pu1_src VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)] @@ -359,7 +369,7 @@ AU1_SRC_LEFT_LOOP: LDRH r5,[r8,#2] @I VMOV.16 D19[3],r5 @I vsetq_lane_u8 - LDR r11,[sp,#0x108] @I Loads pu1_avail + LDR r11,[sp,#pu1_avail_offset] @I Loads pu1_avail LDRB r11,[r11,#2] @I pu1_avail[2] VEXT.8 Q9,Q9,Q8,#14 @I pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14) @@ -477,7 +487,7 @@ PU1_SRC_LOOP: VCGT.U8 Q11,Q6,Q14 @II vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) BNE NEXT_ROW_POINTER_ASSIGNED_2 @III - LDR r5,[sp,#0x108] @III Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @III Loads pu1_avail LDRB r5,[r5,#3] @III pu1_avail[3] CMP r5,#0 @III SUBNE r11,r4,#4 @III pu1_src[src_strd - 2] @@ -597,7 +607,7 @@ NEXT_ROW_POINTER_ASSIGNED_2: LDRB r9,[r0,#17] @load the value pu1_src_cpy[17 - src_strd] BNE NEXT_ROW_POINTER_ASSIGNED_3 - LDR r5,[sp,#0x108] @Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail LDRB r5,[r5,#3] @pu1_avail[3] CMP r5,#0 SUBNE r8,r11,#4 @pu1_src[src_strd - 2] @@ -657,13 +667,13 @@ NEXT_ROW_POINTER_ASSIGNED_3: INNER_LOOP_DONE: - LDR r8,[sp,#0x118] @Loads ht + LDR r8,[sp,#ht_offset] @Loads ht VMOVN.I16 D20,Q10 @III vmovn_s16(pi2_tmp_cur_row.val[0]) - ADD r5,sp,#0x4B @*au1_src_left_tmp + ADD r5,sp,#75 @*au1_src_left_tmp LSL r8,r8,#1 VMOVN.I16 D21,Q9 
@III vmovn_s16(pi2_tmp_cur_row.val[1]) - LDR r11,[sp,#0x104] @Loads *pu1_src_left + LDR r11,[sp,#216] @Loads *pu1_src_left SRC_LEFT_LOOP: LDR r7,[r5],#4 @au1_src_left_tmp[row] @@ -676,7 +686,7 @@ SRC_LEFT_LOOP: CMP r6,#8 @Check whether residue remains BLT RE_ASSINING_LOOP @Jump to re-assigning loop - LDR r7,[sp,#0x114] @Loads wd + LDR r7,[sp,#wd_offset] @Loads wd LDR r0,[sp,#0x02] @Loads *pu1_src SUB r7,r7,r6 ADD r0,r0,r7 @@ -684,9 +694,9 @@ SRC_LEFT_LOOP: BEQ WIDTH_RESIDUE @If residue remains jump to residue loop WD_16_HT_4_LOOP: - LDR r7,[sp,#0x114] @Loads wd + LDR r7,[sp,#wd_offset] @Loads wd - LDR r5,[sp,#0x108] @Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail CMP r6,r7 @col == wd LDRBEQ r8,[r5] @pu1_avail[0] @@ -716,17 +726,17 @@ SKIP_AU1_MASK_VAL_WD_16_HT_4: VLD1.8 D10,[r8]! @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) SUB r8,#8 - ADD r5,sp,#0x4B @*au1_src_left_tmp + ADD r5,sp,#75 @*au1_src_left_tmp - LDR r4,[sp,#0x118] @Loads ht + LDR r4,[sp,#ht_offset] @Loads ht VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) - LDR r7,[sp,#0x114] @Loads wd + LDR r7,[sp,#wd_offset] @Loads wd SUB r7,r7,r6 @(wd - col) VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) ADD r7,r7,#14 @15 + (wd - col) - LDR r8,[sp,#0x100] @Loads *pu1_src + LDR r8,[sp,#212] @Loads *pu1_src VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)] @@ -744,7 +754,7 @@ AU1_SRC_LEFT_LOOP_WD_16_HT_4: PU1_SRC_LOOP_WD_16_HT_4: ADD r9,r0,r1 @*pu1_src + src_strd - LDR r5,[sp,#0x108] @Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail VLD1.8 D16,[r9]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) VLD1.8 D17,[r9] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) SUB r9,#8 @@ -766,7 +776,7 @@ NEXT_ROW_POINTER_ASSIGNED_WD_16_HT_4: CMP r7,r12 BLT SIGN_UP_CHANGE_WD_16_HT_4 - LDR r5,[sp,#0x108] @Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail LDRB r5,[r5,#2] @pu1_avail[2] CMP r5,#0 BNE SIGN_UP_CHANGE_DONE_WD_16_HT_4 @@ -839,9 +849,9 @@ SIGN_UP_CHANGE_DONE_WD_16_HT_4: VST1.8 {Q14},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) BNE PU1_SRC_LOOP_WD_16_HT_4 @If not equal jump to PU1_SRC_LOOP_WD_16_HT_4 - LDR r8,[sp,#0x118] @Loads ht - ADD r5,sp,#0x4B @*au1_src_left_tmp - LDR r11,[sp,#0x104] @Loads *pu1_src_left + LDR r8,[sp,#ht_offset] @Loads ht + ADD r5,sp,#75 @*au1_src_left_tmp + LDR r11,[sp,#216] @Loads *pu1_src_left SRC_LEFT_LOOP_WD_16_HT_4: LDR r7,[r5],#4 @au1_src_left_tmp[row] @@ -851,16 +861,16 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS r6,r6,#16 @Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP @Jump to re-assigning loop - LDR r7,[sp,#0x114] @Loads wd + LDR r7,[sp,#wd_offset] @Loads wd LDR r0,[sp,#0x02] @Loads *pu1_src SUB r7,r7,r6 ADD r0,r0,r7 BGT WD_16_HT_4_LOOP @If not equal jump to width_loop WIDTH_RESIDUE: - LDR r7,[sp,#0x114] @Loads wd + LDR r7,[sp,#wd_offset] @Loads wd - LDR r5,[sp,#0x108] @Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail CMP r6,r7 @wd_residue == wd LDRBEQ r8,[r5] @pu1_avail[0] @@ -878,13 +888,13 @@ WIDTH_RESIDUE: ADD r10,r10,#2 @pu1_src - src_strd + 2 VMOV.8 d8[6],r11 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) - ADD r5,sp,#0x4B @*au1_src_left_tmp + ADD r5,sp,#75 @*au1_src_left_tmp - LDR r4,[sp,#0x118] @Loads ht + LDR r4,[sp,#ht_offset] @Loads ht VMOV.8 d8[7],r11 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) - LDR r7,[sp,#0x114] @Loads wd + LDR r7,[sp,#wd_offset] @Loads wd - LDR r8,[sp,#0x100] @Loads *pu1_src + LDR r8,[sp,#212] @Loads *pu1_src VLD1.8 D10,[r10]! 
@pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) VLD1.8 D11,[r10] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) SUB r10,#8 @@ -917,7 +927,7 @@ PU1_SRC_LOOP_RESIDUE: VLD1.8 D16,[r9]! @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) VLD1.8 D17,[r9] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) SUB r9,#8 - LDR r5,[sp,#0x108] @Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail LDRB r5,[r5,#3] @pu1_avail[3] ADD r8,r14,r11,LSL #1 @pu1_src_left_cpy[(ht_tmp - row) * 2] @@ -940,7 +950,7 @@ NEXT_ROW_POINTER_ASSIGNED_RESIDUE: VEXT.8 Q9,Q9,Q8,#14 @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14) BLT SIGN_UP_CHANGE_RESIDUE - LDR r5,[sp,#0x108] @Loads pu1_avail + LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail LDRB r5,[r5,#2] @pu1_avail[2] CMP r5,#0 BNE SIGN_UP_CHANGE_DONE_RESIDUE @@ -1007,10 +1017,10 @@ SIGN_UP_CHANGE_DONE_RESIDUE: BNE PU1_SRC_LOOP_RESIDUE @If not equal jump to PU1_SRC_LOOP - LDR r8,[sp,#0x118] @Loads ht - ADD r5,sp,#0x4B @*au1_src_left_tmp + LDR r8,[sp,#ht_offset] @Loads ht + ADD r5,sp,#75 @*au1_src_left_tmp - LDR r11,[sp,#0x104] @Loads *pu1_src_left + LDR r11,[sp,#216] @Loads *pu1_src_left SRC_LEFT_LOOP_RESIDUE: LDR r7,[r5],#4 @au1_src_left_tmp[row] @@ -1020,10 +1030,10 @@ SRC_LEFT_LOOP_RESIDUE: RE_ASSINING_LOOP: - LDR r7,[sp,#0x114] @Loads wd - LDR r8,[sp,#0x118] @Loads ht + LDR r7,[sp,#wd_offset] @Loads wd + LDR r8,[sp,#ht_offset] @Loads ht - LDR r0,[sp,#0x100] @Loads *pu1_src + LDR r0,[sp,#212] @Loads *pu1_src SUB r10,r7,#2 @wd - 2 LDRH r9,[sp,#6] @@ -1032,7 +1042,7 @@ RE_ASSINING_LOOP: STRH r9,[r0,r10] @pu1_src_org[0] = u1_pos_0_0_tmp MLA r6,r8,r1,r0 @pu1_src[(ht - 1) * src_strd] - LDR r4,[sp,#0xFC] @Loads pu1_src_top_left + LDR r4,[sp,#pu1_src_top_left_offset] @Loads pu1_src_top_left LDRH r9,[sp,#8] ADD r12,sp,#10 @@ -1041,7 +1051,7 @@ RE_ASSINING_LOOP: LDRH r10,[sp] @load u1_src_top_left_tmp from stack pointer STRH r10,[r4] @*pu1_src_top_left = u1_src_top_left_tmp - LDR r3,[sp,#0x10C] @Loads pu1_src_top + LDR r3,[sp,#220] 
@Loads pu1_src_top SRC_TOP_LOOP: VLD1.8 D0,[r12]! @pu1_src_top[col] = au1_src_top_tmp[col] @@ -1050,7 +1060,8 @@ SRC_TOP_LOOP: BNE SRC_TOP_LOOP END_LOOPS: - ADD sp,sp,#0xD4 + ADD sp,sp,#224 + vpop {d8 - d15} LDMFD sp!,{r4-r12,r15} @Reload the registers from SP |