summaryrefslogtreecommitdiffstats
path: root/common/arm/ihevc_sao_edge_offset_class3_chroma.s
diff options
context:
space:
mode:
authorRakesh Kumar <rakesh.kumar@ittiam.com>2017-10-09 17:14:42 +0530
committerMSe <mse1969@posteo.de>2018-01-10 20:56:47 +0100
commit53b4d3b3ff61f79a095bf762b460d674270597bc (patch)
tree0539daa1404cf5669fd2bd053a87460accf50d41 /common/arm/ihevc_sao_edge_offset_class3_chroma.s
parent6a73e1e1443309687799cff7c958ee2386ca19ae (diff)
downloadandroid_external_libhevc-53b4d3b3ff61f79a095bf762b460d674270597bc.tar.gz
android_external_libhevc-53b4d3b3ff61f79a095bf762b460d674270597bc.tar.bz2
android_external_libhevc-53b4d3b3ff61f79a095bf762b460d674270597bc.zip
Add PUSH-POP of D registers in Arm Neon 32 bit functions
According to ARM calling conventions, D8-D15 are callee-saved registers; hence they have to be pushed before being used as scratch. Added Push Pop in inter_pred, intra_pred, deblk_luma, itrans, itrans_recon, sao, weighted_pred ARM NEON 32 bit functions. Bug: 68320413 Test: Tested hevcdec Change-Id: I71f8868ac4205b0a3680d7ce5b82511653e9c747 (cherry picked from commit a47cb8865a33a87f163d87781f417884d30d46ed) CVE-2017-13177
Diffstat (limited to 'common/arm/ihevc_sao_edge_offset_class3_chroma.s')
-rw-r--r--common/arm/ihevc_sao_edge_offset_class3_chroma.s127
1 file changed, 69 insertions, 58 deletions
diff --git a/common/arm/ihevc_sao_edge_offset_class3_chroma.s b/common/arm/ihevc_sao_edge_offset_class3_chroma.s
index fe3b459..9f4eb62 100644
--- a/common/arm/ihevc_sao_edge_offset_class3_chroma.s
+++ b/common/arm/ihevc_sao_edge_offset_class3_chroma.s
@@ -60,6 +60,15 @@
@r7 => wd
@r8=> ht
+.equ pu1_src_top_left_offset, 328
+.equ pu1_src_top_right_offset, 332
+.equ pu1_src_bot_left_offset, 336
+.equ pu1_avail_offset, 340
+.equ pi1_sao_u_offset, 344
+.equ pi1_sao_v_offset, 348
+.equ wd_offset, 352
+.equ ht_offset, 356
+
.text
.syntax unified
.p2align 2
@@ -86,21 +95,22 @@ ihevc_sao_edge_offset_class3_chroma_a9q:
STMFD sp!,{r4-r12,r14} @stack stores the values of the arguments
+ vpush {d8 - d15}
+ SUB sp,sp,#224 @Decrement the stack pointer to store some temp arr values
- LDR r7,[sp,#0x40] @Loads wd
- LDR r8,[sp,#0x44] @Loads ht
+ LDR r7,[sp,#wd_offset] @Loads wd
+ LDR r8,[sp,#ht_offset] @Loads ht
SUB r9,r7,#2 @wd - 2
- LDR r4,[sp,#0x28] @Loads pu1_src_top_left
+ LDR r4,[sp,#pu1_src_top_left_offset] @Loads pu1_src_top_left
LDRH r10,[r3,r9] @pu1_src_top[wd - 2]
MOV r9,r7 @Move width to r9 for loop count
- LDR r5,[sp,#0x34] @Loads pu1_avail
- LDR r6,[sp,#0x38] @Loads pi1_sao_offset_u
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
+ LDR r6,[sp,#pi1_sao_u_offset] @Loads pi1_sao_offset_u
- STR r3,[sp,#0x38] @Store pu1_src_top in sp
- SUB sp,sp,#0xD4 @Decrement the stack pointer to store some temp arr values
+ STR r3,[sp,#220] @Store pu1_src_top in sp
STRH r10,[sp] @u1_src_top_left_tmp = pu1_src_top[wd - 2]
SUB r10,r8,#1 @ht-1
@@ -122,7 +132,7 @@ PU1_AVAIL_5_LOOP_U:
LDRB r10,[r0,r11] @u1_pos_0_0_tmp_v = pu1_src[wd - 1]
BEQ PU1_AVAIL_6_LOOP_U
- LDR r11,[sp,#0x100] @Load pu1_src_top_right from sp
+ LDR r11,[sp,#pu1_src_top_right_offset] @Load pu1_src_top_right from sp
LDRB r11,[r11] @pu1_src_top_right[0]
SUB r12,r9,r11 @pu1_src[wd - 2] - pu1_src_top_right[0]
CMP r12,#0
@@ -150,7 +160,7 @@ ulbl1:
PU1_AVAIL_5_LOOP_V:
- LDR r11,[sp,#0x100] @Load pu1_src_top_right from sp
+ LDR r11,[sp,#pu1_src_top_right_offset] @Load pu1_src_top_right from sp
LDRB r11,[r11,#1] @pu1_src_top_right[1]
SUB r12,r10,r11 @pu1_src[wd - 1] - pu1_src_top_right[1]
CMP r12,#0
@@ -172,7 +182,7 @@ ulbl2:
LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx]
CMP r12,#0 @0 != edge_idx
BEQ PU1_AVAIL_6_LOOP_U
- LDR r11,[sp,#0x110] @Loads pi1_sao_offset_v
+ LDR r11,[sp,#pi1_sao_v_offset] @Loads pi1_sao_offset_v
LDRSB r11,[r11,r12] @pi1_sao_offset_v[edge_idx]
ADD r10,r10,r11 @pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx]
USAT r10,#8,r10 @u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
@@ -180,7 +190,7 @@ ulbl2:
PU1_AVAIL_6_LOOP_U:
STRB r9,[sp,#6]
STRB r10,[sp,#7]
- STR r0,[sp,#0x100] @Store pu1_src in sp
+ STR r0,[sp,#212] @Store pu1_src in sp
LDRB r10,[r5,#6] @pu1_avail[6]
CMP r10,#0
@@ -198,7 +208,7 @@ PU1_AVAIL_6_LOOP_U:
MVNLT r11,#0
MOVGT r11,#1 @SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd])
- LDR r14,[sp,#0x104] @Load pu1_src_bot_left from sp
+ LDR r14,[sp,#pu1_src_bot_left_offset] @Load pu1_src_bot_left from sp
LDRB r14,[r14] @Load pu1_src_bot_left[0]
SUB r14,r10,r14 @pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]
CMP r14,#0
@@ -228,7 +238,7 @@ PU1_AVAIL_6_LOOP_V:
MVNLT r11,#0
MOVGT r11,#1 @SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd])
- LDR r14,[sp,#0x104] @Load pu1_src_bot_left from sp
+ LDR r14,[sp,#pu1_src_bot_left_offset] @Load pu1_src_bot_left from sp
LDRB r14,[r14,#1] @Load pu1_src_bot_left[1]
SUB r14,r9,r14 @pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1]
CMP r14,#0
@@ -244,7 +254,7 @@ ulbl4:
LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx]
CMP r12,#0
BEQ PU1_AVAIL_3_LOOP
- LDR r14,[sp,#0x110] @Loads pi1_sao_offset_v
+ LDR r14,[sp,#pi1_sao_v_offset] @Loads pi1_sao_offset_v
LDRSB r11,[r14,r12] @pi1_sao_offset_v[edge_idx]
ADD r9,r9,r11 @pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx]
USAT r9,#8,r9 @u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
@@ -252,7 +262,7 @@ ulbl4:
PU1_AVAIL_3_LOOP:
STRB r10,[sp,#8]
STRB r9,[sp,#9]
- STR r2,[sp,#0x104] @Store pu1_src_left in sp
+ STR r2,[sp,#216] @Store pu1_src_left in sp
MOV r12,r8 @Move ht
MOV r14,r2 @Move pu1_src_left to pu1_src_left_cpy
@@ -276,7 +286,7 @@ PU1_AVAIL_2_LOOP_END:
VMOV.I16 Q1,#0 @const_min_clip = vdupq_n_s16(0)
VMOV.I16 Q2,#255 @const_max_clip = vdupq_n_u16((1 << bit_depth) - 1)
VLD1.8 D6,[r6] @offset_tbl_u = vld1_s8(pi1_sao_offset_u)
- LDR r6,[sp,#0x110] @Loads pi1_sao_offset_v
+ LDR r6,[sp,#pi1_sao_v_offset] @Loads pi1_sao_offset_v
VLD1.8 D7,[r6] @offset_tbl_v = vld1_s8(pi1_sao_offset_v)
LDR r2, gi1_table_edge_idx_addr_5 @table pointer
ulbl5:
@@ -291,9 +301,9 @@ ulbl5:
BLE WD_16_HT_4_LOOP @If jump to WD_16_HT_4_LOOP
WIDTH_LOOP_16:
- LDR r7,[sp,#0x114] @Loads wd
+ LDR r7,[sp,#wd_offset] @Loads wd
CMP r6,r7 @col == wd
- LDR r5,[sp,#0x108] @Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
LDRBEQ r8,[r5] @pu1_avail[0]
MOVNE r8,#-1
@@ -314,7 +324,7 @@ SKIP_AU1_MASK_VAL:
VLD1.8 D12,[r0]! @pu1_cur_row = vld1q_u8(pu1_src)
VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src)
SUB r0,#8
- ADD r5,sp,#0x4B @*au1_src_left_tmp
+ ADD r5,sp,#75 @*au1_src_left_tmp
SUBEQ r8,r0,r1 @pu1_src - src_strd
VMOV.I8 Q9,#0
@@ -326,15 +336,15 @@ SKIP_AU1_MASK_VAL:
SUB r8,#8
ADD r3,r3,#16
- LDR r4,[sp,#0x118] @Loads ht
+ LDR r4,[sp,#ht_offset] @Loads ht
VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row)
- LDR r7,[sp,#0x114] @Loads wd
+ LDR r7,[sp,#wd_offset] @Loads wd
SUB r7,r7,r6 @(wd - col)
VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row)
ADD r7,r7,#14 @15 + (wd - col)
- LDR r8,[sp,#0x100] @Loads *pu1_src
+ LDR r8,[sp,#212] @Loads *pu1_src
VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)]
@@ -359,7 +369,7 @@ AU1_SRC_LEFT_LOOP:
LDRH r5,[r8,#2] @I
VMOV.16 D19[3],r5 @I vsetq_lane_u8
- LDR r11,[sp,#0x108] @I Loads pu1_avail
+ LDR r11,[sp,#pu1_avail_offset] @I Loads pu1_avail
LDRB r11,[r11,#2] @I pu1_avail[2]
VEXT.8 Q9,Q9,Q8,#14 @I pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14)
@@ -477,7 +487,7 @@ PU1_SRC_LOOP:
VCGT.U8 Q11,Q6,Q14 @II vcgtq_u8(pu1_cur_row, pu1_next_row_tmp)
BNE NEXT_ROW_POINTER_ASSIGNED_2 @III
- LDR r5,[sp,#0x108] @III Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @III Loads pu1_avail
LDRB r5,[r5,#3] @III pu1_avail[3]
CMP r5,#0 @III
SUBNE r11,r4,#4 @III pu1_src[src_strd - 2]
@@ -597,7 +607,7 @@ NEXT_ROW_POINTER_ASSIGNED_2:
LDRB r9,[r0,#17] @load the value pu1_src_cpy[17 - src_strd]
BNE NEXT_ROW_POINTER_ASSIGNED_3
- LDR r5,[sp,#0x108] @Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
LDRB r5,[r5,#3] @pu1_avail[3]
CMP r5,#0
SUBNE r8,r11,#4 @pu1_src[src_strd - 2]
@@ -657,13 +667,13 @@ NEXT_ROW_POINTER_ASSIGNED_3:
INNER_LOOP_DONE:
- LDR r8,[sp,#0x118] @Loads ht
+ LDR r8,[sp,#ht_offset] @Loads ht
VMOVN.I16 D20,Q10 @III vmovn_s16(pi2_tmp_cur_row.val[0])
- ADD r5,sp,#0x4B @*au1_src_left_tmp
+ ADD r5,sp,#75 @*au1_src_left_tmp
LSL r8,r8,#1
VMOVN.I16 D21,Q9 @III vmovn_s16(pi2_tmp_cur_row.val[1])
- LDR r11,[sp,#0x104] @Loads *pu1_src_left
+ LDR r11,[sp,#216] @Loads *pu1_src_left
SRC_LEFT_LOOP:
LDR r7,[r5],#4 @au1_src_left_tmp[row]
@@ -676,7 +686,7 @@ SRC_LEFT_LOOP:
CMP r6,#8 @Check whether residue remains
BLT RE_ASSINING_LOOP @Jump to re-assigning loop
- LDR r7,[sp,#0x114] @Loads wd
+ LDR r7,[sp,#wd_offset] @Loads wd
LDR r0,[sp,#0x02] @Loads *pu1_src
SUB r7,r7,r6
ADD r0,r0,r7
@@ -684,9 +694,9 @@ SRC_LEFT_LOOP:
BEQ WIDTH_RESIDUE @If residue remains jump to residue loop
WD_16_HT_4_LOOP:
- LDR r7,[sp,#0x114] @Loads wd
+ LDR r7,[sp,#wd_offset] @Loads wd
- LDR r5,[sp,#0x108] @Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
CMP r6,r7 @col == wd
LDRBEQ r8,[r5] @pu1_avail[0]
@@ -716,17 +726,17 @@ SKIP_AU1_MASK_VAL_WD_16_HT_4:
VLD1.8 D10,[r8]! @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2)
VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2)
SUB r8,#8
- ADD r5,sp,#0x4B @*au1_src_left_tmp
+ ADD r5,sp,#75 @*au1_src_left_tmp
- LDR r4,[sp,#0x118] @Loads ht
+ LDR r4,[sp,#ht_offset] @Loads ht
VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row)
- LDR r7,[sp,#0x114] @Loads wd
+ LDR r7,[sp,#wd_offset] @Loads wd
SUB r7,r7,r6 @(wd - col)
VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row)
ADD r7,r7,#14 @15 + (wd - col)
- LDR r8,[sp,#0x100] @Loads *pu1_src
+ LDR r8,[sp,#212] @Loads *pu1_src
VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)]
@@ -744,7 +754,7 @@ AU1_SRC_LEFT_LOOP_WD_16_HT_4:
PU1_SRC_LOOP_WD_16_HT_4:
ADD r9,r0,r1 @*pu1_src + src_strd
- LDR r5,[sp,#0x108] @Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
VLD1.8 D16,[r9]! @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
VLD1.8 D17,[r9] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
SUB r9,#8
@@ -766,7 +776,7 @@ NEXT_ROW_POINTER_ASSIGNED_WD_16_HT_4:
CMP r7,r12
BLT SIGN_UP_CHANGE_WD_16_HT_4
- LDR r5,[sp,#0x108] @Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
LDRB r5,[r5,#2] @pu1_avail[2]
CMP r5,#0
BNE SIGN_UP_CHANGE_DONE_WD_16_HT_4
@@ -839,9 +849,9 @@ SIGN_UP_CHANGE_DONE_WD_16_HT_4:
VST1.8 {Q14},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row)
BNE PU1_SRC_LOOP_WD_16_HT_4 @If not equal jump to PU1_SRC_LOOP_WD_16_HT_4
- LDR r8,[sp,#0x118] @Loads ht
- ADD r5,sp,#0x4B @*au1_src_left_tmp
- LDR r11,[sp,#0x104] @Loads *pu1_src_left
+ LDR r8,[sp,#ht_offset] @Loads ht
+ ADD r5,sp,#75 @*au1_src_left_tmp
+ LDR r11,[sp,#216] @Loads *pu1_src_left
SRC_LEFT_LOOP_WD_16_HT_4:
LDR r7,[r5],#4 @au1_src_left_tmp[row]
@@ -851,16 +861,16 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS r6,r6,#16 @Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP @Jump to re-assigning loop
- LDR r7,[sp,#0x114] @Loads wd
+ LDR r7,[sp,#wd_offset] @Loads wd
LDR r0,[sp,#0x02] @Loads *pu1_src
SUB r7,r7,r6
ADD r0,r0,r7
BGT WD_16_HT_4_LOOP @If not equal jump to width_loop
WIDTH_RESIDUE:
- LDR r7,[sp,#0x114] @Loads wd
+ LDR r7,[sp,#wd_offset] @Loads wd
- LDR r5,[sp,#0x108] @Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
CMP r6,r7 @wd_residue == wd
LDRBEQ r8,[r5] @pu1_avail[0]
@@ -878,13 +888,13 @@ WIDTH_RESIDUE:
ADD r10,r10,#2 @pu1_src - src_strd + 2
VMOV.8 d8[6],r11 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15)
- ADD r5,sp,#0x4B @*au1_src_left_tmp
+ ADD r5,sp,#75 @*au1_src_left_tmp
- LDR r4,[sp,#0x118] @Loads ht
+ LDR r4,[sp,#ht_offset] @Loads ht
VMOV.8 d8[7],r11 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15)
- LDR r7,[sp,#0x114] @Loads wd
+ LDR r7,[sp,#wd_offset] @Loads wd
- LDR r8,[sp,#0x100] @Loads *pu1_src
+ LDR r8,[sp,#212] @Loads *pu1_src
VLD1.8 D10,[r10]! @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2)
VLD1.8 D11,[r10] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2)
SUB r10,#8
@@ -917,7 +927,7 @@ PU1_SRC_LOOP_RESIDUE:
VLD1.8 D16,[r9]! @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
VLD1.8 D17,[r9] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
SUB r9,#8
- LDR r5,[sp,#0x108] @Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
LDRB r5,[r5,#3] @pu1_avail[3]
ADD r8,r14,r11,LSL #1 @pu1_src_left_cpy[(ht_tmp - row) * 2]
@@ -940,7 +950,7 @@ NEXT_ROW_POINTER_ASSIGNED_RESIDUE:
VEXT.8 Q9,Q9,Q8,#14 @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14)
BLT SIGN_UP_CHANGE_RESIDUE
- LDR r5,[sp,#0x108] @Loads pu1_avail
+ LDR r5,[sp,#pu1_avail_offset] @Loads pu1_avail
LDRB r5,[r5,#2] @pu1_avail[2]
CMP r5,#0
BNE SIGN_UP_CHANGE_DONE_RESIDUE
@@ -1007,10 +1017,10 @@ SIGN_UP_CHANGE_DONE_RESIDUE:
BNE PU1_SRC_LOOP_RESIDUE @If not equal jump to PU1_SRC_LOOP
- LDR r8,[sp,#0x118] @Loads ht
- ADD r5,sp,#0x4B @*au1_src_left_tmp
+ LDR r8,[sp,#ht_offset] @Loads ht
+ ADD r5,sp,#75 @*au1_src_left_tmp
- LDR r11,[sp,#0x104] @Loads *pu1_src_left
+ LDR r11,[sp,#216] @Loads *pu1_src_left
SRC_LEFT_LOOP_RESIDUE:
LDR r7,[r5],#4 @au1_src_left_tmp[row]
@@ -1020,10 +1030,10 @@ SRC_LEFT_LOOP_RESIDUE:
RE_ASSINING_LOOP:
- LDR r7,[sp,#0x114] @Loads wd
- LDR r8,[sp,#0x118] @Loads ht
+ LDR r7,[sp,#wd_offset] @Loads wd
+ LDR r8,[sp,#ht_offset] @Loads ht
- LDR r0,[sp,#0x100] @Loads *pu1_src
+ LDR r0,[sp,#212] @Loads *pu1_src
SUB r10,r7,#2 @wd - 2
LDRH r9,[sp,#6]
@@ -1032,7 +1042,7 @@ RE_ASSINING_LOOP:
STRH r9,[r0,r10] @pu1_src_org[0] = u1_pos_0_0_tmp
MLA r6,r8,r1,r0 @pu1_src[(ht - 1) * src_strd]
- LDR r4,[sp,#0xFC] @Loads pu1_src_top_left
+ LDR r4,[sp,#pu1_src_top_left_offset] @Loads pu1_src_top_left
LDRH r9,[sp,#8]
ADD r12,sp,#10
@@ -1041,7 +1051,7 @@ RE_ASSINING_LOOP:
LDRH r10,[sp] @load u1_src_top_left_tmp from stack pointer
STRH r10,[r4] @*pu1_src_top_left = u1_src_top_left_tmp
- LDR r3,[sp,#0x10C] @Loads pu1_src_top
+ LDR r3,[sp,#220] @Loads pu1_src_top
SRC_TOP_LOOP:
VLD1.8 D0,[r12]! @pu1_src_top[col] = au1_src_top_tmp[col]
@@ -1050,7 +1060,8 @@ SRC_TOP_LOOP:
BNE SRC_TOP_LOOP
END_LOOPS:
- ADD sp,sp,#0xD4
+ ADD sp,sp,#224
+ vpop {d8 - d15}
LDMFD sp!,{r4-r12,r15} @Reload the registers from SP