diff options
| author | Harish Mahendrakar <harish.mahendrakar@ittiam.com> | 2017-11-07 22:03:40 +0000 |
|---|---|---|
| committer | android-build-merger <android-build-merger@google.com> | 2017-11-07 22:03:40 +0000 |
| commit | 20bbc546f4cec7ad2f294b5c282f1c1b87c6f2bf (patch) | |
| tree | c24682982ab31bf94e2d8df11992146276e89e64 /common | |
| parent | 289026aa8dab7661340e43393ed1889df4051d24 (diff) | |
| parent | 60d381a2da64c439ecc702c0c50fbb4340cd41a0 (diff) | |
| download | platform_external_libhevc-20bbc546f4cec7ad2f294b5c282f1c1b87c6f2bf.tar.gz platform_external_libhevc-20bbc546f4cec7ad2f294b5c282f1c1b87c6f2bf.tar.bz2 platform_external_libhevc-20bbc546f4cec7ad2f294b5c282f1c1b87c6f2bf.zip | |
Fixed a few issues in SAO ARM assemblies
am: 60d381a2da
Change-Id: Ie4871a5b34120cc8d623a48137caeebcffaf6bb2
Diffstat (limited to 'common')
| -rw-r--r-- | common/arm/ihevc_sao_edge_offset_class2_chroma.s | 4 | ||||
| -rw-r--r-- | common/arm/ihevc_sao_edge_offset_class3.s | 4 | ||||
| -rw-r--r-- | common/arm/ihevc_sao_edge_offset_class3_chroma.s | 4 | ||||
| -rw-r--r-- | common/arm64/ihevc_sao_edge_offset_class2.s | 6 | ||||
| -rw-r--r-- | common/arm64/ihevc_sao_edge_offset_class2_chroma.s | 16 | ||||
| -rw-r--r-- | common/arm64/ihevc_sao_edge_offset_class3.s | 10 | ||||
| -rw-r--r-- | common/arm64/ihevc_sao_edge_offset_class3_chroma.s | 16 |
7 files changed, 60 insertions, 0 deletions
diff --git a/common/arm/ihevc_sao_edge_offset_class2_chroma.s b/common/arm/ihevc_sao_edge_offset_class2_chroma.s index b74a8f6..6a301cb 100644 --- a/common/arm/ihevc_sao_edge_offset_class2_chroma.s +++ b/common/arm/ihevc_sao_edge_offset_class2_chroma.s @@ -829,6 +829,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS r6,r6,#16 @Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP @Jump to re-assigning loop + LDR r7,[sp,#0x114] @Loads wd + LDR r0,[sp,#0x02] @Loads *pu1_src + SUB r7,r7,r6 + ADD r0,r0,r7 BGT WD_16_HT_4_LOOP diff --git a/common/arm/ihevc_sao_edge_offset_class3.s b/common/arm/ihevc_sao_edge_offset_class3.s index de09d6c..f3482dc 100644 --- a/common/arm/ihevc_sao_edge_offset_class3.s +++ b/common/arm/ihevc_sao_edge_offset_class3.s @@ -691,6 +691,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS r6,r6,#16 @Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP @Jump to re-assigning loop + LDR r7,[sp,#0xD0] @Loads wd + LDR r0,[sp,#0x90] @Loads *pu1_src + SUB r7,r7,r6 + ADD r0,r0,r7 BGT WD_16_HT_4_LOOP @If not equal jump to width_loop diff --git a/common/arm/ihevc_sao_edge_offset_class3_chroma.s b/common/arm/ihevc_sao_edge_offset_class3_chroma.s index 62f40d1..fe3b459 100644 --- a/common/arm/ihevc_sao_edge_offset_class3_chroma.s +++ b/common/arm/ihevc_sao_edge_offset_class3_chroma.s @@ -851,6 +851,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS r6,r6,#16 @Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP @Jump to re-assigning loop + LDR r7,[sp,#0x114] @Loads wd + LDR r0,[sp,#0x02] @Loads *pu1_src + SUB r7,r7,r6 + ADD r0,r0,r7 BGT WD_16_HT_4_LOOP @If not equal jump to width_loop WIDTH_RESIDUE: diff --git a/common/arm64/ihevc_sao_edge_offset_class2.s b/common/arm64/ihevc_sao_edge_offset_class2.s index 59eeadd..5494619 100644 --- a/common/arm64/ihevc_sao_edge_offset_class2.s +++ b/common/arm64/ihevc_sao_edge_offset_class2.s @@ -146,6 +146,9 @@ PU1_AVAIL_4_LOOP: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 
1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_7_LOOP: LDRB w14,[x5,#7] //pu1_avail[7] @@ -190,6 +193,9 @@ PU1_AVAIL_7_LOOP: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL: MOV x12,x8 //Move ht diff --git a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s index b430709..0a8a748 100644 --- a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s +++ b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s @@ -165,6 +165,9 @@ PU1_AVAIL_4_LOOP_U: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_4_LOOP_V: @@ -201,6 +204,9 @@ PU1_AVAIL_4_LOOP_V: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_7_LOOP_U: STRB w10,[sp,#7] @@ -249,6 +255,9 @@ PU1_AVAIL_7_LOOP_U: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_7_LOOP_V: ADD x12,x12,#1 @@ -286,6 +295,9 @@ 
PU1_AVAIL_7_LOOP_V: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_3_LOOP: STRB w10,[sp,#8] @@ -924,6 +936,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS x6,x6,#16 //Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP //Jump to re-assigning loop + mov w7, w24 //Loads wd + mov x0, x27 //Loads *pu1_src + SUB x7,x7,x6 + ADD x0,x0,x7 BGT WD_16_HT_4_LOOP diff --git a/common/arm64/ihevc_sao_edge_offset_class3.s b/common/arm64/ihevc_sao_edge_offset_class3.s index 9d4f26a..924861b 100644 --- a/common/arm64/ihevc_sao_edge_offset_class3.s +++ b/common/arm64/ihevc_sao_edge_offset_class3.s @@ -151,6 +151,9 @@ PU1_AVAIL_5_LOOP: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_6_LOOP: LDRB w10,[x5,#6] //pu1_avail[6] @@ -198,6 +201,9 @@ PU1_AVAIL_6_LOOP: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_3_LOOP: MOV x21,x2 @@ -713,6 +719,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS x6,x6,#16 //Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP //Jump to re-assigning loop + MOV x7,x16 //Loads wd + MOV x0,x15 //Loads *pu1_src + SUB x7,x7,x6 + ADD x0,x0,x7 BGT WD_16_HT_4_LOOP //If not equal jump to width_loop diff --git a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s 
b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s index 7c9dfd8..2e145af 100644 --- a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s +++ b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s @@ -160,6 +160,9 @@ PU1_AVAIL_5_LOOP_U: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_5_LOOP_V: @@ -194,6 +197,9 @@ PU1_AVAIL_5_LOOP_V: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_6_LOOP_U: STRB w9,[sp,#6] @@ -240,6 +246,9 @@ PU1_AVAIL_6_LOOP_U: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_6_LOOP_V: ADD x12,x12,#1 //pu1_src[(ht - 1) * src_strd + 1] @@ -276,6 +285,9 @@ PU1_AVAIL_6_LOOP_V: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_3_LOOP: STRB w10,[sp,#8] @@ -933,6 +945,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS x6,x6,#16 //Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP //Jump to re-assigning loop + mov w7, w24 //Loads wd + mov x0, x28 //Loads *pu1_src + SUB x7,x7,x6 + ADD x0,x0,x7 BGT WD_16_HT_4_LOOP //If not 
equal jump to width_loop WIDTH_RESIDUE: |
