diff options
author | Harish Mahendrakar <harish.mahendrakar@ittiam.com> | 2016-05-20 16:41:17 +0530 |
---|---|---|
committer | MSe <mse1969@posteo.de> | 2018-01-10 20:56:46 +0100 |
commit | 6a73e1e1443309687799cff7c958ee2386ca19ae (patch) | |
tree | 8609192506c1bac25fe09cdec0be3baf5fb84b89 | |
parent | 57e131c5cda561dcb8688e23452134bebb3baf4e (diff) | |
download | android_external_libhevc-6a73e1e1443309687799cff7c958ee2386ca19ae.tar.gz android_external_libhevc-6a73e1e1443309687799cff7c958ee2386ca19ae.tar.bz2 android_external_libhevc-6a73e1e1443309687799cff7c958ee2386ca19ae.zip |
Fixed few issues in SAO arm assemblies
There were few mismatches seen because of wrong clipping and wrong increments
in SAO assemblies
Change-Id: I8ab28d847b1708b6949eac514f99e475e792cde1
-rw-r--r-- | common/arm/ihevc_sao_edge_offset_class2_chroma.s | 4 | ||||
-rw-r--r-- | common/arm/ihevc_sao_edge_offset_class3.s | 4 | ||||
-rw-r--r-- | common/arm/ihevc_sao_edge_offset_class3_chroma.s | 4 | ||||
-rw-r--r-- | common/arm64/ihevc_sao_edge_offset_class2.s | 6 | ||||
-rw-r--r-- | common/arm64/ihevc_sao_edge_offset_class2_chroma.s | 16 | ||||
-rw-r--r-- | common/arm64/ihevc_sao_edge_offset_class3.s | 10 | ||||
-rw-r--r-- | common/arm64/ihevc_sao_edge_offset_class3_chroma.s | 16 |
7 files changed, 60 insertions, 0 deletions
diff --git a/common/arm/ihevc_sao_edge_offset_class2_chroma.s b/common/arm/ihevc_sao_edge_offset_class2_chroma.s index b74a8f6..6a301cb 100644 --- a/common/arm/ihevc_sao_edge_offset_class2_chroma.s +++ b/common/arm/ihevc_sao_edge_offset_class2_chroma.s @@ -829,6 +829,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS r6,r6,#16 @Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP @Jump to re-assigning loop + LDR r7,[sp,#0x114] @Loads wd + LDR r0,[sp,#0x02] @Loads *pu1_src + SUB r7,r7,r6 + ADD r0,r0,r7 BGT WD_16_HT_4_LOOP diff --git a/common/arm/ihevc_sao_edge_offset_class3.s b/common/arm/ihevc_sao_edge_offset_class3.s index de09d6c..f3482dc 100644 --- a/common/arm/ihevc_sao_edge_offset_class3.s +++ b/common/arm/ihevc_sao_edge_offset_class3.s @@ -691,6 +691,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS r6,r6,#16 @Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP @Jump to re-assigning loop + LDR r7,[sp,#0xD0] @Loads wd + LDR r0,[sp,#0x90] @Loads *pu1_src + SUB r7,r7,r6 + ADD r0,r0,r7 BGT WD_16_HT_4_LOOP @If not equal jump to width_loop diff --git a/common/arm/ihevc_sao_edge_offset_class3_chroma.s b/common/arm/ihevc_sao_edge_offset_class3_chroma.s index 62f40d1..fe3b459 100644 --- a/common/arm/ihevc_sao_edge_offset_class3_chroma.s +++ b/common/arm/ihevc_sao_edge_offset_class3_chroma.s @@ -851,6 +851,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS r6,r6,#16 @Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP @Jump to re-assigning loop + LDR r7,[sp,#0x114] @Loads wd + LDR r0,[sp,#0x02] @Loads *pu1_src + SUB r7,r7,r6 + ADD r0,r0,r7 BGT WD_16_HT_4_LOOP @If not equal jump to width_loop WIDTH_RESIDUE: diff --git a/common/arm64/ihevc_sao_edge_offset_class2.s b/common/arm64/ihevc_sao_edge_offset_class2.s index 59eeadd..5494619 100644 --- a/common/arm64/ihevc_sao_edge_offset_class2.s +++ b/common/arm64/ihevc_sao_edge_offset_class2.s @@ -146,6 +146,9 @@ PU1_AVAIL_4_LOOP: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_7_LOOP: LDRB w14,[x5,#7] //pu1_avail[7] @@ -190,6 +193,9 @@ PU1_AVAIL_7_LOOP: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL: MOV x12,x8 //Move ht diff --git a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s index b430709..0a8a748 100644 --- a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s +++ b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s @@ -165,6 +165,9 @@ PU1_AVAIL_4_LOOP_U: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_4_LOOP_V: @@ -201,6 +204,9 @@ PU1_AVAIL_4_LOOP_V: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_7_LOOP_U: STRB w10,[sp,#7] @@ -249,6 +255,9 @@ PU1_AVAIL_7_LOOP_U: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_7_LOOP_V: ADD x12,x12,#1 @@ -286,6 +295,9 @@ PU1_AVAIL_7_LOOP_V: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_3_LOOP: STRB w10,[sp,#8] @@ -924,6 +936,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS x6,x6,#16 //Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP //Jump to re-assigning loop + mov w7, w24 //Loads wd + mov x0, x27 //Loads *pu1_src + SUB x7,x7,x6 + ADD x0,x0,x7 BGT WD_16_HT_4_LOOP diff --git a/common/arm64/ihevc_sao_edge_offset_class3.s b/common/arm64/ihevc_sao_edge_offset_class3.s index 9d4f26a..924861b 100644 --- a/common/arm64/ihevc_sao_edge_offset_class3.s +++ b/common/arm64/ihevc_sao_edge_offset_class3.s @@ -151,6 +151,9 @@ PU1_AVAIL_5_LOOP: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_6_LOOP: LDRB w10,[x5,#6] //pu1_avail[6] @@ -198,6 +201,9 @@ PU1_AVAIL_6_LOOP: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_3_LOOP: MOV x21,x2 @@ -713,6 +719,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS x6,x6,#16 //Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP //Jump to re-assigning loop + MOV x7,x16 //Loads wd + MOV x0,x15 //Loads *pu1_src + SUB x7,x7,x6 + ADD x0,x0,x7 BGT WD_16_HT_4_LOOP //If not equal jump to width_loop diff --git a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s index 7c9dfd8..2e145af 100644 --- a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s +++ b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s @@ -160,6 +160,9 @@ PU1_AVAIL_5_LOOP_U: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_5_LOOP_V: @@ -194,6 +197,9 @@ PU1_AVAIL_5_LOOP_V: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_6_LOOP_U: STRB w9,[sp,#6] @@ -240,6 +246,9 @@ PU1_AVAIL_6_LOOP_U: mov x20,#255 cmp x10,x20 csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x10,x20 + csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_6_LOOP_V: ADD x12,x12,#1 //pu1_src[(ht - 1) * src_strd + 1] @@ -276,6 +285,9 @@ PU1_AVAIL_6_LOOP_V: mov x20,#255 cmp x9,x20 csel x9, x20, x9, ge //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) + mov x20,#0 + cmp x9,x20 + csel x9, x20, x9, LT //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) PU1_AVAIL_3_LOOP: STRB w10,[sp,#8] @@ -933,6 +945,10 @@ SRC_LEFT_LOOP_WD_16_HT_4: SUBS x6,x6,#16 //Decrement the wd loop count by 16 BLE RE_ASSINING_LOOP //Jump to re-assigning loop + mov w7, w24 //Loads wd + mov x0, x28 //Loads *pu1_src + SUB x7,x7,x6 + ADD x0,x0,x7 BGT WD_16_HT_4_LOOP //If not equal jump to width_loop WIDTH_RESIDUE: |