summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarish Mahendrakar <harish.mahendrakar@ittiam.com>2016-05-20 16:41:17 +0530
committerMSe <mse1969@posteo.de>2018-01-10 20:56:46 +0100
commit6a73e1e1443309687799cff7c958ee2386ca19ae (patch)
tree8609192506c1bac25fe09cdec0be3baf5fb84b89
parent57e131c5cda561dcb8688e23452134bebb3baf4e (diff)
downloadandroid_external_libhevc-6a73e1e1443309687799cff7c958ee2386ca19ae.tar.gz
android_external_libhevc-6a73e1e1443309687799cff7c958ee2386ca19ae.tar.bz2
android_external_libhevc-6a73e1e1443309687799cff7c958ee2386ca19ae.zip
Fixed few issues in SAO arm assemblies
There were few mismatches seen because of wrong clipping and wrong increments in SAO assemblies Change-Id: I8ab28d847b1708b6949eac514f99e475e792cde1
-rw-r--r--common/arm/ihevc_sao_edge_offset_class2_chroma.s4
-rw-r--r--common/arm/ihevc_sao_edge_offset_class3.s4
-rw-r--r--common/arm/ihevc_sao_edge_offset_class3_chroma.s4
-rw-r--r--common/arm64/ihevc_sao_edge_offset_class2.s6
-rw-r--r--common/arm64/ihevc_sao_edge_offset_class2_chroma.s16
-rw-r--r--common/arm64/ihevc_sao_edge_offset_class3.s10
-rw-r--r--common/arm64/ihevc_sao_edge_offset_class3_chroma.s16
7 files changed, 60 insertions, 0 deletions
diff --git a/common/arm/ihevc_sao_edge_offset_class2_chroma.s b/common/arm/ihevc_sao_edge_offset_class2_chroma.s
index b74a8f6..6a301cb 100644
--- a/common/arm/ihevc_sao_edge_offset_class2_chroma.s
+++ b/common/arm/ihevc_sao_edge_offset_class2_chroma.s
@@ -829,6 +829,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS r6,r6,#16 @Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP @Jump to re-assigning loop
+ LDR r7,[sp,#0x114] @Loads wd
+ LDR r0,[sp,#0x02] @Loads *pu1_src
+ SUB r7,r7,r6
+ ADD r0,r0,r7
BGT WD_16_HT_4_LOOP
diff --git a/common/arm/ihevc_sao_edge_offset_class3.s b/common/arm/ihevc_sao_edge_offset_class3.s
index de09d6c..f3482dc 100644
--- a/common/arm/ihevc_sao_edge_offset_class3.s
+++ b/common/arm/ihevc_sao_edge_offset_class3.s
@@ -691,6 +691,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS r6,r6,#16 @Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP @Jump to re-assigning loop
+ LDR r7,[sp,#0xD0] @Loads wd
+ LDR r0,[sp,#0x90] @Loads *pu1_src
+ SUB r7,r7,r6
+ ADD r0,r0,r7
BGT WD_16_HT_4_LOOP @If not equal jump to width_loop
diff --git a/common/arm/ihevc_sao_edge_offset_class3_chroma.s b/common/arm/ihevc_sao_edge_offset_class3_chroma.s
index 62f40d1..fe3b459 100644
--- a/common/arm/ihevc_sao_edge_offset_class3_chroma.s
+++ b/common/arm/ihevc_sao_edge_offset_class3_chroma.s
@@ -851,6 +851,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS r6,r6,#16 @Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP @Jump to re-assigning loop
+ LDR r7,[sp,#0x114] @Loads wd
+ LDR r0,[sp,#0x02] @Loads *pu1_src
+ SUB r7,r7,r6
+ ADD r0,r0,r7
BGT WD_16_HT_4_LOOP @If not equal jump to width_loop
WIDTH_RESIDUE:
diff --git a/common/arm64/ihevc_sao_edge_offset_class2.s b/common/arm64/ihevc_sao_edge_offset_class2.s
index 59eeadd..5494619 100644
--- a/common/arm64/ihevc_sao_edge_offset_class2.s
+++ b/common/arm64/ihevc_sao_edge_offset_class2.s
@@ -146,6 +146,9 @@ PU1_AVAIL_4_LOOP:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x9,x20
+ csel x9, x20, x9, LT //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_7_LOOP:
LDRB w14,[x5,#7] //pu1_avail[7]
@@ -190,6 +193,9 @@ PU1_AVAIL_7_LOOP:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x10,x20
+ csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL:
MOV x12,x8 //Move ht
diff --git a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
index b430709..0a8a748 100644
--- a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
+++ b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
@@ -165,6 +165,9 @@ PU1_AVAIL_4_LOOP_U:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x9,x20
+ csel x9, x20, x9, LT //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_4_LOOP_V:
@@ -201,6 +204,9 @@ PU1_AVAIL_4_LOOP_V:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x10,x20
+ csel x10, x20, x10, LT //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_7_LOOP_U:
STRB w10,[sp,#7]
@@ -249,6 +255,9 @@ PU1_AVAIL_7_LOOP_U:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x10,x20
+ csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_7_LOOP_V:
ADD x12,x12,#1
@@ -286,6 +295,9 @@ PU1_AVAIL_7_LOOP_V:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x9,x20
+ csel x9, x20, x9, LT //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_3_LOOP:
STRB w10,[sp,#8]
@@ -924,6 +936,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS x6,x6,#16 //Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP //Jump to re-assigning loop
+ mov w7, w24 //Loads wd
+ mov x0, x27 //Loads *pu1_src
+ SUB x7,x7,x6
+ ADD x0,x0,x7
BGT WD_16_HT_4_LOOP
diff --git a/common/arm64/ihevc_sao_edge_offset_class3.s b/common/arm64/ihevc_sao_edge_offset_class3.s
index 9d4f26a..924861b 100644
--- a/common/arm64/ihevc_sao_edge_offset_class3.s
+++ b/common/arm64/ihevc_sao_edge_offset_class3.s
@@ -151,6 +151,9 @@ PU1_AVAIL_5_LOOP:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x9,x20
+ csel x9, x20, x9, LT //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_6_LOOP:
LDRB w10,[x5,#6] //pu1_avail[6]
@@ -198,6 +201,9 @@ PU1_AVAIL_6_LOOP:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x10,x20
+ csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_3_LOOP:
MOV x21,x2
@@ -713,6 +719,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS x6,x6,#16 //Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP //Jump to re-assigning loop
+ MOV x7,x16 //Loads wd
+ MOV x0,x15 //Loads *pu1_src
+ SUB x7,x7,x6
+ ADD x0,x0,x7
BGT WD_16_HT_4_LOOP //If not equal jump to width_loop
diff --git a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
index 7c9dfd8..2e145af 100644
--- a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
+++ b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
@@ -160,6 +160,9 @@ PU1_AVAIL_5_LOOP_U:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x9,x20
+ csel x9, x20, x9, LT //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_5_LOOP_V:
@@ -194,6 +197,9 @@ PU1_AVAIL_5_LOOP_V:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x10,x20
+ csel x10, x20, x10, LT //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_6_LOOP_U:
STRB w9,[sp,#6]
@@ -240,6 +246,9 @@ PU1_AVAIL_6_LOOP_U:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x10,x20
+ csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_6_LOOP_V:
ADD x12,x12,#1 //pu1_src[(ht - 1) * src_strd + 1]
@@ -276,6 +285,9 @@ PU1_AVAIL_6_LOOP_V:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+ mov x20,#0
+ cmp x9,x20
+ csel x9, x20, x9, LT //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_3_LOOP:
STRB w10,[sp,#8]
@@ -933,6 +945,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS x6,x6,#16 //Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP //Jump to re-assigning loop
+ mov w7, w24 //Loads wd
+ mov x0, x28 //Loads *pu1_src
+ SUB x7,x7,x6
+ ADD x0,x0,x7
BGT WD_16_HT_4_LOOP //If not equal jump to width_loop
WIDTH_RESIDUE: