1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
|
///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///*******************************************************************************
//* @file
//* ihevc_deblk_chroma_horz.s
//*
//* @brief
//* contains the function definition for deblocking of chroma samples
//* across a horizontal edge. the function is coded in armv8 (aarch64)
//* neon assembly using gnu assembler syntax
//*
//* @author
//* anand s
//*
//* @par list of functions:
//* - ihevc_deblk_chroma_horz_av8()
//*
//* @remarks
//* none
//*
//void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
// WORD32 src_strd,
// WORD32 quant_param_p,
// WORD32 quant_param_q,
// WORD32 qp_offset_u,
// WORD32 qp_offset_v,
// WORD32 tc_offset_div2,
// WORD32 filter_flag_p,
// WORD32 filter_flag_q)
//
// Code goes into the executable text section.
.text
// NOTE(review): on GNU as for AArch64 the operand of .align is a power-of-two
// exponent, so this should request 16-byte alignment - confirm for the
// assembler actually used to build this file.
.align 4
// Provides the push_v_regs / pop_v_regs macros used by the function below.
.include "ihevc_neon_macros.s"
// Lookup tables defined in another translation unit; the function indexes
// both of them with 4-byte elements (ldr w, [base, idx, lsl #2]).
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
// Export the entry point and mark the symbol as a function.
.globl ihevc_deblk_chroma_horz_av8
.type ihevc_deblk_chroma_horz_av8, %function
//------------------------------------------------------------------------------
// void ihevc_deblk_chroma_horz_av8(UWORD8 *pu1_src,        x0
//                                  WORD32 src_strd,        w1
//                                  WORD32 quant_param_p,   w2
//                                  WORD32 quant_param_q,   w3
//                                  WORD32 qp_offset_u,     w4
//                                  WORD32 qp_offset_v,     w5
//                                  WORD32 tc_offset_div2,  w6
//                                  WORD32 filter_flag_p,   w7
//                                  WORD32 filter_flag_q)   [sp]
//
// Filters 8 bytes of the two rows on either side of a horizontal edge:
//      p1 = pu1_src - 2*src_strd      (read only)
//      p0 = pu1_src -   src_strd      (written when filter_flag_p != 0)
//      q0 = pu1_src                   (written when filter_flag_q != 0)
//      q1 = pu1_src +   src_strd      (read only)
//
//      delta = clip(-tc, tc, (((q0 - p0) << 2) + p1 - q1 + 4) >> 3)
//      p0'   = sat_u8(p0 + delta)
//      q0'   = sat_u8(q0 - delta)
//
// Two tc values are computed, one using qp_offset_u and one using qp_offset_v.
// They are applied to even and odd byte lanes respectively, i.e. the row is
// treated as interleaved U/V samples.
//
// All WORD32 arguments are narrowed/extended from their W registers before
// use: the upper 32 bits of an X register holding a 32-bit argument are not
// guaranteed by the procedure call standard.
//
// Scratch: x1-x10, x12, x20 (x20 is callee-saved and is spilled below),
//          v0, v2, v4, v6, v16, v18, v28-v31, NZCV flags.
//------------------------------------------------------------------------------
ihevc_deblk_chroma_horz_av8:
    sxtw        x1,w1                       // src_strd (may be negative)
    sxtw        x2,w2                       // quant_param_p
    sxtw        x3,w3                       // quant_param_q
    sxtw        x4,w4                       // qp_offset_u
    sxtw        x5,w5                       // qp_offset_v
    sxtw        x6,w6                       // tc_offset_div2
    ldr         w9, [sp]                    // filter_flag_q; read before the
                                            // prologue below moves sp
    push_v_regs                             // macro from ihevc_neon_macros.s
    stp         x19, x20,[sp,#-16]!         // x20 is used as a temp; x19 is
                                            // not used and only fills the
                                            // 16-byte slot

    mov         x10, x4                     // x10 = qp_offset_u
    mov         w8, w7                      // w8  = filter_flag_p (low 32 bits only)
    mov         x7, x5                      // x7  = qp_offset_v
    mov         x4, x6                      // x4  = tc_offset_div2

    sub         x12,x0,x1                   // x12 -> p0 row
    ld1         {v0.8b},[x0]                // v0 = q0
    sub         x5,x12,x1                   // x5  -> p1 row
    add         x6,x0,x1                    // x6  -> q1 row
    add         x1,x2,x3                    // quant_param_p + quant_param_q
    uxtl        v0.8h, v0.8b                // widen q0 to 16 bit
    ld1         {v2.8b},[x12]               // v2 = p0
    add         x2,x1,#1                    // x2 = qp_p + qp_q + 1
    ld1         {v4.8b},[x5]                // v4 = p1
    ld1         {v16.8b},[x6]               // v16 = q1

    // ---- U: index = qp_offset_u + ((qp_p + qp_q + 1) >> 1) ----
    adds        x1,x10,x2,asr #1            // flags consumed by bmi below
    uxtl        v2.8h, v2.8b                // widen p0 to 16 bit
    adrp        x3, :got:gai4_ihevc_qp_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_qp_table]
    bmi         l1.3312                     // negative index: use it unchanged
    cmp         x1,#0x39                    // 57 is the last index looked up
    bgt         lbl78
    ldr         w1, [x3,x1,lsl #2]          // index <= 57: table lookup
lbl78:
    sub         x20,x1,#6                   // index > 57: index - 6
    csel        x1, x20, x1,gt              // flags still from the cmp above
l1.3312:

    // ---- V: index = qp_offset_v + ((qp_p + qp_q + 1) >> 1) ----
    adds        x2,x7,x2,asr #1
    uxtl        v4.8h, v4.8b                // widen p1 to 16 bit
    bmi         l1.3332
    cmp         x2,#0x39
    bgt         lbl85
    ldr         w2, [x3,x2,lsl #2]
lbl85:
    sub         x20,x2,#6
    csel        x2, x20, x2,gt
l1.3332:

    // ---- U: tc index = clip(0, 53, qp_u + 2 + (tc_offset_div2 << 1)) ----
    add         x1,x1,x4,lsl #1
    sub         v6.8h, v0.8h , v2.8h        // q0 - p0
    add         x3,x1,#2
    cmp         x3,#0x35
    mov         x20,#0x35
    csel        x1, x20, x1,gt              // upper clamp to 53
    shl         v6.8h, v6.8h,#2             // (q0 - p0) << 2
    uxtl        v16.8h, v16.8b              // widen q1 to 16 bit
    bgt         l1.3368                     // flags still from the cmp above
    adds        x1,x1,#2                    // not clamped high: apply the +2
    csel        x1, xzr, x1,mi              // lower clamp to 0
l1.3368:
    adrp        x3, :got:gai4_ihevc_tc_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_tc_table]
    add         v4.8h, v6.8h , v4.8h        // ((q0 - p0) << 2) + p1

    // ---- V: tc index = clip(0, 53, qp_v + 2 + (tc_offset_div2 << 1)) ----
    add         x2,x2,x4,lsl #1
    sub         v6.8h, v4.8h , v16.8h       // ((q0 - p0) << 2) + p1 - q1
    add         x4,x2,#2
    ldr         w1, [x3,x1,lsl #2]          // w1 = tc for U lanes
    cmp         x4,#0x35
    mov         x20,#0x35
    csel        x2, x20, x2,gt
    bgt         l1.3412
    adds        x2,x2,#2
    csel        x2, xzr, x2,mi
l1.3412:
    ldr         w2, [x3,x2,lsl #2]          // w2 = tc for V lanes

    // ---- clip delta to [-tc, +tc] per lane and apply ----
    dup         v31.8h,w2                   // +tc_v in every lane
    dup         v30.8h,w1                   // +tc_u in every lane
    neg         x1, x1                      // -tc_u
    srshr       v6.8h, v6.8h,#3             // rounding shift: (x + 4) >> 3
    dup         v28.8h,w1
    neg         x1, x2                      // -tc_v
    zip1        v4.8h, v30.8h, v31.8h       // +tc as u,v,u,v,...
    dup         v29.8h,w1
    zip1        v18.8h, v28.8h, v29.8h      // -tc as u,v,u,v,...
    smin        v16.8h, v6.8h , v4.8h       // min(delta, +tc)
    smax        v4.8h, v18.8h , v16.8h      // max(-tc, ...) -> clipped delta
    add         v2.8h, v2.8h , v4.8h        // p0 + delta
    sub         v0.8h, v0.8h , v4.8h        // q0 - delta
    sqxtun      v2.8b, v2.8h                // saturate to 0..255
    sqxtun      v0.8b, v0.8h

    cbz         w8, l1.3528                 // filter_flag_p == 0: keep p0 row
    st1         {v2.8b},[x12]
l1.3528:
    cbz         w9, l1.3540                 // filter_flag_q == 0: keep q0 row
    st1         {v0.8b},[x0]
l1.3540:
    ldp         x19, x20,[sp],#16
    pop_v_regs
    ret
|