summaryrefslogtreecommitdiffstats
path: root/common/arm64/ihevc_deblk_chroma_horz.s
blob: 70971420f887444e635d3f21e10bde060ddb6933 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///*******************************************************************************
//* @file
//*  ihevc_deblk_chroma_horz.s
//*
//* @brief
//*  contains the function definition for the horizontal-edge chroma
//*  deblocking filter. the function is coded in armv8 (aarch64) neon assembly
//*  using gnu assembler syntax.
//*
//* @author
//*  anand s
//*
//* @par list of functions:
//*
//*
//* @remarks
//*  none
//*
//void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
//                             WORD32 src_strd,
//                             WORD32 quant_param_p,
//                             WORD32 quant_param_q,
//                             WORD32 qp_offset_u,
//                             WORD32 qp_offset_v,
//                             WORD32 tc_offset_div2,
//                             WORD32 filter_flag_p,
//                             WORD32 filter_flag_q)
//

// Code goes in the text section. Under GNU as for ARM targets the .align
// operand is a power of two, so ".align 4" requests 16-byte alignment.
.text
.align 4
// NOTE(review): presumably supplies the push_v_regs / pop_v_regs macros used by
// the function below -- confirm against ihevc_neon_macros.s.
.include "ihevc_neon_macros.s"



// Lookup tables defined outside this file; the function below loads their
// addresses through the GOT and indexes them as arrays of 32-bit words.
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
// Exported entry point, marked as a function symbol.
.globl ihevc_deblk_chroma_horz_av8

.type ihevc_deblk_chroma_horz_av8, %function

//------------------------------------------------------------------------------
// void ihevc_deblk_chroma_horz_av8(UWORD8 *pu1_src, WORD32 src_strd,
//                                  WORD32 quant_param_p, WORD32 quant_param_q,
//                                  WORD32 qp_offset_u, WORD32 qp_offset_v,
//                                  WORD32 tc_offset_div2,
//                                  WORD32 filter_flag_p, WORD32 filter_flag_q)
//
// Filters 8 bytes across the edge between the row at (pu1_src - src_strd) and
// the row at pu1_src:
//     delta = clip(((row0 - rowm1) * 4 + rowm2 - rowp1 + 4) >> 3, -tc, tc)
//     rowm1 += delta   (stored only when filter_flag_p != 0)
//     row0  -= delta   (stored only when filter_flag_q != 0)
// Even byte lanes use the tc derived from qp_offset_u, odd lanes the tc derived
// from qp_offset_v (NOTE(review): presumably interleaved U/V samples -- confirm
// against the caller).
//
// In:    x0 = pu1_src, w1 = src_strd, w2 = quant_param_p, w3 = quant_param_q,
//        w4 = qp_offset_u, w5 = qp_offset_v, w6 = tc_offset_div2,
//        w7 = filter_flag_p, [sp] = filter_flag_q
// Out:   none (up to two 8-byte rows are rewritten in place)
// Clobb: x1-x10, x12, v0, v2, v4, v6, v16, v18, v28-v31, flags
//        (x19/x20 are saved and restored; x20 is used as a scratch register)
//------------------------------------------------------------------------------
ihevc_deblk_chroma_horz_av8:
    // Every WORD32 argument is widened before it takes part in 64-bit
    // arithmetic: AAPCS64 leaves bits [63:32] of a register that carries a
    // 32-bit argument unspecified.
    sxtw        x1,w1                       // src_strd
    sxtw        x2,w2                       // quant_param_p
    sxtw        x3,w3                       // quant_param_q
    sxtw        x4,w4                       // qp_offset_u
    sxtw        x5,w5                       // qp_offset_v
    sxtw        x6,w6                       // tc_offset_div2
    sxtw        x7,w7                       // filter_flag_p
    ldr         w9, [sp]                    // filter_flag_q: first stack argument, read before sp is moved
    sxtw        x9,w9
    push_v_regs
    stp         x19, x20,[sp,#-16]!
    mov         x10, x4                     // x10 = qp_offset_u
    mov         x8, x7                      // x8  = filter_flag_p
    mov         x7, x5                      // x7  = qp_offset_v (x5 is reused as a row pointer below)
    mov         x4, x6                      // x4  = tc_offset_div2 (x6 is reused as a row pointer below)

    sub         x12,x0,x1                   // x12 -> row at pu1_src - src_strd
    ld1         {v0.8b},[x0]                // v0  = row0  (8 bytes at pu1_src)
    sub         x5,x12,x1                   // x5  -> row at pu1_src - 2*src_strd
    add         x6,x0,x1                    // x6  -> row at pu1_src + src_strd
    add         x1,x2,x3                    // x1  = quant_param_p + quant_param_q
    uxtl        v0.8h, v0.8b                // widen row0 to 16 bits
    ld1         {v2.8b},[x12]               // v2  = rowm1
    add         x2,x1,#1                    // x2  = quant_param_p + quant_param_q + 1
    ld1         {v4.8b},[x5]                // v4  = rowm2
    ld1         {v16.8b},[x6]               // v16 = rowp1

    // ---- qp for the qp_offset_u path (result in x1) -------------------------
    adds        x1,x10,x2,asr #1            // index = qp_offset_u + ((qp_p + qp_q + 1) >> 1); sets N
    uxtl        v2.8h, v2.8b                // widen rowm1 (flags untouched)
    adrp        x3, :got:gai4_ihevc_qp_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_qp_table]
    bmi         .Ldch_qp_u_done             // negative index: keep it unchanged
    cmp         x1,#0x39                    // 57 is the last index looked up in the table
    bgt         .Ldch_qp_u_above_table
    ldr         w1, [x3,x1,lsl #2]          // index <= 57: qp = gai4_ihevc_qp_table[index]
.Ldch_qp_u_above_table:
    sub         x20,x1,#6                   // flags still come from the cmp above:
    csel        x1, x20, x1,gt              // index > 57: qp = index - 6, else keep the table value
.Ldch_qp_u_done:

    // ---- qp for the qp_offset_v path (result in x2) -------------------------
    adds        x2,x7,x2,asr #1             // index = qp_offset_v + ((qp_p + qp_q + 1) >> 1); sets N
    uxtl        v4.8h, v4.8b                // widen rowm2 (flags untouched)
    bmi         .Ldch_qp_v_done             // negative index: keep it unchanged
    cmp         x2,#0x39
    bgt         .Ldch_qp_v_above_table
    ldr         w2, [x3,x2,lsl #2]          // index <= 57: qp = gai4_ihevc_qp_table[index]
.Ldch_qp_v_above_table:
    sub         x20,x2,#6
    csel        x2, x20, x2,gt              // index > 57: qp = index - 6
.Ldch_qp_v_done:

    // ---- tc index for the u path: clip(qp + 2 + 2*tc_offset_div2, 0, 53) ----
    add         x1,x1,x4,lsl #1             // x1 = qp_u + (tc_offset_div2 << 1)
    sub         v6.8h,  v0.8h ,  v2.8h      // v6 = row0 - rowm1
    add         x3,x1,#2
    cmp         x3,#0x35                    // upper clip bound is 53
    mov         x20,#0x35
    csel        x1, x20, x1,gt              // above 53: index = 53
    shl         v6.8h, v6.8h,#2             // v6 = (row0 - rowm1) * 4
    uxtl        v16.8h, v16.8b              // widen rowp1
    bgt         .Ldch_tc_u_index_done       // already clipped high (vector ops leave flags alone)
    adds        x3,x1,#2                    // sets N when the unclipped index is negative
    add         x20,x1,#2
    csel        x1, x20, x1,pl              // non-negative: index = x1 + 2
    mov         x20,#0
    csel        x1, x20, x1,mi              // negative: index = 0
.Ldch_tc_u_index_done:
    adrp        x3, :got:gai4_ihevc_tc_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_tc_table]
    add         v4.8h,  v6.8h ,  v4.8h      // v4 = (row0 - rowm1) * 4 + rowm2

    // ---- tc index for the v path, same clip ---------------------------------
    add         x2,x2,x4,lsl #1             // x2 = qp_v + (tc_offset_div2 << 1)
    sub         v6.8h,  v4.8h ,  v16.8h     // v6 = (row0 - rowm1) * 4 + rowm2 - rowp1
    add         x4,x2,#2
    ldr         w1, [x3,x1,lsl #2]          // w1 = tc_u = gai4_ihevc_tc_table[index_u]
    cmp         x4,#0x35
    mov         x20,#0x35
    csel        x2, x20, x2,gt              // above 53: index = 53
    bgt         .Ldch_tc_v_index_done
    adds        x4,x2,#2
    add         x20,x2,#2
    csel        x2, x20, x2,pl              // non-negative: index = x2 + 2
    mov         x20,#0
    csel        x2, x20, x2,mi              // negative: index = 0
.Ldch_tc_v_index_done:


    ldr         w2, [x3,x2,lsl #2]          // w2 = tc_v = gai4_ihevc_tc_table[index_v]

    // ---- build per-lane clip bounds and apply the filter --------------------
    dup         v31.8h,w2                   // v31 = tc_v in every lane
    dup         v30.8h,w1                   // v30 = tc_u in every lane
    neg         x1, x1                      // x1 = -tc_u
    srshr       v6.8h, v6.8h,#3             // v6 = (v6 + 4) >> 3 (rounding shift)
    dup         v28.8h,w1                   // v28 = -tc_u
    neg         x1, x2                      // x1 = -tc_v
    zip1        v4.8h, v30.8h, v31.8h       // v4 = { tc_u, tc_v, tc_u, tc_v, ... }
    dup         v29.8h,w1                   // v29 = -tc_v

    zip1        v18.8h, v28.8h, v29.8h      // v18 = { -tc_u, -tc_v, -tc_u, -tc_v, ... }

    smin        v16.8h,  v6.8h ,  v4.8h     // clamp delta from above by +tc
    smax        v4.8h,  v18.8h ,  v16.8h    // clamp delta from below by -tc -> v4 = delta
    add         v2.8h,  v2.8h ,  v4.8h      // rowm1 + delta
    sub         v0.8h,  v0.8h ,  v4.8h      // row0  - delta
    sqxtun      v2.8b, v2.8h                // saturate to unsigned 8-bit
    sqxtun      v0.8b, v0.8h

    // ---- conditional write-back ---------------------------------------------
    cbz         x8, .Ldch_skip_p_store      // filter_flag_p == 0: leave rowm1 untouched
    st1         {v2.8b},[x12]
.Ldch_skip_p_store:
    cbz         x9, .Ldch_skip_q_store      // filter_flag_q == 0: leave row0 untouched
    st1         {v0.8b},[x0]
.Ldch_skip_q_store:
    ldp         x19, x20,[sp],#16
    pop_v_regs
    ret