summaryrefslogtreecommitdiffstats
path: root/common/arm/ihevc_deblk_chroma_vert.s
blob: 3962b28e087136b212d87abfe50f269d1137c4b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevc_deblk_luma_vert.s
@*
@* @brief
@*  contains function definitions for inter prediction  interpolation.
@* functions are coded using neon  intrinsics and can be compiled using

@* rvct
@*
@* @author
@*  anand s
@*
@* @par list of functions:
@*
@*
@* @remarks
@*  none
@*
@*******************************************************************************/

.equ    qp_offset_u_offset,     40
.equ    qp_offset_v_offset,     44
.equ    tc_offset_div2_offset,  48
.equ    filter_p_offset,        52
.equ    filter_q_offset,        56

.text
.align 4





.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_vert_a9q

gai4_ihevc_qp_table_addr:
.long gai4_ihevc_qp_table - ulbl1 - 8

gai4_ihevc_tc_table_addr:
.long gai4_ihevc_tc_table  - ulbl2 - 8

.type ihevc_deblk_chroma_vert_a9q, %function

ihevc_deblk_chroma_vert_a9q:
    push        {r4-r12,lr}
    sub         r8,r0,#4
    add         r2,r2,r3
    vld1.8      {d5},[r8],r1
    add         r2,r2,#1
    vld1.8      {d17},[r8],r1
    ldr         r7,[sp,#qp_offset_u_offset]
    vld1.8      {d16},[r8],r1
    ldr         r4,[sp,#filter_q_offset]
    vld1.8      {d4},[r8]
    ldr         r5,[sp,#tc_offset_div2_offset]
    vtrn.8      d5,d17
    adds        r3,r7,r2,asr #1
    vtrn.8      d16,d4
    ldr         r7,gai4_ihevc_qp_table_addr
ulbl1:
    add         r7,r7,pc
    ldr         r12,[sp,#filter_p_offset]
    ldr         r6,[sp,#qp_offset_v_offset]
    bmi         l1.2944
    cmp         r3,#0x39
    ldrle       r3,[r7,r3,lsl #2]
    subgt       r3,r3,#6
l1.2944:
    vtrn.16     d5,d16
    adds        r2,r6,r2,asr #1
    vtrn.16     d17,d4
    bmi         l1.2964
    cmp         r2,#0x39
    ldrle       r2,[r7,r2,lsl #2]
    subgt       r2,r2,#6
l1.2964:
    vtrn.32     d5,d17
    add         r3,r3,r5,lsl #1
    vtrn.32     d16,d4
    add         r6,r3,#2
    vmovl.u8    q9,d17
    cmp         r6,#0x35
    movgt       r3,#0x35
    bgt         l1.2996
    adds        r6,r3,#2
    addpl       r3,r3,#2
    movmi       r3,#0
l1.2996:
    vsubl.u8    q0,d17,d16
    ldr         r6,gai4_ihevc_tc_table_addr
ulbl2:
    add         r6,r6,pc
    vshl.i16    q0,q0,#2
    add         r2,r2,r5,lsl #1
    add         r5,r2,#2
    vaddw.u8    q0,q0,d5
    cmp         r5,#0x35
    ldr         r3,[r6,r3,lsl #2]
    vsubw.u8    q2,q0,d4
    movgt       r2,#0x35
    bgt         l1.3036
    adds        r5,r2,#2
    addpl       r2,r2,#2
    movmi       r2,#0
l1.3036:


    vrshr.s16   q3,q2,#3
    vdup.16     d2,r3
    ldr         r2,[r6,r2,lsl #2]
    rsb         r3,r3,#0
    cmp         r12,#0
    vdup.16     d3,r2
    rsb         r2,r2,#0
    vdup.16     d30,r3
    vdup.16     d31,r2


    vmin.s16    q2,q3,q1
    vmax.s16    q1,q15,q2

    vmovl.u8    q3,d16

    vadd.i16    q0,q3,q1
    vsub.i16    q1,q9,q1
    vqmovun.s16 d0,q0
    sub         r2,r0,#2
    vqmovun.s16 d1,q1
    vtrn.32     d0,d1
    vtrn.8      d0,d1
    beq         l1.3204

    vst1.16     {d0[0]},[r2],r1
    vst1.16     {d1[0]},[r2],r1
    vst1.16     {d0[1]},[r2],r1
    vst1.16     {d1[1]},[r2]
l1.3204:
    cmp         r4,#0
    beq         l1.3228
    vst1.16     {d0[2]},[r0],r1
    vst1.16     {d1[2]},[r0],r1
    vst1.16     {d0[3]},[r0],r1
    vst1.16     {d1[3]},[r0]
l1.3228:
    pop         {r4-r12,pc}