summaryrefslogtreecommitdiffstats
path: root/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
blob: 6bebda24f92dad2c2158066667e4b2b1eea640a9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;


    EXPORT |vp8_dequant_dc_idct_add_v6|

    AREA |.text|, CODE, READONLY

;void vp8_dequant_dc_idct_v6(short *input, short *dq, unsigned char *pred,
; unsigned char *dest, int pitch, int stride, int Dc)
; r0 = input
; r1 = dq
; r2 = pred
; r3 = dest
; sp + 36 = pitch  ; +4 = 40
; sp + 40 = stride  ; +4 = 44
; sp + 44 = Dc  ; +4 = 48


|vp8_dequant_dc_idct_add_v6| PROC
    stmdb   sp!, {r4-r11, lr}

    ldr     r6, [sp, #44]

    ldr     r4, [r0]                ;input
    ldr     r5, [r1], #4            ;dq

    sub     sp, sp, #4
    str     r3, [sp]

    smultt  r7, r4, r5

    ldr     r4, [r0, #4]            ;input
    ldr     r5, [r1], #4            ;dq

    strh    r6, [r0], #2
    strh    r7, [r0], #2

    smulbb  r6, r4, r5
    smultt  r7, r4, r5

    ldr     r4, [r0, #4]            ;input
    ldr     r5, [r1], #4            ;dq

    strh    r6, [r0], #2
    strh    r7, [r0], #2

    mov     r12, #3

vp8_dequant_dc_add_loop
    smulbb  r6, r4, r5
    smultt  r7, r4, r5

    ldr     r4, [r0, #4]            ;input
    ldr     r5, [r1], #4            ;dq

    strh    r6, [r0], #2
    strh    r7, [r0], #2

    smulbb  r6, r4, r5
    smultt  r7, r4, r5

    subs    r12, r12, #1

    ldrne   r4, [r0, #4]
    ldrne   r5, [r1], #4

    strh    r6, [r0], #2
    strh    r7, [r0], #2

    bne     vp8_dequant_dc_add_loop

    sub     r0, r0, #32
    mov     r1, r0

; short_idct4x4llm_v6_dual
    ldr     r3, cospi8sqrt2minus1
    ldr     r4, sinpi8sqrt2
    ldr     r6, [r0, #8]
    mov     r5, #2
vp8_dequant_dc_idct_loop1_v6
    ldr     r12, [r0, #24]
    ldr     r14, [r0, #16]
    smulwt  r9, r3, r6
    smulwb  r7, r3, r6
    smulwt  r10, r4, r6
    smulwb  r8, r4, r6
    pkhbt   r7, r7, r9, lsl #16
    smulwt  r11, r3, r12
    pkhbt   r8, r8, r10, lsl #16
    uadd16  r6, r6, r7
    smulwt  r7, r4, r12
    smulwb  r9, r3, r12
    smulwb  r10, r4, r12
    subs    r5, r5, #1
    pkhbt   r9, r9, r11, lsl #16
    ldr     r11, [r0], #4
    pkhbt   r10, r10, r7, lsl #16
    uadd16  r7, r12, r9
    usub16  r7, r8, r7
    uadd16  r6, r6, r10
    uadd16  r10, r11, r14
    usub16  r8, r11, r14
    uadd16  r9, r10, r6
    usub16  r10, r10, r6
    uadd16  r6, r8, r7
    usub16  r7, r8, r7
    str     r6, [r1, #8]
    ldrne   r6, [r0, #8]
    str     r7, [r1, #16]
    str     r10, [r1, #24]
    str     r9, [r1], #4
    bne     vp8_dequant_dc_idct_loop1_v6

    mov     r5, #2
    sub     r0, r1, #8
vp8_dequant_dc_idct_loop2_v6
    ldr     r6, [r0], #4
    ldr     r7, [r0], #4
    ldr     r8, [r0], #4
    ldr     r9, [r0], #4
    smulwt  r1, r3, r6
    smulwt  r12, r4, r6
    smulwt  lr, r3, r8
    smulwt  r10, r4, r8
    pkhbt   r11, r8, r6, lsl #16
    pkhbt   r1, lr, r1, lsl #16
    pkhbt   r12, r10, r12, lsl #16
    pkhtb   r6, r6, r8, asr #16
    uadd16  r6, r1, r6
    pkhbt   lr, r9, r7, lsl #16
    uadd16  r10, r11, lr
    usub16  lr, r11, lr
    pkhtb   r8, r7, r9, asr #16
    subs    r5, r5, #1
    smulwt  r1, r3, r8
    smulwb  r7, r3, r8
    smulwt  r11, r4, r8
    smulwb  r9, r4, r8
    pkhbt   r1, r7, r1, lsl #16
    uadd16  r8, r1, r8
    pkhbt   r11, r9, r11, lsl #16
    usub16  r1, r12, r8
    uadd16  r8, r11, r6
    ldr     r9, c0x00040004
    ldr     r12, [sp, #40]
    uadd16  r6, r10, r8
    usub16  r7, r10, r8
    uadd16  r7, r7, r9
    uadd16  r6, r6, r9
    uadd16  r10, r14, r1
    usub16  r1, r14, r1
    uadd16  r10, r10, r9
    uadd16  r1, r1, r9
    ldr     r11, [r2], r12
    mov     r8, r7, asr #3
    pkhtb   r9, r8, r10, asr #19
    mov     r8, r1, asr #3
    pkhtb   r8, r8, r6, asr #19
    uxtb16  lr, r11, ror #8
    qadd16  r9, r9, lr
    uxtb16  lr, r11
    qadd16  r8, r8, lr
    usat16  r9, #8, r9
    usat16  r8, #8, r8
    orr     r9, r8, r9, lsl #8
    ldr     r11, [r2], r12
    ldr     lr, [sp]
    ldr     r12, [sp, #44]
    mov     r7, r7, lsl #16
    mov     r1, r1, lsl #16
    mov     r10, r10, lsl #16
    mov     r6, r6, lsl #16
    mov     r7, r7, asr #3
    pkhtb   r7, r7, r10, asr #19
    mov     r1, r1, asr #3
    pkhtb   r1, r1, r6, asr #19
    uxtb16  r8, r11, ror #8
    qadd16  r7, r7, r8
    uxtb16  r8, r11
    qadd16  r1, r1, r8
    usat16  r7, #8, r7
    usat16  r1, #8, r1
    orr     r1, r1, r7, lsl #8
    str     r9, [lr], r12
    str     r1, [lr], r12
    str     lr, [sp]
    bne     vp8_dequant_dc_idct_loop2_v6

; vpx_memset
    sub     r0, r0, #32
    add     sp, sp, #4

    mov     r12, #0
    str     r12, [r0]
    str     r12, [r0, #4]
    str     r12, [r0, #8]
    str     r12, [r0, #12]
    str     r12, [r0, #16]
    str     r12, [r0, #20]
    str     r12, [r0, #24]
    str     r12, [r0, #28]

    ldmia   sp!, {r4 - r11, pc}
    ENDP    ; |vp8_dequant_dc_idct_add_v6|

; Constant Pool
cospi8sqrt2minus1 DCD 0x00004E7B
sinpi8sqrt2       DCD 0x00008A8C
c0x00040004       DCD 0x00040004

    END