summaryrefslogtreecommitdiffstats
path: root/libvpx/vp8/common/ppc/recon_altivec.asm
blob: dd39e05a83663080a6f4f7b5ea210a55a0822862 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl recon4b_ppc
    .globl recon2b_ppc
    .globl recon_b_ppc

;# row_of16 Diff Pred Dst Stride
;#   Reconstruct one row of 16 pels: dst[i] = clamp(pred[i] + diff[i]).
;#     Diff   = register pointing at 16 signed 16-bit residuals (32 bytes)
;#     Pred   = register pointing at 16 unsigned 8-bit predictor pels
;#     Dst    = register pointing at the 16 output bytes
;#     Stride = register holding the byte distance between dst rows
;#   Implicit inputs (set up by the caller): v0 = all zeros, r8 = 16.
;#   On exit Pred += 16, Diff += 32, Dst += Stride.  Clobbers v1, v2, v3.
;#   lvx/stvx ignore the low four bits of the effective address, so all
;#   three pointers are expected to be 16-byte aligned.
.macro row_of16 Diff Pred Dst Stride
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    addi    \Pred, \Pred, 16        ;# next pred
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7 (zero bytes from v0 interleaved)
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7 (signed saturating add)
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15 (at Diff + 16 bytes)
    addi    \Diff, \Diff, 32        ;# next diff
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15, saturated to unsigned bytes
    stvx    v2,  0, \Dst            ;# to dst
    add     \Dst, \Dst, \Stride     ;# next dst
.endm

    .text
    .align 2
;# recon4b_ppc
;#   Reconstructs four rows of 16 pels each by expanding row_of16 four
;#   times.  Successive rows are 16 bytes apart in pred, 32 bytes apart in
;#   diff and `stride` bytes apart in dst (see the pointer updates in
;#   row_of16).
;#
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;#
;#   Modifies r0, r3-r6, r8, r12 and v0-v3.  The caller's VRSAVE is kept in
;#   the word at -8(r1), i.e. below the stack pointer; no frame is created.
;#   NOTE(review): this assumes memory just below r1 is safe to use on the
;#   target ABI - confirm.
recon4b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# set the top four VRSAVE bits (v0..v3 in use)
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, used by row_of16 to widen bytes
    li      r8, 16                      ;# r8 = byte offset of d8..d15 within a diff row

    row_of16 r3, r4, r5, r6             ;# row 0
    row_of16 r3, r4, r5, r6             ;# row 1
    row_of16 r3, r4, r5, r6             ;# row 2
    row_of16 r3, r4, r5, r6             ;# row 3

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr

;# two_rows_of8 Diff Pred Dst Stride write_first_four_pels
;#   Reconstruct two rows of 8 pels.  The 16 predictor bytes at Pred hold
;#   both rows back to back; the residuals for the first row are at Diff
;#   and those for the second row at Diff + r8.  The 16 result bytes are
;#   written to the scratch buffer at r10 and then copied to dst with four
;#   32-bit stores (two per row).
;#     write_first_four_pels != 0 : the first row is written at Dst as is.
;#     write_first_four_pels == 0 : Dst is first advanced by Stride
;#                                  (stwux), then the first row is written.
;#   In both cases Dst is advanced by Stride once more between the two
;#   rows, so on exit Dst points at the second row written here.
;#   Pred and Diff are not modified; the caller advances them.
;#   Implicit inputs: v0 = all zeros, r8 = 16, r10 = 16-byte scratch buffer.
;#   Clobbers r0, v1, v2, v3 and the scratch buffer.
.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
    lvx     v1,  0, \Pred       ;# v1 = pred = p0..p15
    vmrghb  v2, v0, v1          ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff       ;# v3 = d0..d7
    vaddshs v2, v2, v3          ;# v2 = r0..r7
    vmrglb  v1, v0, v1          ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff       ;# v3 = d8..d15
    vaddshs v3, v3, v1          ;# v3 = r8..r15
    vpkshus v2, v2, v3          ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10         ;# both result rows to buf
    lwz     r0, 0(r10)          ;# r0 = first row, pels 0..3
.if \write_first_four_pels
    stw     r0, 0(\Dst)         ;# store at current dst row
    .else
    stwux   r0, \Dst, \Stride   ;# advance dst to next row, then store
.endif
    lwz     r0, 4(r10)
    stw     r0, 4(\Dst)         ;# first row, pels 4..7
    lwz     r0, 8(r10)
    stwux   r0, \Dst, \Stride       ;# advance dst to next row
    lwz     r0, 12(r10)
    stw     r0, 4(\Dst)         ;# second row, pels 4..7
.endm

    .align 2
;# recon2b_ppc
;#   Reconstructs four rows of 8 pels each, two rows per two_rows_of8
;#   expansion.  Each expansion consumes 16 bytes of pred and 32 bytes of
;#   diff; dst rows are `stride` bytes apart.
;#
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;#
;#   Modifies r0, r3-r5, r8, r10, r12 and v0-v3.  Uses memory below the
;#   stack pointer without creating a frame: the word at -8(r1) for the
;#   saved VRSAVE and 16 bytes at -48(r1) as a scratch buffer.
;#   NOTE(review): this assumes memory just below r1 is safe to use on the
;#   target ABI, and that r1 is 16-byte aligned so the buffer is too -
;#   confirm.

recon2b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# set the top four VRSAVE bits (v0..v3 in use)
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, used to widen pred bytes
    li      r8, 16                      ;# r8 = byte offset of the second diff row

    la      r10, -48(r1)                ;# buf

    two_rows_of8 r3, r4, r5, r6, 1      ;# rows 0 and 1; row 0 goes to dst as passed in

    addi    r4, r4, 16;                 ;# next pred
    addi    r3, r3, 32;                 ;# next diff

    two_rows_of8 r3, r4, r5, r6, 0      ;# rows 2 and 3; dst is stepped past row 1 first

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr

;# get_two_diff_rows
;#   Gather two 8-byte diff rows (four 16-bit values each) into v3.
;#   On entry:  r3  = address of the first of the two rows,
;#              r0  = word already loaded from 0(r3) by the caller,
;#              r10 = 16-byte scratch buffer.
;#   The second row is read from 32 bytes after the first.
;#   On exit:   v3 = the eight 16-bit values (first row, then second row),
;#              r3 has been advanced by 32 (lwzu) and so points at the
;#              second row.
;#   Clobbers r0 and overwrites the scratch buffer.
.macro get_two_diff_rows
    stw     r0, 0(r10)          ;# first row, values 0..1 (preloaded in r0)
    lwz     r0, 4(r3)
    stw     r0, 4(r10)          ;# first row, values 2..3
    lwzu    r0, 32(r3)          ;# r3 += 32, r0 = second row, values 0..1
    stw     r0, 8(r10)
    lwz     r0, 4(r3)
    stw     r0, 12(r10)         ;# second row, values 2..3
    lvx     v3, 0, r10          ;# v3 = both rows
.endm

    .align 2
;# recon_b_ppc
;#   Reconstructs four rows of 4 pels each.  The four 4-byte pred rows
;#   (16 bytes apart) and the four 8-byte diff rows (32 bytes apart) are
;#   gathered through a stack scratch buffer so that the whole block is
;#   processed as one vector; the 16 result bytes are then scattered to
;#   dst one 32-bit word per row, rows being `stride` bytes apart.
;#
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;#
;#   Modifies r0, r3, r5, r10, r12 and v0-v3.  On exit r3 points at diff
;#   row 3 and r5 at dst row 2.  Uses memory below the stack pointer
;#   without creating a frame: the word at -8(r1) for the saved VRSAVE and
;#   16 bytes at -48(r1) as a scratch buffer.
;#   NOTE(review): this assumes memory just below r1 is safe to use on the
;#   target ABI, and that r1 is 16-byte aligned so the buffer is too -
;#   confirm.
recon_b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# set the top four VRSAVE bits (v0..v3 in use)
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, used to widen pred bytes

    la      r10, -48(r1)    ;# buf

    ;# gather the four 4-byte pred rows into buf
    lwz     r0, 0(r4)
    stw     r0, 0(r10)
    lwz     r0, 16(r4)
    stw     r0, 4(r10)
    lwz     r0, 32(r4)
    stw     r0, 8(r10)
    lwz     r0, 48(r4)
    stw     r0, 12(r10)

    lvx     v1,  0, r10;    ;# v1 = pred = p0..p15

    lwz r0, 0(r3)           ;# r0 = first word of diff row 0, consumed by the macro

    get_two_diff_rows       ;# v3 = d0..d7 (diff rows 0 and 1); r3 -> row 1

    vmrghb  v2, v0, v1;     ;# v2 = 16-bit p0..p7
    vaddshs v2, v2, v3;     ;# v2 = r0..r7

    lwzu r0, 32(r3)         ;# r3 -> diff row 2, r0 = its first word

    get_two_diff_rows       ;# v3 = d8..d15 (diff rows 2 and 3); r3 -> row 3

    vmrglb  v1, v0, v1;     ;# v1 = 16-bit p8..p15
    vaddshs v3, v3, v1;     ;# v3 = r8..r15

    vpkshus v2, v2, v3;     ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10;    ;# 16 result pels to buf

    ;# scatter buf to dst, four pels per row
    lwz     r0, 0(r10)
    stw     r0, 0(r5)       ;# row 0
    lwz     r0, 4(r10)
    stwux   r0, r5, r6      ;# row 1, r5 += stride
    lwz     r0, 8(r10)
    stwux   r0, r5, r6      ;# row 2, r5 += stride
    lwz     r0, 12(r10)
    stwx    r0, r5, r6      ;# row 3, r5 left unchanged

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr