diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2008-10-21 07:00:00 -0700 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2008-10-21 07:00:00 -0700 |
commit | 4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53 (patch) | |
tree | 54fd1b2695a591d2306d41264df67c53077b752c /libpixelflinger/t32cb16blend.S | |
download | core-4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53.tar.gz core-4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53.tar.bz2 core-4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53.zip |
Initial Contribution
Diffstat (limited to 'libpixelflinger/t32cb16blend.S')
-rw-r--r-- | libpixelflinger/t32cb16blend.S | 171 |
1 files changed, 171 insertions, 0 deletions
/* libs/pixelflinger/t32cb16blend.S
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

/*
 * Target: 32-bit ARM state, GNU as (pre-UAL syntax), ARMv5TE or later
 * (uses SMULBB/SMULBT and PLD).
 */

    .text
    .align  2                           // 4-byte alignment for ARM code

    .global scanline_t32cb16blend_arm
    .type   scanline_t32cb16blend_arm, %function

/*
 * .macro pixel
 *
 * Blends one 32-bit source pixel over one RGB565 destination pixel:
 *
 *      f       = 0x100 - (sA + (sA >> 7))
 *      channel = min(src_channel + ((dst_channel * f) >> 8), channel_max)
 *
 * \DREG    32-bit register holding the original destination pixel(s):
 *          the pixel to blend lives at bit \OFFSET (low half-word when
 *          \OFFSET is 0, high half-word when \OFFSET is 16).
 *
 * \SRC     32-bit source pixel laid out as 0xAABBGGRR (red in bits 7:0).
 *          The blend equation adds the source channels unscaled, i.e. the
 *          source is treated as already multiplied by its alpha.
 *
 * \FB      register receiving the blended RGB565 value. With \OFFSET == 0
 *          the whole register is (re)initialised; with \OFFSET != 0 the
 *          result is OR-ed into the existing contents, so the \OFFSET == 0
 *          expansion must come first. \FB must not be the same register as
 *          \DREG, because \DREG is read again after \FB has been written.
 *
 * \OFFSET  bit position of the pixel inside \DREG / \FB. Only 0 and 16 are
 *          supported: the non-zero path relies on the red field being the
 *          topmost bits of the register.
 *
 * Each channel is saturated to its field width so that an out-of-range sum
 * (possible when the source is not strictly premultiplied) cannot carry
 * into the neighbouring colour field or the neighbouring pixel.
 *
 * clobbers: r6, r7, lr, condition flags
 */

.macro pixel,   DREG, SRC, FB, OFFSET

    // SRC = 0xAABBGGRR
    mov     r7, \SRC, lsr #24           // r7 = sA
    add     r7, r7, r7, lsr #7          // r7 = sA + (sA >> 7), range 0..0x100
    rsb     r7, r7, #0x100              // r7 = f = 0x100 - (sA + (sA >> 7))

.if \OFFSET

    // red (top 5 bits of DREG, so the shift alone isolates the field)
    mov     lr, \DREG, lsr #(\OFFSET + 6 + 5)
    smulbb  lr, r7, lr                  // lr = f * dR
    mov     r6, \SRC, lsr #3
    and     r6, r6, #0x1F               // r6 = sR reduced to 5 bits
    add     lr, r6, lr, lsr #8
    cmp     lr, #0x1F
    orrhs   \FB, \FB, #(0x1F << (\OFFSET + 11))
    orrlo   \FB, \FB, lr, lsl #(\OFFSET + 11)

    // green (field is left in place in the top half-word, hence smulbt
    // and the extra 5 bits in the final shift)
    and     r6, \DREG, #(0x3F << (\OFFSET + 5))
    smulbt  r6, r7, r6                  // r6 = f * (dG << 5)
    mov     lr, \SRC, lsr #(8 + 2)
    and     lr, lr, #0x3F               // lr = sG reduced to 6 bits
    add     r6, lr, r6, lsr #(5 + 8)
    cmp     r6, #0x3F
    orrhs   \FB, \FB, #(0x3F << (\OFFSET + 5))
    orrlo   \FB, \FB, r6, lsl #(\OFFSET + 5)

    // blue
    and     lr, \DREG, #(0x1F << \OFFSET)
    smulbt  lr, r7, lr                  // lr = f * dB
    mov     r6, \SRC, lsr #(8 + 8 + 3)
    and     r6, r6, #0x1F               // r6 = sB reduced to 5 bits
    add     lr, r6, lr, lsr #8
    cmp     lr, #0x1F
    orrhs   \FB, \FB, #(0x1F << \OFFSET)
    orrlo   \FB, \FB, lr, lsl #\OFFSET

.else

    // red (this is the first write to FB: it initialises the register)
    mov     lr, \DREG, lsr #(6 + 5)
    and     lr, lr, #0x1F
    smulbb  lr, r7, lr                  // lr = f * dR
    mov     r6, \SRC, lsr #3
    and     r6, r6, #0x1F               // r6 = sR reduced to 5 bits
    add     lr, r6, lr, lsr #8
    cmp     lr, #0x1F
    movhs   \FB, #(0x1F << 11)
    movlo   \FB, lr, lsl #11

    // green
    and     r6, \DREG, #(0x3F << 5)
    smulbb  r6, r7, r6                  // r6 = f * (dG << 5)
    mov     lr, \SRC, lsr #(8 + 2)
    and     lr, lr, #0x3F               // lr = sG reduced to 6 bits
    add     r6, lr, r6, lsr #(5 + 8)
    cmp     r6, #0x3F
    orrhs   \FB, \FB, #(0x3F << 5)
    orrlo   \FB, \FB, r6, lsl #5

    // blue
    and     lr, \DREG, #0x1F
    smulbb  lr, r7, lr                  // lr = f * dB
    mov     r6, \SRC, lsr #(8 + 8 + 3)
    and     r6, r6, #0x1F               // r6 = sB reduced to 5 bits
    add     lr, r6, lr, lsr #8
    cmp     lr, #0x1F
    orrhs   \FB, \FB, #0x1F
    orrlo   \FB, \FB, lr

.endif

    .endm


/*
 * scanline_t32cb16blend_arm
 *
 * Blends a run of 32-bit source pixels over a run of 16-bit RGB565
 * destination pixels, in place.
 *
 * In:
 *      r0      dst ptr (16-bit pixels; a leading pixel is handled on its
 *              own when the pointer is not 32-bit aligned)
 *      r1      src ptr (32-bit pixels, read with ldr/ldmia)
 *      r2      count, in pixels
 *      NOTE(review): presumably called from C as
 *      void f(uint16_t* dst, uint32_t* src, size_t count) - confirm against
 *      the caller's declaration.
 *
 * Register usage:
 *      r3      d   (destination word / half-word)
 *      r4      s0  (first source pixel)
 *      r5      s1  (second source pixel)
 *      r6, r7  clobbered by the pixel macro
 *      r8-r11  unused
 *      r12     blended result (FB argument of the pixel macro)
 *      r14     clobbered by the pixel macro (saved/restored on the stack)
 *
 * r4-r7 and lr are saved on entry and restored on every exit path.
 */

scanline_t32cb16blend_arm:
    stmfd   sp!, {r4-r7, lr}

    pld     [r0]
    pld     [r1]

    // align DST to 32 bits
    tst     r0, #0x3
    beq     .Laligned
    subs    r2, r2, #1
    ldmlofd sp!, {r4-r7, lr}            // count was 0: return
    bxlo    lr

    // blend exactly one pixel (leading unaligned pixel or trailing odd one)
.Llast:
    ldr     r4, [r1], #4
    ldrh    r3, [r0]
    pixel   r3, r4, r12, 0
    strh    r12, [r0], #2

.Laligned:
    // invariant from here on: r2 = remaining pixels - 2
    subs    r2, r2, #2
    blo     9f

    // The main loop is unrolled twice and processes 4 pixels
8:  ldmia   r1!, {r4, r5}
    // stream the source
    pld     [r1, #32]
    add     r0, r0, #4
    // both source pixels are all zero: the destination pair is unchanged
    orrs    r3, r4, r5
    beq     7f

    // load the destination
    ldr     r3, [r0, #-4]
    // stream the destination
    pld     [r0, #32]
    pixel   r3, r4, r12, 0
    pixel   r3, r5, r12, 16
    // effectively, we're getting write-combining by virtue of the
    // cpu's write-back cache.
    str     r12, [r0, #-4]

    // 2nd iteration of the loop, don't stream anything
    subs    r2, r2, #2
    blt     9f
    ldmia   r1!, {r4, r5}
    add     r0, r0, #4
    orrs    r3, r4, r5
    beq     7f
    ldr     r3, [r0, #-4]
    pixel   r3, r4, r12, 0
    pixel   r3, r5, r12, 16
    str     r12, [r0, #-4]

7:  subs    r2, r2, #2
    bhs     8b

    // r2 is -2 (nothing left) or -1 (one trailing pixel, reloaded from
    // [r1] by .Llast)
9:  adds    r2, r2, #1
    ldmlofd sp!, {r4-r7, lr}            // return
    bxlo    lr
    b       .Llast

    .size   scanline_t32cb16blend_arm, . - scanline_t32cb16blend_arm