diff options
author | Gary King <gking@nvidia.com> | 2010-02-08 19:26:16 -0800 |
---|---|---|
committer | Steve Kondik <shade@chemlab.org> | 2010-12-17 23:12:59 -0500 |
commit | 0f507849f7741adec36dedf22dbdff5f5db2bd7d (patch) | |
tree | a8c40edcc58760b0be48f060b6a859a8736c0d41 | |
parent | 180e60acf18dede10d7a100a099e850ef86fb2e6 (diff) | |
download | android_external_skia-0f507849f7741adec36dedf22dbdff5f5db2bd7d.tar.gz android_external_skia-0f507849f7741adec36dedf22dbdff5f5db2bd7d.tar.bz2 android_external_skia-0f507849f7741adec36dedf22dbdff5f5db2bd7d.zip |
skia: optimize S32A_D565 pixel loop for ARM CPUs w/o NEON
uses ARMv5 DSP instructions, explicit cache preloading and
fast-paths for fully-opaque pixels to improve rendering performance
Change-Id: I6a6aba39c0bd7b75808bcf7c198adb7414bb6441
-rw-r--r-- | src/opts/SkBlitRow_opts_arm.cpp | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp index 20e03cc582..503ada1c32 100644 --- a/src/opts/SkBlitRow_opts_arm.cpp +++ b/src/opts/SkBlitRow_opts_arm.cpp @@ -425,6 +425,75 @@ static void S32_D565_Blend_Dither_neon(uint16_t *dst, const SkPMColor *src, #define S32A_D565_Blend_PROC S32A_D565_Blend_neon #define S32_D565_Blend_Dither_PROC S32_D565_Blend_Dither_neon +#elif __ARM_ARCH__ >= 7 && !defined(SK_CPU_BENDIAN) +static void S32A_D565_Opaque_v7(uint16_t* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, int count, + U8CPU alpha, int /*x*/, int /*y*/) { + SkASSERT(255 == alpha); + + asm volatile ( + "1: \n\t" + "ldr r3, [%[src]], #4 \n\t" + "cmp r3, #0xff000000 \n\t" + "blo 2f \n\t" + "and r4, r3, #0x0000f8 \n\t" + "and r5, r3, #0x00fc00 \n\t" + "and r6, r3, #0xf80000 \n\t" + "pld [r1, #32] \n\t" + "lsl r3, r4, #8 \n\t" + "orr r3, r3, r5, lsr #5 \n\t" + "orr r3, r3, r6, lsr #19 \n\t" + "subs %[count], %[count], #1 \n\t" + "strh r3, [%[dst]], #2 \n\t" + "bne 1b \n\t" + "b 4f \n\t" + "2: \n\t" + "lsrs r7, r3, #24 \n\t" + "beq 3f \n\t" + "ldrh r4, [%[dst]] \n\t" + "rsb r7, r7, #255 \n\t" + "and r6, r4, #0x001f \n\t" + "ubfx r5, r4, #5, #6 \n\t" + "pld [r0, #16] \n\t" + "lsr r4, r4, #11 \n\t" + "smulbb r6, r6, r7 \n\t" + "smulbb r5, r5, r7 \n\t" + "smulbb r4, r4, r7 \n\t" + "ubfx r7, r3, #16, #8 \n\t" + "ubfx ip, r3, #8, #8 \n\t" + "and r3, r3, #0xff \n\t" + "add r6, r6, #16 \n\t" + "add r5, r5, #32 \n\t" + "add r4, r4, #16 \n\t" + "add r6, r6, r6, lsr #5 \n\t" + "add r5, r5, r5, lsr #6 \n\t" + "add r4, r4, r4, lsr #5 \n\t" + "add r6, r7, r6, lsr #5 \n\t" + "add r5, ip, r5, lsr #6 \n\t" + "add r4, r3, r4, lsr #5 \n\t" + "lsr r6, r6, #3 \n\t" + "and r5, r5, #0xfc \n\t" + "and r4, r4, #0xf8 \n\t" + "orr r6, r6, r5, lsl #3 \n\t" + "orr r4, r6, r4, lsl #8 \n\t" + "strh r4, [%[dst]], #2 \n\t" + "pld [r1, #32] \n\t" + "subs %[count], %[count], #1 \n\t" + "bne 1b \n\t" + "b 4f \n\t" + "3: \n\t" + "subs %[count], %[count], #1 \n\t" + "add %[dst], %[dst], #2 \n\t" + "bne 1b \n\t" + "4: \n\t" + : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count) + : + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "ip" + ); +} +#define S32A_D565_Opaque_PROC S32A_D565_Opaque_v7 +#define S32A_D565_Blend_PROC NULL +#define S32_D565_Blend_Dither_PROC NULL #else #define S32A_D565_Blend_PROC NULL #define S32_D565_Blend_Dither_PROC NULL |