diff options
author | Mike Klein <mtklein@google.com> | 2018-11-09 12:09:36 -0500 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2018-11-09 17:58:05 +0000 |
commit | 6a2c42f893ddb0141bef6e90c14fb68dda5ccb30 (patch) | |
tree | d6d183b0a1c9fa63a1723593a6ce8ec4b5d508d9 /src/opts/SkBlitRow_opts_arm_neon.cpp | |
parent | 7600cb35666131a6e03faaeb3cd0c872cc1ac111 (diff) | |
download | platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.tar.gz platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.tar.bz2 platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.zip |
clean up SkBlitRow_opts
SSE2 and NEON are common baseline instruction sets now,
so there's no need to runtime detect support for these routines.
I simplified the SSE and portable implementations while moving them.
Cq-Include-Trybots: master.tryserver.blink:linux_trusty_blink_rel
Change-Id: I34e96851735c8d7ad90198f3ac4bf86ff508f17c
Reviewed-on: https://skia-review.googlesource.com/c/170220
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/opts/SkBlitRow_opts_arm_neon.cpp')
-rw-r--r-- | src/opts/SkBlitRow_opts_arm_neon.cpp | 200 |
1 file changed, 0 insertions, 200 deletions
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp deleted file mode 100644 index 23ea938dfb..0000000000 --- a/src/opts/SkBlitRow_opts_arm_neon.cpp +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright 2012 The Android Open Source Project - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "SkBlitRow_opts_arm_neon.h" - -#include "SkBlitRow.h" -#include "SkColorData.h" -#include "SkMathPriv.h" -#include "SkUTF.h" - -#include "SkColor_opts_neon.h" -#include <arm_neon.h> - -/* Neon version of S32_Blend_BlitRow32() - * portable version is in src/core/SkBlitRow_D32.cpp - */ -void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { - SkASSERT(alpha <= 255); - - if (count <= 0) { - return; - } - - uint16_t src_scale = SkAlpha255To256(alpha); - uint16_t dst_scale = 256 - src_scale; - - while (count >= 2) { - uint8x8_t vsrc, vdst, vres; - uint16x8_t vsrc_wide, vdst_wide; - - /* These commented prefetches are a big win for count - * values > 64 on an A9 (Pandaboard) but hurt by 10% for count = 4. 
- * They also hurt a little (<5%) on an A15 - */ - //__builtin_prefetch(src+32); - //__builtin_prefetch(dst+32); - - // Load - vsrc = vreinterpret_u8_u32(vld1_u32(src)); - vdst = vreinterpret_u8_u32(vld1_u32(dst)); - - // Process src - vsrc_wide = vmovl_u8(vsrc); - vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); - - // Process dst - vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); - - // Combine - vdst_wide += vsrc_wide; - vres = vshrn_n_u16(vdst_wide, 8); - - // Store - vst1_u32(dst, vreinterpret_u32_u8(vres)); - - src += 2; - dst += 2; - count -= 2; - } - - if (count == 1) { - uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; - uint16x8_t vsrc_wide, vdst_wide; - - // Load - vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); - vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); - - // Process - vsrc_wide = vmovl_u8(vsrc); - vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); - vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); - vdst_wide += vsrc_wide; - vres = vshrn_n_u16(vdst_wide, 8); - - // Store - vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); - } -} - -#ifdef SK_CPU_ARM32 -void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { - - SkASSERT(255 > alpha); - - if (count <= 0) { - return; - } - - unsigned alpha256 = SkAlpha255To256(alpha); - - // First deal with odd counts - if (count & 1) { - uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; - uint16x8_t vdst_wide, vsrc_wide; - unsigned dst_scale; - - // Load - vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); - vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); - - // Calc dst_scale - dst_scale = vget_lane_u8(vsrc, 3); - dst_scale = SkAlphaMulInv256(dst_scale, alpha256); - - // Process src - vsrc_wide = vmovl_u8(vsrc); - vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256); - - // Process dst - vdst_wide = 
vmovl_u8(vdst); - vdst_wide = vmulq_n_u16(vdst_wide, dst_scale); - - // Combine - vdst_wide += vsrc_wide; - vres = vshrn_n_u16(vdst_wide, 8); - - vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); - dst++; - src++; - count--; - } - - if (count) { - uint8x8_t alpha_mask; - static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; - alpha_mask = vld1_u8(alpha_mask_setup); - - do { - - uint8x8_t vsrc, vdst, vres, vsrc_alphas; - uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale; - - __builtin_prefetch(src+32); - __builtin_prefetch(dst+32); - - // Load - vsrc = vreinterpret_u8_u32(vld1_u32(src)); - vdst = vreinterpret_u8_u32(vld1_u32(dst)); - - // Prepare src_scale - vsrc_scale = vdupq_n_u16(alpha256); - - // Calc dst_scale - vsrc_alphas = vtbl1_u8(vsrc, alpha_mask); - vdst_scale = vmovl_u8(vsrc_alphas); - // Calculate SkAlphaMulInv256(vdst_scale, vsrc_scale). - // A 16-bit lane would overflow if we used 0xFFFF here, - // so use an approximation with 0xFF00 that is off by 1, - // and add back 1 after to get the correct value. - // This is valid if alpha256 <= 255. 
- vdst_scale = vmlsq_u16(vdupq_n_u16(0xFF00), vdst_scale, vsrc_scale); - vdst_scale = vsraq_n_u16(vdst_scale, vdst_scale, 8); - vdst_scale = vsraq_n_u16(vdupq_n_u16(1), vdst_scale, 8); - - // Process src - vsrc_wide = vmovl_u8(vsrc); - vsrc_wide *= vsrc_scale; - - // Process dst - vdst_wide = vmovl_u8(vdst); - vdst_wide *= vdst_scale; - - // Combine - vdst_wide += vsrc_wide; - vres = vshrn_n_u16(vdst_wide, 8); - - vst1_u32(dst, vreinterpret_u32_u8(vres)); - - src += 2; - dst += 2; - count -= 2; - } while(count); - } -} - -/////////////////////////////////////////////////////////////////////////////// - -#endif // #ifdef SK_CPU_ARM32 - -/////////////////////////////////////////////////////////////////////////////// - -const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { - nullptr, // S32_Opaque, - S32_Blend_BlitRow32_neon, // S32_Blend, - nullptr, // Ported to SkOpts -#ifdef SK_CPU_ARM32 - S32A_Blend_BlitRow32_neon // S32A_Blend -#else - nullptr -#endif -}; |