diff options
author | Mike Klein <mtklein@google.com> | 2018-11-09 12:09:36 -0500 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2018-11-09 17:58:05 +0000 |
commit | 6a2c42f893ddb0141bef6e90c14fb68dda5ccb30 (patch) | |
tree | d6d183b0a1c9fa63a1723593a6ce8ec4b5d508d9 /src/opts | |
parent | 7600cb35666131a6e03faaeb3cd0c872cc1ac111 (diff) | |
download | platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.tar.gz platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.tar.bz2 platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.zip |
clean up SkBlitRow_opts
SSE2 and NEON are common baseline instruction sets now,
so there's no need to runtime detect support for these routines.
I simplified the SSE and portable implementations while moving them.
Cq-Include-Trybots: master.tryserver.blink:linux_trusty_blink_rel
Change-Id: I34e96851735c8d7ad90198f3ac4bf86ff508f17c
Reviewed-on: https://skia-review.googlesource.com/c/170220
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkBlitRow_opts_SSE2.cpp | 103 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_SSE2.h | 21 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_arm.cpp | 19 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_arm_neon.cpp | 200 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_arm_neon.h | 14 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_none.cpp | 14 | ||||
-rw-r--r-- | src/opts/opts_check_x86.cpp | 19 |
7 files changed, 0 insertions, 390 deletions
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp deleted file mode 100644 index 36cc3f4b19..0000000000 --- a/src/opts/SkBlitRow_opts_SSE2.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2012 The Android Open Source Project - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include <emmintrin.h> -#include "SkBitmapProcState_opts_SSE2.h" -#include "SkBlitRow_opts_SSE2.h" -#include "SkColorData.h" -#include "SkColor_opts_SSE2.h" -#include "SkMSAN.h" -#include "SkUTF.h" - -/* SSE2 version of S32_Blend_BlitRow32() - * portable version is in core/SkBlitRow_D32.cpp - */ -void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { - SkASSERT(alpha <= 255); - if (count <= 0) { - return; - } - - uint32_t src_scale = SkAlpha255To256(alpha); - - if (count >= 4) { - SkASSERT(((size_t)dst & 0x03) == 0); - while (((size_t)dst & 0x0F) != 0) { - *dst = SkPMLerp(*src, *dst, src_scale); - src++; - dst++; - count--; - } - - const __m128i *s = reinterpret_cast<const __m128i*>(src); - __m128i *d = reinterpret_cast<__m128i*>(dst); - - while (count >= 4) { - // Load 4 pixels each of src and dest. - __m128i src_pixel = _mm_loadu_si128(s); - __m128i dst_pixel = _mm_load_si128(d); - - __m128i result = SkPMLerp_SSE2(src_pixel, dst_pixel, src_scale); - _mm_store_si128(d, result); - s++; - d++; - count -= 4; - } - src = reinterpret_cast<const SkPMColor*>(s); - dst = reinterpret_cast<SkPMColor*>(d); - } - - while (count > 0) { - *dst = SkPMLerp(*src, *dst, src_scale); - src++; - dst++; - count--; - } -} - -void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { - SkASSERT(alpha <= 255); - if (count <= 0) { - return; - } - - if (count >= 4) { - while (((size_t)dst & 0x0F) != 0) { - *dst = SkBlendARGB32(*src, *dst, alpha); - src++; - dst++; - count--; - } - - const __m128i *s = reinterpret_cast<const __m128i*>(src); - __m128i *d = reinterpret_cast<__m128i*>(dst); - while (count >= 4) { - // Load 4 pixels each of src and dest. - __m128i src_pixel = _mm_loadu_si128(s); - __m128i dst_pixel = _mm_load_si128(d); - - __m128i result = SkBlendARGB32_SSE2(src_pixel, dst_pixel, alpha); - _mm_store_si128(d, result); - s++; - d++; - count -= 4; - } - src = reinterpret_cast<const SkPMColor*>(s); - dst = reinterpret_cast<SkPMColor*>(d); - } - - while (count > 0) { - *dst = SkBlendARGB32(*src, *dst, alpha); - src++; - dst++; - count--; - } -} diff --git a/src/opts/SkBlitRow_opts_SSE2.h b/src/opts/SkBlitRow_opts_SSE2.h deleted file mode 100644 index 826a5ccaaf..0000000000 --- a/src/opts/SkBlitRow_opts_SSE2.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright 2009 The Android Open Source Project - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SkBlitRow_opts_SSE2_DEFINED -#define SkBlitRow_opts_SSE2_DEFINED - -#include "SkBlitRow.h" - -void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha); - -void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha); - -#endif diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp deleted file mode 100644 index 543640a57f..0000000000 --- a/src/opts/SkBlitRow_opts_arm.cpp +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright 2012 The Android Open Source Project - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "SkBlitRow.h" -#include "SkUtilsArm.h" - -#include "SkBlitRow_opts_arm_neon.h" - -extern const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm[] = { - nullptr, nullptr, nullptr, nullptr, -}; - -SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { - return SK_ARM_NEON_WRAP(sk_blitrow_platform_32_procs_arm)[flags]; -} diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp deleted file mode 100644 index 23ea938dfb..0000000000 --- a/src/opts/SkBlitRow_opts_arm_neon.cpp +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright 2012 The Android Open Source Project - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "SkBlitRow_opts_arm_neon.h" - -#include "SkBlitRow.h" -#include "SkColorData.h" -#include "SkMathPriv.h" -#include "SkUTF.h" - -#include "SkColor_opts_neon.h" -#include <arm_neon.h> - -/* Neon version of S32_Blend_BlitRow32() - * portable version is in src/core/SkBlitRow_D32.cpp - */ -void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { - SkASSERT(alpha <= 255); - - if (count <= 0) { - return; - } - - uint16_t src_scale = SkAlpha255To256(alpha); - uint16_t dst_scale = 256 - src_scale; - - while (count >= 2) { - uint8x8_t vsrc, vdst, vres; - uint16x8_t vsrc_wide, vdst_wide; - - /* These commented prefetches are a big win for count - * values > 64 on an A9 (Pandaboard) but hurt by 10% for count = 4. - * They also hurt a little (<5%) on an A15 - */ - //__builtin_prefetch(src+32); - //__builtin_prefetch(dst+32); - - // Load - vsrc = vreinterpret_u8_u32(vld1_u32(src)); - vdst = vreinterpret_u8_u32(vld1_u32(dst)); - - // Process src - vsrc_wide = vmovl_u8(vsrc); - vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); - - // Process dst - vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); - - // Combine - vdst_wide += vsrc_wide; - vres = vshrn_n_u16(vdst_wide, 8); - - // Store - vst1_u32(dst, vreinterpret_u32_u8(vres)); - - src += 2; - dst += 2; - count -= 2; - } - - if (count == 1) { - uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; - uint16x8_t vsrc_wide, vdst_wide; - - // Load - vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); - vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); - - // Process - vsrc_wide = vmovl_u8(vsrc); - vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); - vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); - vdst_wide += vsrc_wide; - vres = vshrn_n_u16(vdst_wide, 8); - - // Store - vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); - } -} - -#ifdef SK_CPU_ARM32 -void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { - - SkASSERT(255 > alpha); - - if (count <= 0) { - return; - } - - unsigned alpha256 = SkAlpha255To256(alpha); - - // First deal with odd counts - if (count & 1) { - uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; - uint16x8_t vdst_wide, vsrc_wide; - unsigned dst_scale; - - // Load - vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); - vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); - - // Calc dst_scale - dst_scale = vget_lane_u8(vsrc, 3); - dst_scale = SkAlphaMulInv256(dst_scale, alpha256); - - // Process src - vsrc_wide = vmovl_u8(vsrc); - vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256); - - // Process dst - vdst_wide = vmovl_u8(vdst); - vdst_wide = vmulq_n_u16(vdst_wide, dst_scale); - - // Combine - vdst_wide += vsrc_wide; - vres = vshrn_n_u16(vdst_wide, 8); - - vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); - dst++; - src++; - count--; - } - - if (count) { - uint8x8_t alpha_mask; - static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; - alpha_mask = vld1_u8(alpha_mask_setup); - - do { - - uint8x8_t vsrc, vdst, vres, vsrc_alphas; - uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale; - - __builtin_prefetch(src+32); - __builtin_prefetch(dst+32); - - // Load - vsrc = vreinterpret_u8_u32(vld1_u32(src)); - vdst = vreinterpret_u8_u32(vld1_u32(dst)); - - // Prepare src_scale - vsrc_scale = vdupq_n_u16(alpha256); - - // Calc dst_scale - vsrc_alphas = vtbl1_u8(vsrc, alpha_mask); - vdst_scale = vmovl_u8(vsrc_alphas); - // Calculate SkAlphaMulInv256(vdst_scale, vsrc_scale). - // A 16-bit lane would overflow if we used 0xFFFF here, - // so use an approximation with 0xFF00 that is off by 1, - // and add back 1 after to get the correct value. - // This is valid if alpha256 <= 255. - vdst_scale = vmlsq_u16(vdupq_n_u16(0xFF00), vdst_scale, vsrc_scale); - vdst_scale = vsraq_n_u16(vdst_scale, vdst_scale, 8); - vdst_scale = vsraq_n_u16(vdupq_n_u16(1), vdst_scale, 8); - - // Process src - vsrc_wide = vmovl_u8(vsrc); - vsrc_wide *= vsrc_scale; - - // Process dst - vdst_wide = vmovl_u8(vdst); - vdst_wide *= vdst_scale; - - // Combine - vdst_wide += vsrc_wide; - vres = vshrn_n_u16(vdst_wide, 8); - - vst1_u32(dst, vreinterpret_u32_u8(vres)); - - src += 2; - dst += 2; - count -= 2; - } while(count); - } -} - -/////////////////////////////////////////////////////////////////////////////// - -#endif // #ifdef SK_CPU_ARM32 - -/////////////////////////////////////////////////////////////////////////////// - -const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { - nullptr, // S32_Opaque, - S32_Blend_BlitRow32_neon, // S32_Blend, - nullptr, // Ported to SkOpts -#ifdef SK_CPU_ARM32 - S32A_Blend_BlitRow32_neon // S32A_Blend -#else - nullptr -#endif -}; diff --git a/src/opts/SkBlitRow_opts_arm_neon.h b/src/opts/SkBlitRow_opts_arm_neon.h deleted file mode 100644 index 815c2b7476..0000000000 --- a/src/opts/SkBlitRow_opts_arm_neon.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright 2012 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ -#ifndef SkBlitRow_opts_arm_neon_DEFINED -#define SkBlitRow_opts_arm_neon_DEFINED - -#include "SkBlitRow.h" - -extern const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[]; - -#endif diff --git a/src/opts/SkBlitRow_opts_none.cpp b/src/opts/SkBlitRow_opts_none.cpp deleted file mode 100644 index 289bb7e88c..0000000000 --- a/src/opts/SkBlitRow_opts_none.cpp +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright 2011 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "SkBlitRow.h" - -// Platform impl of Platform_procs with no overrides - -SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { - return nullptr; -} diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp index 4ef210ac02..82d2d47550 100644 --- a/src/opts/opts_check_x86.cpp +++ b/src/opts/opts_check_x86.cpp @@ -7,8 +7,6 @@ #include "SkBitmapProcState_opts_SSE2.h" #include "SkBitmapProcState_opts_SSSE3.h" -#include "SkBlitRow.h" -#include "SkBlitRow_opts_SSE2.h" #include "SkCpu.h" @@ -61,20 +59,3 @@ void SkBitmapProcState::platformProcs() { fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; } } - -//////////////////////////////////////////////////////////////////////////////// - -static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { - nullptr, // S32_Opaque, - S32_Blend_BlitRow32_SSE2, // S32_Blend, - nullptr, // Ported to SkOpts - S32A_Blend_BlitRow32_SSE2, // S32A_Blend, -}; - -SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { - if (SkCpu::Supports(SkCpu::SSE2)) { - return platform_32_procs_SSE2[flags]; - } else { - return nullptr; - } -} |