clean up SkBlitRow_opts

SSE2 and NEON are common baseline instruction sets now, so there's no need to runtime detect support for these routines.

I simplified the SSE and portable implementations while moving them.

Cq-Include-Trybots: master.tryserver.blink:linux_trusty_blink_rel
Change-Id: I34e96851735c8d7ad90198f3ac4bf86ff508f17c
Reviewed-on: https://skia-review.googlesource.com/c/170220
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
author: Mike Klein <mtklein@google.com> 2018-11-09 12:09:36 -0500
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2018-11-09 17:58:05 +0000
commit: 6a2c42f893ddb0141bef6e90c14fb68dda5ccb30 (patch)
tree: d6d183b0a1c9fa63a1723593a6ce8ec4b5d508d9 /src/opts/SkBlitRow_opts_arm_neon.cpp
parent: 7600cb35666131a6e03faaeb3cd0c872cc1ac111 (diff)
download: platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.tar.gz
platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.tar.bz2
platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.zip
1 files changed, 0 insertions, 200 deletions
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp
deleted file mode 100644
index 23ea938dfb..0000000000
--- a/src/opts/SkBlitRow_opts_arm_neon.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Copyright 2012 The Android Open Source Project
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkBlitRow_opts_arm_neon.h"
-
-#include "SkBlitRow.h"
-#include "SkColorData.h"
-#include "SkMathPriv.h"
-#include "SkUTF.h"
-
-#include "SkColor_opts_neon.h"
-#include <arm_neon.h>
-
-/* Neon version of S32_Blend_BlitRow32()
- * portable version is in src/core/SkBlitRow_D32.cpp
- */
-void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
-                              const SkPMColor* SK_RESTRICT src,
-                              int count, U8CPU alpha) {
-    SkASSERT(alpha <= 255);
-
-    if (count <= 0) {
-        return;
-    }
-
-    uint16_t src_scale = SkAlpha255To256(alpha);
-    uint16_t dst_scale = 256 - src_scale;
-
-    while (count >= 2) {
-        uint8x8_t vsrc, vdst, vres;
-        uint16x8_t vsrc_wide, vdst_wide;
-
-        /* These commented prefetches are a big win for count
-         * values > 64 on an A9 (Pandaboard) but hurt by 10% for count = 4.
-         * They also hurt a little (<5%) on an A15
-         */
-        //__builtin_prefetch(src+32);
-        //__builtin_prefetch(dst+32);
-
-        // Load
-        vsrc = vreinterpret_u8_u32(vld1_u32(src));
-        vdst = vreinterpret_u8_u32(vld1_u32(dst));
-
-        // Process src
-        vsrc_wide = vmovl_u8(vsrc);
-        vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale));
-
-        // Process dst
-        vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale));
-
-        // Combine
-        vdst_wide += vsrc_wide;
-        vres = vshrn_n_u16(vdst_wide, 8);
-
-        // Store
-        vst1_u32(dst, vreinterpret_u32_u8(vres));
-
-        src += 2;
-        dst += 2;
-        count -= 2;
-    }
-
-    if (count == 1) {
-        uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres;
-        uint16x8_t vsrc_wide, vdst_wide;
-
-        // Load
-        vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));
-        vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));
-
-        // Process
-        vsrc_wide = vmovl_u8(vsrc);
-        vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale));
-        vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale));
-        vdst_wide += vsrc_wide;
-        vres = vshrn_n_u16(vdst_wide, 8);
-
-        // Store
-        vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);
-    }
-}
-
-#ifdef SK_CPU_ARM32
-void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
-                         const SkPMColor* SK_RESTRICT src,
-                         int count, U8CPU alpha) {
-
-    SkASSERT(255 > alpha);
-
-    if (count <= 0) {
-        return;
-    }
-
-    unsigned alpha256 = SkAlpha255To256(alpha);
-
-    // First deal with odd counts
-    if (count & 1) {
-        uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres;
-        uint16x8_t vdst_wide, vsrc_wide;
-        unsigned dst_scale;
-
-        // Load
-        vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));
-        vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));
-
-        // Calc dst_scale
-        dst_scale = vget_lane_u8(vsrc, 3);
-        dst_scale = SkAlphaMulInv256(dst_scale, alpha256);
-
-        // Process src
-        vsrc_wide = vmovl_u8(vsrc);
-        vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256);
-
-        // Process dst
-        vdst_wide = vmovl_u8(vdst);
-        vdst_wide = vmulq_n_u16(vdst_wide, dst_scale);
-
-        // Combine
-        vdst_wide += vsrc_wide;
-        vres = vshrn_n_u16(vdst_wide, 8);
-
-        vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);
-        dst++;
-        src++;
-        count--;
-    }
-
-    if (count) {
-        uint8x8_t alpha_mask;
-        static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
-        alpha_mask = vld1_u8(alpha_mask_setup);
-
-        do {
-
-            uint8x8_t vsrc, vdst, vres, vsrc_alphas;
-            uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale;
-
-            __builtin_prefetch(src+32);
-            __builtin_prefetch(dst+32);
-
-            // Load
-            vsrc = vreinterpret_u8_u32(vld1_u32(src));
-            vdst = vreinterpret_u8_u32(vld1_u32(dst));
-
-            // Prepare src_scale
-            vsrc_scale = vdupq_n_u16(alpha256);
-
-            // Calc dst_scale
-            vsrc_alphas = vtbl1_u8(vsrc, alpha_mask);
-            vdst_scale = vmovl_u8(vsrc_alphas);
-            // Calculate SkAlphaMulInv256(vdst_scale, vsrc_scale).
-            // A 16-bit lane would overflow if we used 0xFFFF here,
-            // so use an approximation with 0xFF00 that is off by 1,
-            // and add back 1 after to get the correct value.
-            // This is valid if alpha256 <= 255.
-            vdst_scale = vmlsq_u16(vdupq_n_u16(0xFF00), vdst_scale, vsrc_scale);
-            vdst_scale = vsraq_n_u16(vdst_scale, vdst_scale, 8);
-            vdst_scale = vsraq_n_u16(vdupq_n_u16(1), vdst_scale, 8);
-
-            // Process src
-            vsrc_wide = vmovl_u8(vsrc);
-            vsrc_wide *= vsrc_scale;
-
-            // Process dst
-            vdst_wide = vmovl_u8(vdst);
-            vdst_wide *= vdst_scale;
-
-            // Combine
-            vdst_wide += vsrc_wide;
-            vres = vshrn_n_u16(vdst_wide, 8);
-
-            vst1_u32(dst, vreinterpret_u32_u8(vres));
-
-            src += 2;
-            dst += 2;
-            count -= 2;
-        } while(count);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
-#endif // #ifdef SK_CPU_ARM32
-
-///////////////////////////////////////////////////////////////////////////////
-
-const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
-    nullptr,   // S32_Opaque,
-    S32_Blend_BlitRow32_neon,        // S32_Blend,
-    nullptr,  // Ported to SkOpts
-#ifdef SK_CPU_ARM32
-    S32A_Blend_BlitRow32_neon        // S32A_Blend
-#else
-    nullptr
-#endif
-};
author	Mike Klein <mtklein@google.com>	2018-11-09 12:09:36 -0500
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	2018-11-09 17:58:05 +0000
commit	6a2c42f893ddb0141bef6e90c14fb68dda5ccb30 (patch)
tree	d6d183b0a1c9fa63a1723593a6ce8ec4b5d508d9 /src/opts/SkBlitRow_opts_arm_neon.cpp
parent	7600cb35666131a6e03faaeb3cd0c872cc1ac111 (diff)
download	platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.tar.gz platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.tar.bz2 platform_external_skqp-6a2c42f893ddb0141bef6e90c14fb68dda5ccb30.zip