From 286b1dd2bb77720f888a210243b6cc7a67cc1aca Mon Sep 17 00:00:00 2001 From: mtklein Date: Fri, 22 May 2015 10:54:39 -0700 Subject: Move Sk4px Xfermode code to a header so we can use it twice. - Once in SkXfermode as usual to pick up compile-time SSE and NEON - Once in SkXfermode_arm_neon to pick up run-time NEON This allows us to start cleaning up SkXfermode_arm_neon as we've done for SkXfermode_SSE2. I'm saving this catharsis for a day when I need it. The Sk4px xfermodes are generally faster than the existing NEON procs, so this should also have the side effect of a perf win there. This means our new Plus-AA code works for runtime NEON too. BUG=skia:3852 Review URL: https://codereview.chromium.org/1150313003 --- src/core/Sk4pxXfermode.h | 156 ++++++++++++++++++++++++++++++++++ src/core/SkXfermode.cpp | 144 +------------------------------ src/opts/SkXfermode_opts_arm_neon.cpp | 13 +-- 3 files changed, 167 insertions(+), 146 deletions(-) create mode 100644 src/core/Sk4pxXfermode.h diff --git a/src/core/Sk4pxXfermode.h b/src/core/Sk4pxXfermode.h new file mode 100644 index 0000000000..ff26436134 --- /dev/null +++ b/src/core/Sk4pxXfermode.h @@ -0,0 +1,156 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef Sk4pxXfermode_DEFINED +#define Sk4pxXfermode_DEFINED + +#include "Sk4px.h" + +// This file is possibly included into multiple .cpp files. +// Each gets its own independent instantiation by wrapping in an anonymous namespace. 
+namespace { + +#define XFERMODE(Name) \ + struct Name { \ + static Sk4px Xfer(const Sk4px&, const Sk4px&); \ + static const SkXfermode::Mode kMode = SkXfermode::k##Name##_Mode; \ + }; \ + inline Sk4px Name::Xfer(const Sk4px& s, const Sk4px& d) + +XFERMODE(Clear) { return Sk4px((SkPMColor)0); } +XFERMODE(Src) { return s; } +XFERMODE(Dst) { return d; } +XFERMODE(SrcIn) { return s.fastMulDiv255Round(d.alphas() ); } +XFERMODE(SrcOut) { return s.fastMulDiv255Round(d.alphas().inv()); } +XFERMODE(SrcOver) { return s + d.fastMulDiv255Round(s.alphas().inv()); } +XFERMODE(DstIn) { return SrcIn ::Xfer(d,s); } +XFERMODE(DstOut) { return SrcOut ::Xfer(d,s); } +XFERMODE(DstOver) { return SrcOver::Xfer(d,s); } + +// [ S * Da + (1 - Sa) * D] +XFERMODE(SrcATop) { + return Sk4px::Wide(s.mulWiden(d.alphas()) + d.mulWiden(s.alphas().inv())) + .div255RoundNarrow(); +} +XFERMODE(DstATop) { return SrcATop::Xfer(d,s); } +//[ S * (1 - Da) + (1 - Sa) * D ] +XFERMODE(Xor) { + return Sk4px::Wide(s.mulWiden(d.alphas().inv()) + d.mulWiden(s.alphas().inv())) + .div255RoundNarrow(); +} +// [S + D ] +XFERMODE(Plus) { return s.saturatedAdd(d); } +// [S * D ] +XFERMODE(Modulate) { return s.fastMulDiv255Round(d); } +// [S + D - S * D] +XFERMODE(Screen) { + // Doing the math as S + (1-S)*D or S + (D - S*D) means the add and subtract can be done + // in 8-bit space without overflow. S + (1-S)*D is a touch faster because inv() is cheap. + return s + d.fastMulDiv255Round(s.inv()); +} +XFERMODE(Multiply) { + return Sk4px::Wide(s.mulWiden(d.alphas().inv()) + + d.mulWiden(s.alphas().inv()) + + s.mulWiden(d)) + .div255RoundNarrow(); +} +// [ Sa + Da - Sa*Da, Sc + Dc - 2*min(Sc*Da, Dc*Sa) ] (And notice Sa*Da == min(Sa*Da, Da*Sa).) +XFERMODE(Difference) { + auto m = Sk4px::Wide(Sk16h::Min(s.mulWiden(d.alphas()), d.mulWiden(s.alphas()))) + .div255RoundNarrow(); + // There's no chance of underflow, and if we subtract m before adding s+d, no overflow. 
+ return (s - m) + (d - m.zeroAlphas()); +} +// [ Sa + Da - Sa*Da, Sc + Dc - 2*Sc*Dc ] +XFERMODE(Exclusion) { + auto p = s.fastMulDiv255Round(d); + // There's no chance of underflow, and if we subtract p before adding src+dst, no overflow. + return (s - p) + (d - p.zeroAlphas()); +} + +#undef XFERMODE + +// A reasonable fallback mode for doing AA is to simply apply the transfermode first, +// then linearly interpolate the AA. +template <typename Mode> +static Sk4px xfer_aa(const Sk4px& s, const Sk4px& d, const Sk16b& aa) { + Sk4px noAA = Mode::Xfer(s, d); + return Sk4px::Wide(noAA.mulWiden(aa) + d.mulWiden(Sk4px(aa).inv())) + .div255RoundNarrow(); +} + +// For some transfermodes we specialize AA, either for correctness or performance. +#ifndef SK_NO_SPECIALIZED_AA_XFERMODES + #define XFERMODE_AA(Name) \ + template <> Sk4px xfer_aa<Name>(const Sk4px& s, const Sk4px& d, const Sk16b& aa) + + // Plus' clamp needs to happen after AA. skia:3852 + XFERMODE_AA(Plus) { // [ clamp(D + AA*S) ] + // We implement this as D + Min(S*AA, (1-D)) to fit the arguments to Min in 16 bits. 
+ return d + + Sk4px::Wide(Sk16h::Min(s.mulWiden(aa), d.inv().mul255Widen())).div255RoundNarrow(); + } + + #undef XFERMODE_AA +#endif + +template <typename ProcType> +class SkT4pxXfermode : public SkProcCoeffXfermode { +public: + static SkProcCoeffXfermode* Create(const ProcCoeff& rec) { + return SkNEW_ARGS(SkT4pxXfermode, (rec)); + } + + void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { + if (NULL == aa) { + Sk4px::MapDstSrc(n, dst, src, [&](const Sk4px& dst4, const Sk4px& src4) { + return ProcType::Xfer(src4, dst4); + }); + } else { + Sk4px::MapDstSrcAlpha(n, dst, src, aa, + [&](const Sk4px& dst4, const Sk4px& src4, const Sk16b& alpha) { + return xfer_aa<ProcType>(src4, dst4, alpha); + }); + } + } + +private: + SkT4pxXfermode(const ProcCoeff& rec) : SkProcCoeffXfermode(rec, ProcType::kMode) {} + + typedef SkProcCoeffXfermode INHERITED; +}; + +static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) { +#if !defined(SK_CPU_ARM32) || defined(SK_ARM_HAS_NEON) + switch (mode) { + case SkXfermode::kClear_Mode: return SkT4pxXfermode<Clear>::Create(rec); + case SkXfermode::kSrc_Mode: return SkT4pxXfermode<Src>::Create(rec); + case SkXfermode::kDst_Mode: return SkT4pxXfermode<Dst>::Create(rec); + case SkXfermode::kSrcOver_Mode: return SkT4pxXfermode<SrcOver>::Create(rec); + case SkXfermode::kDstOver_Mode: return SkT4pxXfermode<DstOver>::Create(rec); + case SkXfermode::kSrcIn_Mode: return SkT4pxXfermode<SrcIn>::Create(rec); + case SkXfermode::kDstIn_Mode: return SkT4pxXfermode<DstIn>::Create(rec); + case SkXfermode::kSrcOut_Mode: return SkT4pxXfermode<SrcOut>::Create(rec); + case SkXfermode::kDstOut_Mode: return SkT4pxXfermode<DstOut>::Create(rec); + case SkXfermode::kSrcATop_Mode: return SkT4pxXfermode<SrcATop>::Create(rec); + case SkXfermode::kDstATop_Mode: return SkT4pxXfermode<DstATop>::Create(rec); + case SkXfermode::kXor_Mode: return SkT4pxXfermode<Xor>::Create(rec); + case SkXfermode::kPlus_Mode: return SkT4pxXfermode<Plus>::Create(rec); + case SkXfermode::kModulate_Mode: return SkT4pxXfermode<Modulate>::Create(rec); 
+ case SkXfermode::kScreen_Mode: return SkT4pxXfermode<Screen>::Create(rec); + case SkXfermode::kMultiply_Mode: return SkT4pxXfermode<Multiply>::Create(rec); + case SkXfermode::kDifference_Mode: return SkT4pxXfermode<Difference>::Create(rec); + case SkXfermode::kExclusion_Mode: return SkT4pxXfermode<Exclusion>::Create(rec); + default: break; + } +#endif + return nullptr; +} + +} // namespace + +#endif//Sk4pxXfermode_DEFINED diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp index 99f83f9f3d..d8ca8f1e6e 100644 --- a/src/core/SkXfermode.cpp +++ b/src/core/SkXfermode.cpp @@ -9,7 +9,7 @@ #include "SkXfermode.h" #include "SkXfermode_opts_SSE2.h" #include "SkXfermode_proccoeff.h" -#include "Sk4px.h" +#include "Sk4pxXfermode.h" #include "SkColorPriv.h" #include "SkLazyPtr.h" #include "SkMathPriv.h" @@ -19,14 +19,6 @@ #include "SkUtilsArm.h" #include "SkWriteBuffer.h" -#if SK_CPU_X86 && SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE2 - #warning "SkXfermode will be much faster if you compile with support for SSE2." -#endif - -#if SK_CPU_X86 || defined(SK_ARM_HAS_NEON) - #define SK_USE_4PX_XFERMODES -#endif - #if !SK_ARM_NEON_IS_NONE #include "SkXfermode_opts_arm_neon.h" #endif @@ -1181,116 +1173,6 @@ void SkDstInXfermode::toString(SkString* str) const { /////////////////////////////////////////////////////////////////////////////// -#define XFERMODE(Name) \ - struct Name { \ - static Sk4px Xfer(const Sk4px&, const Sk4px&); \ - static const SkXfermode::Mode kMode = SkXfermode::k##Name##_Mode; \ - }; \ - inline Sk4px Name::Xfer(const Sk4px& s, const Sk4px& d) - -XFERMODE(Clear) { return Sk4px((SkPMColor)0); } -XFERMODE(Src) { return s; } -XFERMODE(Dst) { return d; } -XFERMODE(SrcIn) { return s.fastMulDiv255Round(d.alphas() ); } -XFERMODE(SrcOut) { return s.fastMulDiv255Round(d.alphas().inv()); } -XFERMODE(SrcOver) { return s + d.fastMulDiv255Round(s.alphas().inv()); } -XFERMODE(DstIn) { return SrcIn ::Xfer(d,s); } -XFERMODE(DstOut) { return SrcOut ::Xfer(d,s); } -XFERMODE(DstOver) { return SrcOver::Xfer(d,s); } 
- -// [ S * Da + (1 - Sa) * D] -XFERMODE(SrcATop) { - return Sk4px::Wide(s.mulWiden(d.alphas()) + d.mulWiden(s.alphas().inv())) - .div255RoundNarrow(); -} -XFERMODE(DstATop) { return SrcATop::Xfer(d,s); } -//[ S * (1 - Da) + (1 - Sa) * D ] -XFERMODE(Xor) { - return Sk4px::Wide(s.mulWiden(d.alphas().inv()) + d.mulWiden(s.alphas().inv())) - .div255RoundNarrow(); -} -// [S + D ] -XFERMODE(Plus) { return s.saturatedAdd(d); } -// [S * D ] -XFERMODE(Modulate) { return s.fastMulDiv255Round(d); } -// [S + D - S * D] -XFERMODE(Screen) { - // Doing the math as S + (1-S)*D or S + (D - S*D) means the add and subtract can be done - // in 8-bit space without overflow. S + (1-S)*D is a touch faster because inv() is cheap. - return s + d.fastMulDiv255Round(s.inv()); -} -XFERMODE(Multiply) { - return Sk4px::Wide(s.mulWiden(d.alphas().inv()) + - d.mulWiden(s.alphas().inv()) + - s.mulWiden(d)) - .div255RoundNarrow(); -} -// [ Sa + Da - Sa*Da, Sc + Dc - 2*min(Sc*Da, Dc*Sa) ] (And notice Sa*Da == min(Sa*Da, Da*Sa).) -XFERMODE(Difference) { - auto m = Sk4px::Wide(Sk16h::Min(s.mulWiden(d.alphas()), d.mulWiden(s.alphas()))) - .div255RoundNarrow(); - // There's no chance of underflow, and if we subtract m before adding s+d, no overflow. - return (s - m) + (d - m.zeroAlphas()); -} -// [ Sa + Da - Sa*Da, Sc + Dc - 2*Sc*Dc ] -XFERMODE(Exclusion) { - auto p = s.fastMulDiv255Round(d); - // There's no chance of underflow, and if we subtract p before adding src+dst, no overflow. - return (s - p) + (d - p.zeroAlphas()); -} - -#undef XFERMODE - -// A reasonable fallback mode for doing AA is to simply apply the transfermode first, -// then linearly interpolate the AA. -template <typename Mode> -static Sk4px xfer_aa(const Sk4px& s, const Sk4px& d, const Sk16b& aa) { - Sk4px noAA = Mode::Xfer(s, d); - return Sk4px::Wide(noAA.mulWiden(aa) + d.mulWiden(Sk4px(aa).inv())) - .div255RoundNarrow(); -} - -// For some transfermodes we specialize AA, either for correctness or performance. 
-#ifndef SK_NO_SPECIALIZED_AA_XFERMODES - #define XFERMODE_AA(Name) \ - template <> Sk4px xfer_aa<Name>(const Sk4px& s, const Sk4px& d, const Sk16b& aa) - - // Plus' clamp needs to happen after AA. skia:3852 - XFERMODE_AA(Plus) { // [ clamp(D + AA*S) ] - // We implement this as D + Min(S*AA, (1-D)) to fit the arguments to Min in 16 bits. - return d + - Sk4px::Wide(Sk16h::Min(s.mulWiden(aa), d.inv().mul255Widen())).div255RoundNarrow(); - } - - #undef XFERMODE_AA -#endif - -template <typename ProcType> -class SkT4pxXfermode : public SkProcCoeffXfermode { -public: - static SkXfermode* Create(const ProcCoeff& rec) { - return SkNEW_ARGS(SkT4pxXfermode, (rec)); - } - - void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { - if (NULL == aa) { - Sk4px::MapDstSrc(n, dst, src, [&](const Sk4px& dst4, const Sk4px& src4) { - return ProcType::Xfer(src4, dst4); - }); - } else { - Sk4px::MapDstSrcAlpha(n, dst, src, aa, - [&](const Sk4px& dst4, const Sk4px& src4, const Sk16b& alpha) { - return xfer_aa<ProcType>(src4, dst4, alpha); - }); - } - } - -private: - SkT4pxXfermode(const ProcCoeff& rec) : SkProcCoeffXfermode(rec, ProcType::kMode) {} - - typedef SkProcCoeffXfermode INHERITED; -}; - /////////////////////////////////////////////////////////////////////////////// class SkDstOutXfermode : public SkProcCoeffXfermode { @@ -1351,29 +1233,9 @@ SkXfermode* create_mode(int iMode) { rec.fProc = pp; } -#if defined(SK_USE_4PX_XFERMODES) - switch (mode) { - case SkXfermode::kClear_Mode: return SkT4pxXfermode<Clear>::Create(rec); - case SkXfermode::kSrc_Mode: return SkT4pxXfermode<Src>::Create(rec); - case SkXfermode::kDst_Mode: return SkT4pxXfermode<Dst>::Create(rec); - case SkXfermode::kSrcOver_Mode: return SkT4pxXfermode<SrcOver>::Create(rec); - case SkXfermode::kDstOver_Mode: return SkT4pxXfermode<DstOver>::Create(rec); - case SkXfermode::kSrcIn_Mode: return SkT4pxXfermode<SrcIn>::Create(rec); - case SkXfermode::kDstIn_Mode: return SkT4pxXfermode<DstIn>::Create(rec); - case SkXfermode::kSrcOut_Mode: return 
SkT4pxXfermode<SrcOut>::Create(rec); - case SkXfermode::kDstOut_Mode: return SkT4pxXfermode<DstOut>::Create(rec); - case SkXfermode::kSrcATop_Mode: return SkT4pxXfermode<SrcATop>::Create(rec); - case SkXfermode::kDstATop_Mode: return SkT4pxXfermode<DstATop>::Create(rec); - case SkXfermode::kXor_Mode: return SkT4pxXfermode<Xor>::Create(rec); - case SkXfermode::kPlus_Mode: return SkT4pxXfermode<Plus>::Create(rec); - case SkXfermode::kModulate_Mode: return SkT4pxXfermode<Modulate>::Create(rec); - case SkXfermode::kScreen_Mode: return SkT4pxXfermode<Screen>::Create(rec); - case SkXfermode::kMultiply_Mode: return SkT4pxXfermode<Multiply>::Create(rec); - case SkXfermode::kDifference_Mode: return SkT4pxXfermode<Difference>::Create(rec); - case SkXfermode::kExclusion_Mode: return SkT4pxXfermode<Exclusion>::Create(rec); - default: break; + if (auto xfermode = SkCreate4pxXfermode(rec, mode)) { + return xfermode; } -#endif SkXfermode* xfer = NULL; diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp index 12c2f57d4f..1759429c57 100644 --- a/src/opts/SkXfermode_opts_arm_neon.cpp +++ b/src/opts/SkXfermode_opts_arm_neon.cpp @@ -1,3 +1,5 @@ +// Copyright 2013 unknown + #include "SkXfermode.h" #include "SkXfermode_proccoeff.h" #include "SkColorPriv.h" @@ -5,6 +7,7 @@ #include <arm_neon.h> #include "SkColor_opts_neon.h" #include "SkXfermode_opts_arm_neon.h" +#include "Sk4pxXfermode.h" #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) @@ -1010,11 +1013,11 @@ SK_COMPILE_ASSERT( SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, SkXfermode::Mode mode) { - - void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]); - - if (procSIMD != NULL) { - return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); + if (auto xfermode = SkCreate4pxXfermode(rec, mode)) { + return xfermode; + } + if (auto proc = gNEONXfermodeProcs[mode]) { + return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, (void*)proc)); } return NULL; } -- cgit v1.2.3