diff options
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkNx_neon.h | 120 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 94 | ||||
-rw-r--r-- | src/opts/SkXfermode_opts_SSE2.cpp | 8 | ||||
-rw-r--r-- | src/opts/SkXfermode_opts_arm_neon.cpp | 1 |
4 files changed, 103 insertions, 120 deletions
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index da926e0b4c..b319807779 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -33,34 +33,7 @@ namespace { // See SkNx.h case 31: return op(v, 31); } return fVec template <> -class SkNb<2, 4> { -public: - SkNb(uint32x2_t vec) : fVec(vec) {} - - SkNb() {} - bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec, 1); } - bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec, 1); } - - uint32x2_t fVec; -}; - -template <> -class SkNb<4, 4> { -public: - SkNb(uint32x4_t vec) : fVec(vec) {} - - SkNb() {} - bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec, 1) - && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec, 3); } - bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec, 1) - || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec, 3); } - - uint32x4_t fVec; -}; - -template <> class SkNf<2, float> { - typedef SkNb<2, 4> Nb; public: SkNf(float32x2_t vec) : fVec(vec) {} @@ -93,12 +66,14 @@ public: #endif } - Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } - Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } - Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } - Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } - Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } - Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec)); } + SkNf operator == (const SkNf& o) const { return vreinterpret_f32_u32(vceq_f32(fVec, o.fVec)); } + SkNf operator < (const SkNf& o) const { return vreinterpret_f32_u32(vclt_f32(fVec, o.fVec)); } + SkNf operator > (const SkNf& o) const { return vreinterpret_f32_u32(vcgt_f32(fVec, o.fVec)); } + SkNf operator <= (const SkNf& o) const { return vreinterpret_f32_u32(vcle_f32(fVec, o.fVec)); } + SkNf operator >= (const SkNf& o) const { return vreinterpret_f32_u32(vcge_f32(fVec, o.fVec)); } + SkNf operator != (const SkNf& o) const { + return vreinterpret_f32_u32(vmvn_u32(vceq_f32(fVec, o.fVec))); + } static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } @@ -126,25 +101,21 @@ public: return vget_lane_f32(fVec, k&1); } + bool allTrue() const { + auto v = vreinterpret_u32_f32(fVec); + return vget_lane_u32(v,0) && vget_lane_u32(v,1); + } + bool anyTrue() const { + auto v = vreinterpret_u32_f32(fVec); + return vget_lane_u32(v,0) || vget_lane_u32(v,1); + } + float32x2_t fVec; }; #if defined(SK_CPU_ARM64) template <> -class SkNb<2, 8> { -public: - SkNb(uint64x2_t vec) : fVec(vec) {} - - SkNb() {} - bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec, 1); } - bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec, 1); } - - uint64x2_t fVec; -}; - -template <> class SkNf<2, double> { - typedef SkNb<2, 8> Nb; public: SkNf(float64x2_t vec) : fVec(vec) {} @@ -160,13 +131,14 @@ public: SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); } SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); } - Nb operator == (const SkNf& o) const { return vceqq_f64(fVec, o.fVec); } - Nb operator < (const SkNf& o) const { return vcltq_f64(fVec, o.fVec); } - Nb operator > (const SkNf& o) const { return vcgtq_f64(fVec, o.fVec); } - Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } - Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } - Nb operator != (const SkNf& o) const { - return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); + // vreinterpretq_f64_u64 and vreinterpretq_f64_u32 don't seem to exist.... weird. + SkNf operator==(const SkNf& o) const { return (float64x2_t)(vceqq_f64(fVec, o.fVec)); } + SkNf operator <(const SkNf& o) const { return (float64x2_t)(vcltq_f64(fVec, o.fVec)); } + SkNf operator >(const SkNf& o) const { return (float64x2_t)(vcgtq_f64(fVec, o.fVec)); } + SkNf operator<=(const SkNf& o) const { return (float64x2_t)(vcleq_f64(fVec, o.fVec)); } + SkNf operator>=(const SkNf& o) const { return (float64x2_t)(vcgeq_f64(fVec, o.fVec)); } + SkNf operator != (const SkNf& o) const { + return (float64x2_t)(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); } static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); } @@ -202,6 +174,16 @@ public: return vgetq_lane_f64(fVec, k&1); } + // vreinterpretq_u64_f64 doesn't seem to exist.... weird. + bool allTrue() const { + auto v = (uint64x2_t)(fVec); + return vgetq_lane_u64(v,0) && vgetq_lane_u64(v,1); + } + bool anyTrue() const { + auto v = (uint64x2_t)(fVec); + return vgetq_lane_u64(v,0) || vgetq_lane_u64(v,1); + } + float64x2_t fVec; }; #endif//defined(SK_CPU_ARM64) @@ -235,7 +217,6 @@ public: template <> class SkNf<4, float> { - typedef SkNb<4, 4> Nb; public: SkNf(float32x4_t vec) : fVec(vec) {} @@ -270,12 +251,14 @@ public: #endif } - Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } - Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } - Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } - Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } - Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } - Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fVec)); } + SkNf operator==(const SkNf& o) const { return vreinterpretq_f32_u32(vceqq_f32(fVec, o.fVec)); } + SkNf operator <(const SkNf& o) const { return vreinterpretq_f32_u32(vcltq_f32(fVec, o.fVec)); } + SkNf operator >(const SkNf& o) const { return vreinterpretq_f32_u32(vcgtq_f32(fVec, o.fVec)); } + SkNf operator<=(const SkNf& o) const { return vreinterpretq_f32_u32(vcleq_f32(fVec, o.fVec)); } + SkNf operator>=(const SkNf& o) const { return vreinterpretq_f32_u32(vcgeq_f32(fVec, o.fVec)); } + SkNf operator!=(const SkNf& o) const { + return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); + } static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } @@ -303,6 +286,17 @@ public: return vgetq_lane_f32(fVec, k&3); } + bool allTrue() const { + auto v = vreinterpretq_u32_f32(fVec); + return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1) + && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3); + } + bool anyTrue() const { + auto v = vreinterpretq_u32_f32(fVec); + return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1) + || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); + } + float32x4_t fVec; }; @@ -363,12 +357,18 @@ public: SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); } static SkNi Min(const SkNi& a, const SkNi& b) { return vminq_u8(a.fVec, b.fVec); } + SkNi operator < (const SkNi& o) const { return vcltq_u8(fVec, o.fVec); } template <int k> uint8_t kth() const { SkASSERT(0 <= k && k < 15); return vgetq_lane_u8(fVec, k&16); } + SkNi thenElse(const SkNi& t, const SkNi& e) const { + return vorrq_u8(vandq_u8(t.fVec, fVec), + vbicq_u8(e.fVec, fVec)); + } + uint8x16_t fVec; }; diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index 12a4719d0c..9b4de700ee 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -12,46 +12,9 @@ namespace { // See SkNx.h -template <> -class SkNb<2, 4> { -public: - SkNb(const __m128i& vec) : fVec(vec) {} - - SkNb() {} - bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } - bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); } - - __m128i fVec; -}; - -template <> -class SkNb<4, 4> { -public: - SkNb(const __m128i& vec) : fVec(vec) {} - - SkNb() {} - bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } - bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } - - __m128i fVec; -}; - -template <> -class SkNb<2, 8> { -public: - SkNb(const __m128i& vec) : fVec(vec) {} - - SkNb() {} - bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } - bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } - - __m128i fVec; -}; - template <> class SkNf<2, float> { - typedef SkNb<2, 4> Nb; public: SkNf(const __m128& vec) : fVec(vec) {} @@ -69,12 +32,12 @@ public: SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } - Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); } - Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } - Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); } - Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); } - Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); } - Nb operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); } + SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } + SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } + SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec); } + SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } + SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec); } + SkNf operator >= (const SkNf& o) const { return _mm_cmpge_ps (fVec, o.fVec); } static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } @@ -93,12 +56,14 @@ public: return pun.fs[k&1]; } + bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } + bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } + __m128 fVec; }; template <> class SkNf<2, double> { - typedef SkNb<2, 8> Nb; public: SkNf(const __m128d& vec) : fVec(vec) {} @@ -114,12 +79,12 @@ public: SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } - Nb operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd (fVec, o.fVec)); } - Nb operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd(fVec, o.fVec)); } - Nb operator < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd (fVec, o.fVec)); } - Nb operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd (fVec, o.fVec)); } - Nb operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd (fVec, o.fVec)); } - Nb operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd (fVec, o.fVec)); } + SkNf operator == (const SkNf& o) const { return _mm_cmpeq_pd (fVec, o.fVec); } + SkNf operator != (const SkNf& o) const { return _mm_cmpneq_pd(fVec, o.fVec); } + SkNf operator < (const SkNf& o) const { return _mm_cmplt_pd (fVec, o.fVec); } + SkNf operator > (const SkNf& o) const { return _mm_cmpgt_pd (fVec, o.fVec); } + SkNf operator <= (const SkNf& o) const { return _mm_cmple_pd (fVec, o.fVec); } + SkNf operator >= (const SkNf& o) const { return _mm_cmpge_pd (fVec, o.fVec); } static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); } @@ -138,6 +103,9 @@ public: return pun.ds[k&1]; } + bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castpd_si128(fVec)); } + bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castpd_si128(fVec)); } + __m128d fVec; }; @@ -181,7 +149,6 @@ public: template <> class SkNf<4, float> { - typedef SkNb<4, 4> Nb; public: SkNf(const __m128& vec) : fVec(vec) {} @@ -199,12 +166,12 @@ public: SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } - Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); } - Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } - Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); } - Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); } - Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); } - Nb operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); } + SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } + SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } + SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec); } + SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } + SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec); } + SkNf operator >= (const SkNf& o) const { return _mm_cmpge_ps (fVec, o.fVec); } static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } @@ -223,6 +190,9 @@ public: return pun.fs[k&3]; } + bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(fVec)); } + bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(fVec)); } + __m128 fVec; }; @@ -312,6 +282,11 @@ public: SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } static SkNi Min(const SkNi& a, const SkNi& b) { return _mm_min_epu8(a.fVec, b.fVec); } + SkNi operator < (const SkNi& o) const { + // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. + auto flip = _mm_set1_epi8(char(0x80)); + return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.fVec)); + } template <int k> uint8_t kth() const { SkASSERT(0 <= k && k < 16); @@ -320,6 +295,11 @@ public: return k % 2 == 0 ? pair : (pair >> 8); } + SkNi thenElse(const SkNi& t, const SkNi& e) const { + return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), + _mm_andnot_si128(fVec, e.fVec)); + } + __m128i fVec; }; diff --git a/src/opts/SkXfermode_opts_SSE2.cpp b/src/opts/SkXfermode_opts_SSE2.cpp index b92477094b..f8772808a7 100644 --- a/src/opts/SkXfermode_opts_SSE2.cpp +++ b/src/opts/SkXfermode_opts_SSE2.cpp @@ -515,15 +515,17 @@ void SkSSE2ProcCoeffXfermode::toString(SkString* str) const { SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, SkXfermode::Mode mode) { SkXfermodeProcSIMD proc = nullptr; - // TODO(mtklein): implement these Sk4px. switch (mode) { + // TODO(mtklein): Sk4pxXfermode has these now. Clean up. case SkProcCoeffXfermode::kOverlay_Mode: proc = overlay_modeproc_SSE2; break; case SkProcCoeffXfermode::kDarken_Mode: proc = darken_modeproc_SSE2; break; case SkProcCoeffXfermode::kLighten_Mode: proc = lighten_modeproc_SSE2; break; - case SkProcCoeffXfermode::kColorDodge_Mode: proc = colordodge_modeproc_SSE2; break; - case SkProcCoeffXfermode::kColorBurn_Mode: proc = colorburn_modeproc_SSE2; break; case SkProcCoeffXfermode::kHardLight_Mode: proc = hardlight_modeproc_SSE2; break; + + // TODO(mtklein): implement these with SkPMFloat. case SkProcCoeffXfermode::kSoftLight_Mode: proc = softlight_modeproc_SSE2; break; + case SkProcCoeffXfermode::kColorDodge_Mode: proc = colordodge_modeproc_SSE2; break; + case SkProcCoeffXfermode::kColorBurn_Mode: proc = colorburn_modeproc_SSE2; break; default: break; } return proc ? SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, (void*)proc)) : nullptr; diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp index 1759429c57..205a00b4ce 100644 --- a/src/opts/SkXfermode_opts_arm_neon.cpp +++ b/src/opts/SkXfermode_opts_arm_neon.cpp @@ -1016,6 +1016,7 @@ SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, if (auto xfermode = SkCreate4pxXfermode(rec, mode)) { return xfermode; } + // TODO: Sk4pxXfermode now covers every mode found in this file. Delete them all! if (auto proc = gNEONXfermodeProcs[mode]) { return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, (void*)proc)); } |