diff options
| author | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:32:55 -0800 |
|---|---|---|
| committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:32:55 -0800 |
| commit | dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0 (patch) | |
| tree | 2ba8d1a0846d69b18f623515e8d9b5d9fe38b590 /libpixelflinger/fixed.cpp | |
| parent | e54eebbf1a908d65ee8cf80bab62821c05666d70 (diff) | |
| download | system_core-dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0.tar.gz system_core-dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0.tar.bz2 system_core-dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0.zip | |
auto import from //depot/cupcake/@135843
Diffstat (limited to 'libpixelflinger/fixed.cpp')
| -rw-r--r-- | libpixelflinger/fixed.cpp | 339 |
1 files changed, 339 insertions, 0 deletions
diff --git a/libpixelflinger/fixed.cpp b/libpixelflinger/fixed.cpp new file mode 100644 index 00000000..5b920628 --- /dev/null +++ b/libpixelflinger/fixed.cpp @@ -0,0 +1,339 @@ +/* libs/pixelflinger/fixed.cpp +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + +#include <stdio.h> + +#include <private/pixelflinger/ggl_context.h> +#include <private/pixelflinger/ggl_fixed.h> + + +// ------------------------------------------------------------------------ + +int32_t gglRecipQNormalized(int32_t x, int* exponent) +{ + const int32_t s = x>>31; + uint32_t a = s ? -x : x; + + // the result will overflow, so just set it to the biggest/inf value + if (ggl_unlikely(a <= 2LU)) { + *exponent = 0; + return s ? FIXED_MIN : FIXED_MAX; + } + + // Newton-Raphson iteration: + // x = r*(2 - a*r) + + const int32_t lz = gglClz(a); + a <<= lz; // 0.32 + uint32_t r = a; + // note: if a == 0x80000000, this means x was a power-of-2, in this + // case we don't need to compute anything. We get the reciprocal for + // (almost) free. + if (a != 0x80000000) { + r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a + // 0.32 + 2.30 = 2.62 -> 2.30 + // 2.30 + 2.30 = 4.60 -> 2.30 + r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; + r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; + } + + // shift right 1-bit to make room for the sign bit + *exponent = 30-lz-1; + r >>= 1; + return s ? -r : r; +} + +int32_t gglRecipQ(GGLfixed x, int q) +{ + int shift; + x = gglRecipQNormalized(x, &shift); + shift += 16-q; + x += 1L << (shift-1); // rounding + x >>= shift; + return x; +} + +// ------------------------------------------------------------------------ + +GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) +{ + if ((d>>24) && ((d>>24)+1)) { + n >>= 8; + d >>= 8; + } + return gglMulx(n, gglRecip(d)); +} + +// ------------------------------------------------------------------------ + +static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = { + // 1/sqrt(x) with x = 1-N/16, N=[8...1] + 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865 +}; + +GGLfixed gglSqrtRecipx(GGLfixed x) +{ + if (x == 0) return FIXED_MAX; + if (x == FIXED_ONE) return x; + const GGLfixed a = x; + const int32_t lz = gglClz(x); + x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7]; + const int32_t exp = lz - 16; + if (exp <= 0) x >>= -exp>>1; + else x <<= (exp>>1) + (exp & 1); + if (exp & 1) { + x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1; + } + // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x) + x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); + x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); + return x; +} + +GGLfixed gglSqrtx(GGLfixed a) +{ + // Compute a full precision square-root (24 bits accuracy) + GGLfixed r = 0; + GGLfixed bit = 0x800000; + int32_t bshift = 15; + do { + GGLfixed temp = bit + (r<<1); + if (bshift >= 8) temp <<= (bshift-8); + else temp >>= (8-bshift); + if (a >= temp) { + r += bit; + a -= temp; + } + bshift--; + } while (bit>>=1); + return r; +} + +// ------------------------------------------------------------------------ + +static const GGLfixed ggl_log_approx_tab[] = { + // -ln(x)/ln(2) with x = N/16, N=[8...16] + 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000 +}; + +static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0] + 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000 +}; + +GGLfixed gglPowx(GGLfixed x, GGLfixed y) +{ + // prerequisite: 0 <= x <= 1, and y >=0 + + // pow(x,y) = 2^(y*log2(x)) + // = 2^(y*log2(x*(2^exp)*(2^-exp)))) + // = 2^(y*(log2(X)-exp)) + // = 2^(log2(X)*y - y*exp) + // = 2^( - (-log2(X)*y + y*exp) ) + + int32_t exp = gglClz(x) - 16; + GGLfixed f = x << exp; + x = (f & 0x0FFF)<<4; + f = (f >> 12) & 0x7; + GGLfixed p = gglMulAddx( + ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x, + ggl_log_approx_tab[f]); + p = gglMulAddx(p, y, y*exp); + exp = gglFixedToIntFloor(p); + if (exp < 31) { + p = gglFracx(p); + x = (p & 0x1FFF)<<3; + p >>= 13; + p = gglMulAddx( + ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x, + ggl_alog_approx_tab[p]); + p >>= exp; + } else { + p = 0; + } + return p; + // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f; +} + +// ------------------------------------------------------------------------ + +int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) +{ + //int32_t r =int32_t((int64_t(n)<<i)/d); + const int32_t ds = n^d; + if (n<0) n = -n; + if (d<0) d = -d; + int nd = gglClz(d) - gglClz(n); + i += nd + 1; + if (nd > 0) d <<= nd; + else n <<= -nd; + uint32_t q = 0; + + int j = i & 7; + i >>= 3; + + // gcc deals with the code below pretty well. + // we get 3.75 cycles per bit in the main loop + // and 8 cycles per bit in the termination loop + if (ggl_likely(i)) { + n -= d; + do { + q <<= 8; + if (n>=0) q |= 128; + else n += d; + n = n*2 - d; + if (n>=0) q |= 64; + else n += d; + n = n*2 - d; + if (n>=0) q |= 32; + else n += d; + n = n*2 - d; + if (n>=0) q |= 16; + else n += d; + n = n*2 - d; + if (n>=0) q |= 8; + else n += d; + n = n*2 - d; + if (n>=0) q |= 4; + else n += d; + n = n*2 - d; + if (n>=0) q |= 2; + else n += d; + n = n*2 - d; + if (n>=0) q |= 1; + else n += d; + + if (--i == 0) + goto finish; + + n = n*2 - d; + } while(true); + do { + q <<= 1; + n = n*2 - d; + if (n>=0) q |= 1; + else n += d; + finish: ; + } while (j--); + return (ds<0) ? -q : q; + } + + n -= d; + if (n>=0) q |= 1; + else n += d; + j--; + goto finish; +} + +// ------------------------------------------------------------------------ + +// assumes that the int32_t values of a, b, and c are all positive +// use when both a and b are larger than c + +template <typename T> +static inline void swap(T& a, T& b) { + T t(a); + a = b; + b = t; +} + +static __attribute__((noinline)) +int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c) +{ + // first we compute a*b as a 64-bit integer + // (GCC generates umull with the code below) + uint64_t ab = uint64_t(a)*b; + uint32_t hi = ab>>32; + uint32_t lo = ab; + uint32_t result; + + // now perform the division + if (hi >= c) { + overflow: + result = 0x7fffffff; // basic overflow + } else if (hi == 0) { + result = lo/c; // note: c can't be 0 + if ((result >> 31) != 0) // result must fit in 31 bits + goto overflow; + } else { + uint32_t r = hi; + int bits = 31; + result = 0; + do { + r = (r << 1) | (lo >> 31); + lo <<= 1; + result <<= 1; + if (r >= c) { + r -= c; + result |= 1; + } + } while (bits--); + } + return int32_t(result); +} + +// assumes a >= 0 and c >= b >= 0 +static inline +int32_t quick_muldiv(int32_t a, int32_t b, int32_t c) +{ + int32_t r = 0, q = 0, i; + int leading = gglClz(a); + i = 32 - leading; + a <<= leading; + do { + r <<= 1; + if (a < 0) + r += b; + a <<= 1; + q <<= 1; + if (r >= c) { + r -= c; + q++; + } + asm(""::); // gcc generates better code this way + if (r >= c) { + r -= c; + q++; + } + } + while (--i); + return q; +} + +// this function computes a*b/c with 64-bit intermediate accuracy +// overflows (e.g. division by 0) are handled and return INT_MAX + +int32_t gglMulDivi(int32_t a, int32_t b, int32_t c) +{ + int32_t result; + int32_t sign = a^b^c; + + if (a < 0) a = -a; + if (b < 0) b = -b; + if (c < 0) c = -c; + + if (a < b) { + swap(a, b); + } + + if (b <= c) result = quick_muldiv(a, b, c); + else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c); + + if (sign < 0) + result = -result; + + return result; +} |
