summaryrefslogtreecommitdiffstats
path: root/libpixelflinger/include
diff options
context:
space:
mode:
authorElliott Hughes <enh@google.com>2015-11-05 18:03:18 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2015-11-05 18:03:18 +0000
commit251f8b69fbe5068aa3a9ebbc3dc42ed5b228fd73 (patch)
tree57b4ea4a368626400a8991c27888dacd15c997a7 /libpixelflinger/include
parentd3b177ccb5502306546a357d0897e2552384b18c (diff)
parentab5265ebe65eac3b9555eaa3f425cf8e2759d3b3 (diff)
downloadcore-251f8b69fbe5068aa3a9ebbc3dc42ed5b228fd73.tar.gz
core-251f8b69fbe5068aa3a9ebbc3dc42ed5b228fd73.tar.bz2
core-251f8b69fbe5068aa3a9ebbc3dc42ed5b228fd73.zip
Merge "Add MIPS64r6 support for libpixelflinger"
Diffstat (limited to 'libpixelflinger/include')
-rw-r--r--libpixelflinger/include/private/pixelflinger/ggl_context.h4
-rw-r--r--libpixelflinger/include/private/pixelflinger/ggl_fixed.h246
2 files changed, 248 insertions, 2 deletions
diff --git a/libpixelflinger/include/private/pixelflinger/ggl_context.h b/libpixelflinger/include/private/pixelflinger/ggl_context.h
index a18b2f769..d45dabcd0 100644
--- a/libpixelflinger/include/private/pixelflinger/ggl_context.h
+++ b/libpixelflinger/include/private/pixelflinger/ggl_context.h
@@ -42,7 +42,7 @@ inline uint32_t GGL_HOST_TO_RGBA(uint32_t v) {
#else
inline uint32_t GGL_RGBA_TO_HOST(uint32_t v) {
-#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2
+#if defined(__mips__) && __mips_isa_rev>=2
uint32_t r;
__asm__("wsbh %0, %1;"
"rotr %0, %0, 16"
@@ -55,7 +55,7 @@ inline uint32_t GGL_RGBA_TO_HOST(uint32_t v) {
#endif
}
inline uint32_t GGL_HOST_TO_RGBA(uint32_t v) {
-#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2
+#if defined(__mips__) && __mips_isa_rev>=2
uint32_t r;
__asm__("wsbh %0, %1;"
"rotr %0, %0, 16"
diff --git a/libpixelflinger/include/private/pixelflinger/ggl_fixed.h b/libpixelflinger/include/private/pixelflinger/ggl_fixed.h
index 787f6202b..17b85dd58 100644
--- a/libpixelflinger/include/private/pixelflinger/ggl_fixed.h
+++ b/libpixelflinger/include/private/pixelflinger/ggl_fixed.h
@@ -520,6 +520,252 @@ inline int64_t gglMulii(int32_t x, int32_t y)
return res;
}
+#elif defined(__mips__) && __mips_isa_rev == 6
+
+/*inline MIPS implementations*/
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
+ GGLfixed result,tmp,tmp1,tmp2;
+
+ if (__builtin_constant_p(shift)) {
+ if (shift == 0) {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ : [res]"=&r"(result)
+ : [a]"r"(a),[b]"r"(b)
+ );
+ } else if (shift == 32)
+ {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "li %[tmp],1\t\n"
+ "sll %[tmp],%[tmp],0x1f\t\n"
+ "addu %[tmp1],%[tmp],%[res] \t\n"
+ "muh %[res], %[a], %[b] \t\n"
+ "sltu %[tmp1],%[tmp1],%[tmp]\t\n" /*obit*/
+ "sra %[tmp],%[tmp],0x1f \t\n"
+ "addu %[res],%[res],%[tmp]\t\n"
+ "addu %[res],%[res],%[tmp1]\t\n"
+ : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
+ : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
+ );
+ } else if ((shift >0) && (shift < 32))
+ {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "li %[tmp],1 \t\n"
+ "sll %[tmp],%[tmp],%[shiftm1] \t\n"
+ "addu %[tmp1],%[tmp],%[res] \t\n"
+ "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
+ "addu %[res],%[res],%[tmp] \t\n"
+ "muh %[tmp], %[a], %[b] \t\n"
+ "addu %[tmp],%[tmp],%[tmp1] \t\n"
+ "sll %[tmp],%[tmp],%[lshift] \t\n"
+ "srl %[res],%[res],%[rshift] \t\n"
+ "or %[res],%[res],%[tmp] \t\n"
+ : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+ : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
+ );
+ } else {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "li %[tmp],1 \t\n"
+ "sll %[tmp],%[tmp],%[shiftm1] \t\n"
+ "addu %[tmp1],%[tmp],%[res] \t\n"
+ "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
+ "sra %[tmp2],%[tmp],0x1f \t\n"
+ "addu %[res],%[res],%[tmp] \t\n"
+ "muh %[tmp], %[a], %[b] \t\n"
+ "addu %[tmp],%[tmp],%[tmp2] \t\n"
+ "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/
+ "srl %[tmp2],%[res],%[rshift] \t\n"
+ "srav %[res], %[tmp],%[rshift]\t\n"
+ "sll %[tmp],%[tmp],1 \t\n"
+ "sll %[tmp],%[tmp],%[norbits] \t\n"
+ "or %[tmp],%[tmp],%[tmp2] \t\n"
+ "seleqz %[tmp],%[tmp],%[bit5] \t\n"
+ "selnez %[res],%[res],%[bit5] \t\n"
+ "or %[res],%[res],%[tmp] \t\n"
+ : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+ : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
+ );
+ }
+ } else {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "li %[tmp],1 \t\n"
+ "sll %[tmp],%[tmp],%[shiftm1] \t\n"
+ "addu %[tmp1],%[tmp],%[res] \t\n"
+ "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
+ "sra %[tmp2],%[tmp],0x1f \t\n"
+ "addu %[res],%[res],%[tmp] \t\n"
+ "muh %[tmp], %[a], %[b] \t\n"
+ "addu %[tmp],%[tmp],%[tmp2] \t\n"
+ "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/
+ "srl %[tmp2],%[res],%[rshift] \t\n"
+ "srav %[res], %[tmp],%[rshift]\t\n"
+ "sll %[tmp],%[tmp],1 \t\n"
+ "sll %[tmp],%[tmp],%[norbits] \t\n"
+ "or %[tmp],%[tmp],%[tmp2] \t\n"
+ "seleqz %[tmp],%[tmp],%[bit5] \t\n"
+ "selnez %[res],%[res],%[bit5] \t\n"
+ "or %[res],%[res],%[tmp] \t\n"
+ : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+ : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
+ );
+ }
+ return result;
+}
+
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
+ GGLfixed result,t,tmp1,tmp2;
+
+ if (__builtin_constant_p(shift)) {
+ if (shift == 0) {
+ asm ("mul %[lo], %[a], %[b] \t\n"
+ "addu %[lo],%[lo],%[c] \t\n"
+ : [lo]"=&r"(result)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+ );
+ } else if (shift == 32) {
+ asm ("muh %[lo], %[a], %[b] \t\n"
+ "addu %[lo],%[lo],%[c] \t\n"
+ : [lo]"=&r"(result)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+ );
+ } else if ((shift>0) && (shift<32)) {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "muh %[t], %[a], %[b] \t\n"
+ "srl %[res],%[res],%[rshift] \t\n"
+ "sll %[t],%[t],%[lshift] \t\n"
+ "or %[res],%[res],%[t] \t\n"
+ "addu %[res],%[res],%[c] \t\n"
+ : [res]"=&r"(result),[t]"=&r"(t)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
+ );
+ } else {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "muh %[t], %[a], %[b] \t\n"
+ "nor %[tmp1],$zero,%[shift]\t\n"
+ "srl %[res],%[res],%[shift] \t\n"
+ "sll %[tmp2],%[t],1 \t\n"
+ "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
+ "or %[tmp1],%[tmp2],%[res] \t\n"
+ "srav %[res],%[t],%[shift] \t\n"
+ "andi %[tmp2],%[shift],0x20\t\n"
+ "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
+ "selnez %[res],%[res],%[tmp2]\t\n"
+ "or %[res],%[res],%[tmp1]\t\n"
+ "addu %[res],%[res],%[c] \t\n"
+ : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
+ );
+ }
+ } else {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "muh %[t], %[a], %[b] \t\n"
+ "nor %[tmp1],$zero,%[shift]\t\n"
+ "srl %[res],%[res],%[shift] \t\n"
+ "sll %[tmp2],%[t],1 \t\n"
+ "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
+ "or %[tmp1],%[tmp2],%[res] \t\n"
+ "srav %[res],%[t],%[shift] \t\n"
+ "andi %[tmp2],%[shift],0x20\t\n"
+ "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
+ "selnez %[res],%[res],%[tmp2]\t\n"
+ "or %[res],%[res],%[tmp1]\t\n"
+ "addu %[res],%[res],%[c] \t\n"
+ : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
+ );
+ }
+ return result;
+}
+
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
+ GGLfixed result,t,tmp1,tmp2;
+
+ if (__builtin_constant_p(shift)) {
+ if (shift == 0) {
+ asm ("mul %[lo], %[a], %[b] \t\n"
+ "subu %[lo],%[lo],%[c] \t\n"
+ : [lo]"=&r"(result)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+ );
+ } else if (shift == 32) {
+ asm ("muh %[lo], %[a], %[b] \t\n"
+ "subu %[lo],%[lo],%[c] \t\n"
+ : [lo]"=&r"(result)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+ );
+ } else if ((shift>0) && (shift<32)) {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "muh %[t], %[a], %[b] \t\n"
+ "srl %[res],%[res],%[rshift] \t\n"
+ "sll %[t],%[t],%[lshift] \t\n"
+ "or %[res],%[res],%[t] \t\n"
+ "subu %[res],%[res],%[c] \t\n"
+ : [res]"=&r"(result),[t]"=&r"(t)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
+ );
+ } else {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "muh %[t], %[a], %[b] \t\n"
+ "nor %[tmp1],$zero,%[shift]\t\n"
+ "srl %[res],%[res],%[shift] \t\n"
+ "sll %[tmp2],%[t],1 \t\n"
+ "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
+ "or %[tmp1],%[tmp2],%[res] \t\n"
+ "srav %[res],%[t],%[shift] \t\n"
+ "andi %[tmp2],%[shift],0x20\t\n"
+ "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
+ "selnez %[res],%[res],%[tmp2]\t\n"
+ "or %[res],%[res],%[tmp1]\t\n"
+ "subu %[res],%[res],%[c] \t\n"
+ : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
+ );
+ }
+ } else {
+ asm ("mul %[res], %[a], %[b] \t\n"
+ "muh %[t], %[a], %[b] \t\n"
+ "nor %[tmp1],$zero,%[shift]\t\n"
+ "srl %[res],%[res],%[shift] \t\n"
+ "sll %[tmp2],%[t],1 \t\n"
+ "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
+ "or %[tmp1],%[tmp2],%[res] \t\n"
+ "srav %[res],%[t],%[shift] \t\n"
+ "andi %[tmp2],%[shift],0x20\t\n"
+ "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
+ "selnez %[res],%[res],%[tmp2]\t\n"
+ "or %[res],%[res],%[tmp1]\t\n"
+ "subu %[res],%[res],%[c] \t\n"
+ : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+ : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
+ );
+ }
+ return result;
+}
+
+inline int64_t gglMulii(int32_t x, int32_t y) CONST;
+inline int64_t gglMulii(int32_t x, int32_t y) {
+ union {
+ struct {
+#if defined(__MIPSEL__)
+ int32_t lo;
+ int32_t hi;
+#elif defined(__MIPSEB__)
+ int32_t hi;
+ int32_t lo;
+#endif
+ } s;
+ int64_t res;
+ }u;
+ asm("mul %0, %2, %3 \t\n"
+ "muh %1, %2, %3 \t\n"
+ : "=r"(u.s.lo), "=&r"(u.s.hi)
+ : "%r"(x), "r"(y)
+ );
+ return u.res;
+}
+
#else // ----------------------------------------------------------------------
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;