diff options
author | Elliott Hughes <enh@google.com> | 2015-11-05 18:03:18 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-11-05 18:03:18 +0000 |
commit | 251f8b69fbe5068aa3a9ebbc3dc42ed5b228fd73 (patch) | |
tree | 57b4ea4a368626400a8991c27888dacd15c997a7 /libpixelflinger/include | |
parent | d3b177ccb5502306546a357d0897e2552384b18c (diff) | |
parent | ab5265ebe65eac3b9555eaa3f425cf8e2759d3b3 (diff) | |
download | core-251f8b69fbe5068aa3a9ebbc3dc42ed5b228fd73.tar.gz core-251f8b69fbe5068aa3a9ebbc3dc42ed5b228fd73.tar.bz2 core-251f8b69fbe5068aa3a9ebbc3dc42ed5b228fd73.zip |
Merge "Add MIPS64r6 support for libpixelflinger"
Diffstat (limited to 'libpixelflinger/include')
-rw-r--r-- | libpixelflinger/include/private/pixelflinger/ggl_context.h | 4 | ||||
-rw-r--r-- | libpixelflinger/include/private/pixelflinger/ggl_fixed.h | 246 |
2 files changed, 248 insertions, 2 deletions
/*
 * NOTE(review): reconstructed from a cgit diff dump of AOSP commit 251f8b69
 * ("Add MIPS64r6 support for libpixelflinger").  Two files are touched:
 *
 * 1) libpixelflinger/include/private/pixelflinger/ggl_context.h
 *    In both GGL_RGBA_TO_HOST() and GGL_HOST_TO_RGBA() the guard
 *        #if defined(__mips__) && __mips==32 && __mips_isa_rev>=2
 *    is relaxed to
 *        #if defined(__mips__) && __mips_isa_rev>=2
 *    so the wsbh/rotr byte-swap path is also taken on 64-bit MIPS r2+ builds.
 *    (The full function bodies are truncated by the hunk boundaries in this
 *    view and are not reproduced here.)
 *
 * 2) libpixelflinger/include/private/pixelflinger/ggl_fixed.h
 *    A new #elif branch (below) adding MIPS release-6 implementations of the
 *    fixed-point multiply helpers.  Release 6 has no hi/lo accumulator
 *    access; the 64-bit product is obtained with the r6 mul (low 32 bits)
 *    and muh (high 32 bits) instructions, and conditional selection uses
 *    seleqz/selnez instead of movz/movn.
 */
#elif defined(__mips__) && __mips_isa_rev == 6

/*inline MIPS implementations*/

/*
 * gglMulx: fixed-point multiply with rounding.
 * Computes (a * b + (1 << (shift-1))) >> shift on the signed 64-bit product,
 * i.e. the product scaled down by 'shift' with round-to-nearest.
 * When 'shift' is a compile-time constant, specialized asm is emitted for
 * shift == 0 (plain low-word multiply), shift == 32 (high word plus rounding
 * carry), 0 < shift < 32 (srl/sll recombination of lo and hi), and the
 * generic >= 32 case; a fully generic variant handles runtime shifts.
 * The li/sll(shift-1)/addu/sltu sequence adds the rounding constant to the
 * low word and captures the carry-out ("obit"), which is then propagated
 * into the high word before shifting.
 * In the generic cases, bit 5 of 'shift' (shift & 0x20) drives the
 * seleqz/selnez pair to choose between the "shift < 32" recombination and
 * the "shift >= 32" arithmetic-shift-of-hi result.
 */
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
    GGLfixed result,tmp,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            /* shift == 0: result is just the low 32 bits of a*b. */
            asm ("mul %[res], %[a], %[b] \t\n"
                 : [res]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b)
                 );
        } else if (shift == 32)
        {
            /* shift == 32: result is the high word, plus the carry from
             * adding the rounding constant 1<<31 to the low word. */
            asm ("mul %[res], %[a], %[b] \t\n"
                 "li %[tmp],1\t\n"
                 "sll %[tmp],%[tmp],0x1f\t\n"
                 "addu %[tmp1],%[tmp],%[res] \t\n"
                 "muh %[res], %[a], %[b] \t\n"
                 "sltu %[tmp1],%[tmp1],%[tmp]\t\n" /*obit*/
                 "sra %[tmp],%[tmp],0x1f \t\n"
                 "addu %[res],%[res],%[tmp]\t\n"
                 "addu %[res],%[res],%[tmp1]\t\n"
                 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
                 : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
                 );
        } else if ((shift >0) && (shift < 32))
        {
            /* 0 < shift < 32: add rounding constant 1<<(shift-1) to lo
             * (carry into hi), then recombine (lo >> shift) | (hi << (32-shift)).
             * NOTE(review): [tmp2] is declared as an output but never used in
             * this template — harmless, but it costs a scratch register. */
            asm ("mul %[res], %[a], %[b] \t\n"
                 "li %[tmp],1 \t\n"
                 "sll %[tmp],%[tmp],%[shiftm1] \t\n"
                 "addu %[tmp1],%[tmp],%[res] \t\n"
                 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
                 "addu %[res],%[res],%[tmp] \t\n"
                 "muh %[tmp], %[a], %[b] \t\n"
                 "addu %[tmp],%[tmp],%[tmp1] \t\n"
                 "sll %[tmp],%[tmp],%[lshift] \t\n"
                 "srl %[res],%[res],%[rshift] \t\n"
                 "or %[res],%[res],%[tmp] \t\n"
                 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                 : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
                 );
        } else {
            /* Constant shift >= 32 (or other): full 64-bit rounding add, then
             * select the correct recombination via bit 5 of shift. */
            asm ("mul %[res], %[a], %[b] \t\n"
                 "li %[tmp],1 \t\n"
                 "sll %[tmp],%[tmp],%[shiftm1] \t\n"
                 "addu %[tmp1],%[tmp],%[res] \t\n"
                 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
                 "sra %[tmp2],%[tmp],0x1f \t\n"
                 "addu %[res],%[res],%[tmp] \t\n"
                 "muh %[tmp], %[a], %[b] \t\n"
                 "addu %[tmp],%[tmp],%[tmp2] \t\n"
                 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/
                 "srl %[tmp2],%[res],%[rshift] \t\n"
                 "srav %[res], %[tmp],%[rshift]\t\n"
                 "sll %[tmp],%[tmp],1 \t\n"
                 "sll %[tmp],%[tmp],%[norbits] \t\n"
                 "or %[tmp],%[tmp],%[tmp2] \t\n"
                 "seleqz %[tmp],%[tmp],%[bit5] \t\n"
                 "selnez %[res],%[res],%[bit5] \t\n"
                 "or %[res],%[res],%[tmp] \t\n"
                 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                 : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
                 );
        }
    } else {
        /* Runtime shift: same algorithm as the generic constant case, but all
         * shift-derived operands are passed in registers ("r" constraints). */
        asm ("mul %[res], %[a], %[b] \t\n"
             "li %[tmp],1 \t\n"
             "sll %[tmp],%[tmp],%[shiftm1] \t\n"
             "addu %[tmp1],%[tmp],%[res] \t\n"
             "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/
             "sra %[tmp2],%[tmp],0x1f \t\n"
             "addu %[res],%[res],%[tmp] \t\n"
             "muh %[tmp], %[a], %[b] \t\n"
             "addu %[tmp],%[tmp],%[tmp2] \t\n"
             "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/
             "srl %[tmp2],%[res],%[rshift] \t\n"
             "srav %[res], %[tmp],%[rshift]\t\n"
             "sll %[tmp],%[tmp],1 \t\n"
             "sll %[tmp],%[tmp],%[norbits] \t\n"
             "or %[tmp],%[tmp],%[tmp2] \t\n"
             "seleqz %[tmp],%[tmp],%[bit5] \t\n"
             "selnez %[res],%[res],%[bit5] \t\n"
             "or %[res],%[res],%[tmp] \t\n"
             : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
             : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
             );
    }
    return result;
}

/*
 * gglMulAddx: ((a * b) >> shift) + c.
 * Unlike gglMulx, no rounding constant is added — the product is truncated.
 * Constant-shift special cases mirror gglMulx; the generic path selects
 * between the <32 and >=32 recombinations with seleqz/selnez on
 * (shift & 0x20), then adds c.
 */
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            /* shift == 0: low word of product plus c. */
            asm ("mul %[lo], %[a], %[b] \t\n"
                 "addu %[lo],%[lo],%[c] \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
        } else if (shift == 32) {
            /* shift == 32: high word of product plus c. */
            asm ("muh %[lo], %[a], %[b] \t\n"
                 "addu %[lo],%[lo],%[c] \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
        } else if ((shift>0) && (shift<32)) {
            /* 0 < shift < 32: (lo >> shift) | (hi << (32-shift)), plus c. */
            asm ("mul %[res], %[a], %[b] \t\n"
                 "muh %[t], %[a], %[b] \t\n"
                 "srl %[res],%[res],%[rshift] \t\n"
                 "sll %[t],%[t],%[lshift] \t\n"
                 "or %[res],%[res],%[t] \t\n"
                 "addu %[res],%[res],%[c] \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                 );
        } else {
            /* Generic constant case: runtime-style 64-bit shift with
             * seleqz/selnez selection on bit 5 of shift, then add c. */
            asm ("mul %[res], %[a], %[b] \t\n"
                 "muh %[t], %[a], %[b] \t\n"
                 "nor %[tmp1],$zero,%[shift]\t\n"
                 "srl %[res],%[res],%[shift] \t\n"
                 "sll %[tmp2],%[t],1 \t\n"
                 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
                 "or %[tmp1],%[tmp2],%[res] \t\n"
                 "srav %[res],%[t],%[shift] \t\n"
                 "andi %[tmp2],%[shift],0x20\t\n"
                 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
                 "selnez %[res],%[res],%[tmp2]\t\n"
                 "or %[res],%[res],%[tmp1]\t\n"
                 "addu %[res],%[res],%[c] \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                 );
        }
    } else {
        /* Runtime shift: same template with register operands. */
        asm ("mul %[res], %[a], %[b] \t\n"
             "muh %[t], %[a], %[b] \t\n"
             "nor %[tmp1],$zero,%[shift]\t\n"
             "srl %[res],%[res],%[shift] \t\n"
             "sll %[tmp2],%[t],1 \t\n"
             "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
             "or %[tmp1],%[tmp2],%[res] \t\n"
             "srav %[res],%[t],%[shift] \t\n"
             "andi %[tmp2],%[shift],0x20\t\n"
             "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
             "selnez %[res],%[res],%[tmp2]\t\n"
             "or %[res],%[res],%[tmp1]\t\n"
             "addu %[res],%[res],%[c] \t\n"
             : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
             : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
             );
    }
    return result;
}

/*
 * gglMulSubx: ((a * b) >> shift) - c.
 * Identical structure to gglMulAddx, with the final addu replaced by subu.
 */
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            /* shift == 0: low word of product minus c. */
            asm ("mul %[lo], %[a], %[b] \t\n"
                 "subu %[lo],%[lo],%[c] \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
        } else if (shift == 32) {
            /* shift == 32: high word of product minus c. */
            asm ("muh %[lo], %[a], %[b] \t\n"
                 "subu %[lo],%[lo],%[c] \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
        } else if ((shift>0) && (shift<32)) {
            /* 0 < shift < 32: (lo >> shift) | (hi << (32-shift)), minus c. */
            asm ("mul %[res], %[a], %[b] \t\n"
                 "muh %[t], %[a], %[b] \t\n"
                 "srl %[res],%[res],%[rshift] \t\n"
                 "sll %[t],%[t],%[lshift] \t\n"
                 "or %[res],%[res],%[t] \t\n"
                 "subu %[res],%[res],%[c] \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                 );
        } else {
            /* Generic constant case: seleqz/selnez selection on bit 5 of
             * shift, then subtract c. */
            asm ("mul %[res], %[a], %[b] \t\n"
                 "muh %[t], %[a], %[b] \t\n"
                 "nor %[tmp1],$zero,%[shift]\t\n"
                 "srl %[res],%[res],%[shift] \t\n"
                 "sll %[tmp2],%[t],1 \t\n"
                 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
                 "or %[tmp1],%[tmp2],%[res] \t\n"
                 "srav %[res],%[t],%[shift] \t\n"
                 "andi %[tmp2],%[shift],0x20\t\n"
                 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
                 "selnez %[res],%[res],%[tmp2]\t\n"
                 "or %[res],%[res],%[tmp1]\t\n"
                 "subu %[res],%[res],%[c] \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                 );
        }
    } else {
        /* Runtime shift: same template with register operands. */
        asm ("mul %[res], %[a], %[b] \t\n"
             "muh %[t], %[a], %[b] \t\n"
             "nor %[tmp1],$zero,%[shift]\t\n"
             "srl %[res],%[res],%[shift] \t\n"
             "sll %[tmp2],%[t],1 \t\n"
             "sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
             "or %[tmp1],%[tmp2],%[res] \t\n"
             "srav %[res],%[t],%[shift] \t\n"
             "andi %[tmp2],%[shift],0x20\t\n"
             "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
             "selnez %[res],%[res],%[tmp2]\t\n"
             "or %[res],%[res],%[tmp1]\t\n"
             "subu %[res],%[res],%[c] \t\n"
             : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
             : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
             );
    }
    return result;
}

/*
 * gglMulii: full 64-bit signed product of two 32-bit integers.
 * mul yields the low word and muh the high word; an endianness-aware
 * union (lo/hi order swapped between __MIPSEL__ and __MIPSEB__) reassembles
 * them into an int64_t without any shifting.
 */
inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y) {
    union {
        struct {
#if defined(__MIPSEL__)
            int32_t lo;
            int32_t hi;
#elif defined(__MIPSEB__)
            int32_t hi;
            int32_t lo;
#endif
        } s;
        int64_t res;
    }u;
    asm("mul %0, %2, %3 \t\n"
        "muh %1, %2, %3 \t\n"
        : "=r"(u.s.lo), "=&r"(u.s.hi)
        : "%r"(x), "r"(y)
        );
    return u.res;
}

#else // ----------------------------------------------------------------------

/* Generic (portable C) implementations follow; only the forward declaration
 * is visible in this chunk — the definition is truncated by the view. */
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;