aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/libgcc/config/spu/multi3.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.9/libgcc/config/spu/multi3.c')
-rw-r--r--gcc-4.9/libgcc/config/spu/multi3.c119
1 files changed, 119 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/spu/multi3.c b/gcc-4.9/libgcc/config/spu/multi3.c
new file mode 100644
index 000000000..69097fa0e
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/multi3.c
@@ -0,0 +1,119 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+/* Signed integer type declared with GCC's TI machine mode (a 16-byte
+ integer according to the GCC machine-mode documentation). */
+typedef int TItype __attribute__ ((mode (TI)));
+
+/* Overlay that lets the same object be accessed either as a qword or
+ as a TItype, so a value can be reinterpreted without conversion. */
+union qword_TItype
+{
+ qword q; /* Access as a qword. */
+ TItype t; /* Access as a TItype integer. */
+};
+
+/* Reinterpret T as a qword: store it in the `t' member of a
+ union qword_TItype and read the object back through the `q' member. */
+static inline qword
+si_from_TItype (TItype t)
+{
+ union qword_TItype pun = { .t = t };
+
+ return pun.q;
+}
+
+/* Reinterpret Q as a TItype: store it in the `q' member of a
+ union qword_TItype and read the object back through the `t' member. */
+static inline TItype
+si_to_TItype (qword q)
+{
+ union qword_TItype pun = { .q = q };
+
+ return pun.t;
+}
+
+/* A straightforward vectorization and unrolling of
+ * unsigned short l[8], r[8];
+ * TItype total = 0;
+ * for (i = 0; i < 8; i++)
+ * for (j = 0; j < 8; j++)
+ * total += ((TItype) (l[7-i] * r[7-j])) << (16 * (i + j));
+ */
+/* Multiply the TItype values L and R and return the product as a TItype.
+
+ Both operands are reinterpreted as qwords. Partial products of U with
+ each halfword of V are formed with the SPU multiply intrinsics, moved
+ into position with whole-byte shifts and summed with si_a. The carries
+ of those sums are collected separately with si_cg and folded back in by
+ the final si_cg/si_cgx/si_addx sequence. */
+TItype
+__multi3 (TItype l, TItype r)
+{
+ qword u = si_from_TItype (l);
+ qword v = si_from_TItype (r);
+ /* splatN is V shuffled with a pattern whose halfwords all hold the byte
+ index pair (2N, 2N+1). NOTE(review): presumably si_ilh replicates its
+ immediate into every halfword and si_shufb selects bytes by index, so
+ splatN is halfword N of V copied into every halfword slot -- confirm
+ against the SPU intrinsics documentation. */
+ qword splat0 = si_shufb (v, v, si_ilh (0x0001));
+ qword splat1 = si_shufb (v, v, si_ilh (0x0203));
+ qword splat2 = si_shufb (v, v, si_ilh (0x0405));
+ qword splat3 = si_shufb (v, v, si_ilh (0x0607));
+ qword splat4 = si_shufb (v, v, si_ilh (0x0809));
+ qword splat5 = si_shufb (v, v, si_ilh (0x0a0b));
+ qword splat6 = si_shufb (v, v, si_ilh (0x0c0d));
+ qword splat7 = si_shufb (v, v, si_ilh (0x0e0f));
+
+ /* Partial products of U with each splat. The "l" parts use si_mpyu and
+ the "h" parts use si_mpyhhu (presumably multiplying the low and the
+ high halfword of every word of U respectively -- verify). Each
+ product is shifted left by a whole number of bytes; partNh and
+ part(N-1)l share the same shift amount, and part7l is not shifted.
+ NOTE(review): there is no part0h; presumably it would be shifted out
+ of the qword entirely -- confirm. */
+ qword part0l = si_shlqbyi (si_mpyu (u, splat0), 14);
+ qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14);
+ qword part1l = si_shlqbyi (si_mpyu (u, splat1), 12);
+ qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12);
+ qword part2l = si_shlqbyi (si_mpyu (u, splat2), 10);
+ qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10);
+ qword part3l = si_shlqbyi (si_mpyu (u, splat3), 8);
+ qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8);
+ qword part4l = si_shlqbyi (si_mpyu (u, splat4), 6);
+ qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6);
+ qword part5l = si_shlqbyi (si_mpyu (u, splat5), 4);
+ qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4);
+ qword part6l = si_shlqbyi (si_mpyu (u, splat6), 2);
+ qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2);
+ qword part7l = si_mpyu (u, splat7);
+
+ qword carry, total0, total1, total2, total3, total4;
+ qword total5, total6, total7, total8, total9, total10;
+ qword total;
+
+ /* Sum all fifteen partial products with si_a, combined as a tree that
+ ends in total10. NOTE(review): si_a is presumably a per-word add
+ that drops the carry out of each word, which is why the carries are
+ gathered separately below -- confirm. */
+ total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l);
+ total1 = si_a (part2l, part3h);
+ total2 = si_a (part3l, part4h);
+ total3 = si_a (part4l, part5h);
+ total4 = si_a (part5l, part6h);
+ total5 = si_a (part6l, part7h);
+ total6 = si_a (total0, total1);
+ total7 = si_a (total2, total3);
+ total8 = si_a (total4, total5);
+ total9 = si_a (total6, total7);
+ total10 = si_a (total8, total9);
+
+ /* Accumulate si_cg of the operand pair of every si_a above that produced
+ total1 .. total10. NOTE(review): the adds inside total0 have no
+ matching si_cg; presumably the operands shifted by 12 or 14 bytes are
+ non-zero only in the most significant word, whose carry-out is
+ discarded anyway -- confirm. */
+ carry = si_cg (part2l, part3h);
+ carry = si_a (carry, si_cg (part3l, part4h));
+ carry = si_a (carry, si_cg (part4l, part5h));
+ carry = si_a (carry, si_cg (part5l, part6h));
+ carry = si_a (carry, si_cg (part6l, part7h));
+ carry = si_a (carry, si_cg (total0, total1));
+ carry = si_a (carry, si_cg (total2, total3));
+ carry = si_a (carry, si_cg (total4, total5));
+ carry = si_a (carry, si_cg (total6, total7));
+ carry = si_a (carry, si_cg (total8, total9));
+ /* Shift the accumulated carries left by 4 bytes (one word), presumably
+ so each count lines up with the next more significant word. */
+ carry = si_shlqbyi (carry, 4);
+
+ /* Add CARRY to total10. si_cg and then si_cgx compute carries, each
+ result being shifted left one word before the next step consumes it,
+ and si_addx performs the final add using those carries.
+ NOTE(review): only two propagation steps are used; this looks
+ sufficient because the lowest word of CARRY is zero after the shift
+ above, so the lowest word cannot generate a carry -- confirm. */
+ total = si_cg (total10, carry);
+ total = si_shlqbyi (total, 4);
+ total = si_cgx (total10, carry, total);
+ total = si_shlqbyi (total, 4);
+ total = si_addx (total10, carry, total);
+ return si_to_TItype (total);
+}