Diffstat (limited to 'gcc-4.9/libgcc/config/spu')
-rw-r--r--  gcc-4.9/libgcc/config/spu/cache.S                    43
-rw-r--r--  gcc-4.9/libgcc/config/spu/cachemgr.c                438
-rw-r--r--  gcc-4.9/libgcc/config/spu/divmodti4.c               188
-rw-r--r--  gcc-4.9/libgcc/config/spu/divv2df3.c                195
-rw-r--r--  gcc-4.9/libgcc/config/spu/float_disf.c               31
-rw-r--r--  gcc-4.9/libgcc/config/spu/float_unsdidf.c            54
-rw-r--r--  gcc-4.9/libgcc/config/spu/float_unsdisf.c            31
-rw-r--r--  gcc-4.9/libgcc/config/spu/float_unssidf.c            45
-rw-r--r--  gcc-4.9/libgcc/config/spu/mfc_multi_tag_release.c    72
-rw-r--r--  gcc-4.9/libgcc/config/spu/mfc_multi_tag_reserve.c    84
-rw-r--r--  gcc-4.9/libgcc/config/spu/mfc_tag_release.c          59
-rw-r--r--  gcc-4.9/libgcc/config/spu/mfc_tag_reserve.c          51
-rw-r--r--  gcc-4.9/libgcc/config/spu/mfc_tag_table.c            39
-rw-r--r--  gcc-4.9/libgcc/config/spu/multi3.c                  119
-rw-r--r--  gcc-4.9/libgcc/config/spu/t-elf                      59
15 files changed, 1508 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/spu/cache.S b/gcc-4.9/libgcc/config/spu/cache.S
new file mode 100644
index 000000000..5506079c7
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/cache.S
@@ -0,0 +1,43 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+ .data
+ .p2align 7
+ .global __cache
+__cache:
+ .rept __CACHE_SIZE__ * 8
+ .fill 128
+ .endr
+
+ .p2align 7
+ .global __cache_tag_array
+__cache_tag_array:
+ .rept __CACHE_SIZE__ * 2
+ .long 1, 1, 1, 1
+ .fill 128-16
+ .endr
+__end_cache_tag_array:
+
+ .globl __cache_tag_array_size
+ .set __cache_tag_array_size, __end_cache_tag_array-__cache_tag_array
+
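The assembly above sizes both arrays in terms of __CACHE_SIZE__, which the t-elf rules at the end of this patch pass in kilobytes (cache8k.o is built with -D__CACHE_SIZE__=8, and so on). As a worked check of the resulting geometry, here is a small illustration for an assumed 64 KB build; it is not part of the patch, and the numbers simply restate the directives above.

#include <stdio.h>

int
main (void)
{
  const int cache_size = 64;  /* assumed -D__CACHE_SIZE__=64, i.e. kilobytes */
  const int line_size = 128;  /* bytes per cache line */
  const int ways = 4;         /* associativity used by cachemgr.c */

  int data_bytes = cache_size * 8 * line_size; /* .rept __CACHE_SIZE__ * 8, .fill 128 */
  int sets = cache_size * 2;                   /* one 128-byte tag record per set */
  int lines = sets * ways;

  printf ("%d bytes of line data, %d sets x %d ways = %d lines of %d bytes\n",
          data_bytes, sets, ways, lines, line_size);
  /* Prints: 65536 bytes of line data, 128 sets x 4 ways = 512 lines of 128 bytes.  */
  return 0;
}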
diff --git a/gcc-4.9/libgcc/config/spu/cachemgr.c b/gcc-4.9/libgcc/config/spu/cachemgr.c
new file mode 100644
index 000000000..572718cf3
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/cachemgr.c
@@ -0,0 +1,438 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+#include <spu_internals.h>
+#include <spu_intrinsics.h>
+#include <spu_cache.h>
+
+extern unsigned long long __ea_local_store;
+extern char __cache_tag_array_size;
+
+#define LINE_SIZE 128
+#define TAG_MASK (LINE_SIZE - 1)
+
+#define WAYS 4
+#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE)
+
+#define CACHE_LINES ((int) &__cache_tag_array_size / \
+ sizeof (struct __cache_tag_array) * WAYS)
+
+struct __cache_tag_array
+{
+ unsigned int tag_lo[WAYS];
+ unsigned int tag_hi[WAYS];
+ void *base[WAYS];
+ int reserved[WAYS];
+ vector unsigned short dirty_bits[WAYS];
+};
+
+extern struct __cache_tag_array __cache_tag_array[];
+extern char __cache[];
+
+/* In order to make the code seem a little cleaner, and to avoid having
+ 64/32 bit ifdefs all over the place, we use macros. */
+
+#ifdef __EA64__
+typedef unsigned long long addr;
+
+#define CHECK_TAG(_entry, _way, _tag) \
+ ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF) \
+ && (_entry)->tag_hi[(_way)] == ((_tag) >> 32))
+
+#define GET_TAG(_entry, _way) \
+ ((unsigned long long)(_entry)->tag_hi[(_way)] << 32 \
+ | (unsigned long long)(_entry)->tag_lo[(_way)])
+
+#define SET_TAG(_entry, _way, _tag) \
+ (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF; \
+ (_entry)->tag_hi[(_way)] = (_tag) >> 32
+
+#else /*__EA32__*/
+typedef unsigned long addr;
+
+#define CHECK_TAG(_entry, _way, _tag) \
+ ((_entry)->tag_lo[(_way)] == (_tag))
+
+#define GET_TAG(_entry, _way) \
+ ((_entry)->tag_lo[(_way)])
+
+#define SET_TAG(_entry, _way, _tag) \
+ (_entry)->tag_lo[(_way)] = (_tag)
+
+#endif
+
+/* In GET_ENTRY, we cast away the high 32 bits,
+ as the tag is only in the low 32. */
+
+#define GET_ENTRY(_addr) \
+ ((struct __cache_tag_array *) \
+ si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \
+ si_from_uint (SET_MASK)), \
+ si_from_uint ((unsigned int) __cache_tag_array))))
+
+#define GET_CACHE_LINE(_addr, _way) \
+ ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE));
+
+#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec))))
+#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1)
+#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1)
+
+#define LS_FLAG 0x80000000
+#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG)
+#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG)
+#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG)
+
+static int dma_tag = 32;
+
+static void
+__cache_evict_entry (struct __cache_tag_array *entry, int way)
+{
+ addr tag = GET_TAG (entry, way);
+
+ if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way))
+ {
+#ifdef NONATOMIC
+ /* Non-atomic writes. */
+ unsigned int oldmask, mach_stat;
+ char *line = ((void *) 0);
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ /* Issue DMA request. */
+ line = GET_CACHE_LINE (entry->tag_lo[way], way);
+ mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);
+
+ /* Wait for DMA completion. */
+ oldmask = mfc_read_tag_mask ();
+ mfc_write_tag_mask (1 << dma_tag);
+ mfc_read_tag_status_all ();
+ mfc_write_tag_mask (oldmask);
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+#else
+ /* Allocate a buffer large enough that we know it has 128 bytes
+ that are 128 byte aligned (for DMA). */
+
+ char buffer[LINE_SIZE + 127];
+ qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
+ qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
+ qword bits;
+ unsigned int mach_stat;
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ do
+ {
+ /* We atomically read the current memory into a buffer,
+ modify the dirty bytes in the buffer, and write it
+ back. If the writeback fails, loop and try again. */
+
+ mfc_getllar (buf_ptr, tag, 0, 0);
+ mfc_read_atomic_status ();
+
+ /* The method we're using to write 16 dirty bytes into
+ the buffer at a time uses fsmb which in turn uses
+ the least significant 16 bits of word 0, so we
+ load the bits and rotate so that the first bit of
+ the bitmap is in the first bit that fsmb will use. */
+
+ bits = (qword) entry->dirty_bits[way];
+ bits = si_rotqbyi (bits, -2);
+
+ /* si_fsmb creates the mask of dirty bytes.
+ Use selb to select the appropriate bits. */
+ buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));
+
+ /* Rotate to next 16 byte section of cache. */
+ bits = si_rotqbyi (bits, 2);
+
+ buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+
+ mfc_putllc (buf_ptr, tag, 0, 0);
+ }
+ while (mfc_read_atomic_status ());
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+#endif
+ }
+
+ /* In any case, mark the lo tag with 1, which denotes empty. */
+ SET_EMPTY (entry, way);
+ entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
+}
+
+void
+__cache_evict (__ea void *ea)
+{
+ addr tag = (addr) ea & ~TAG_MASK;
+ struct __cache_tag_array *entry = GET_ENTRY (ea);
+ int i = 0;
+
+ /* Cycles through all the possible ways an address could be at
+ and evicts the way if found. */
+
+ for (i = 0; i < WAYS; i++)
+ if (CHECK_TAG (entry, i, tag))
+ __cache_evict_entry (entry, i);
+}
+
+static void *
+__cache_fill (int way, addr tag)
+{
+ unsigned int oldmask, mach_stat;
+ char *line = ((void *) 0);
+
+ /* Reserve our DMA tag. */
+ if (dma_tag == 32)
+ dma_tag = mfc_tag_reserve ();
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ /* Issue DMA request. */
+ line = GET_CACHE_LINE (tag, way);
+ mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0);
+
+ /* Wait for DMA completion. */
+ oldmask = mfc_read_tag_mask ();
+ mfc_write_tag_mask (1 << dma_tag);
+ mfc_read_tag_status_all ();
+ mfc_write_tag_mask (oldmask);
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+
+ return (void *) line;
+}
+
+static void
+__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way)
+{
+
+ addr tag = (addr) ea & ~TAG_MASK;
+ unsigned int lru = 0;
+ int i = 0;
+ int idx = 0;
+
+ /* If way >= 4, then there are no empty slots, so we must evict
+ the least recently used entry. */
+ if (way >= 4)
+ {
+ for (i = 0; i < WAYS; i++)
+ {
+ if (GET_LRU (entry, i) > lru)
+ {
+ lru = GET_LRU (entry, i);
+ idx = i;
+ }
+ }
+ __cache_evict_entry (entry, idx);
+ way = idx;
+ }
+
+ /* Set the empty entry's tag and fill its cache line. */
+
+ SET_TAG (entry, way, tag);
+ entry->reserved[way] = 0;
+
+ /* Check if the address is just an effective address within the
+ SPU's local store. */
+
+ /* Because the LS is not 256k aligned, we can't do a nice and mask
+ here to compare, so we must check the whole range. */
+
+ if ((addr) ea >= (addr) __ea_local_store
+ && (addr) ea < (addr) (__ea_local_store + 0x40000))
+ {
+ SET_IS_LS (entry, way);
+ entry->base[way] =
+ (void *) ((unsigned int) ((addr) ea -
+ (addr) __ea_local_store) & ~0x7f);
+ }
+ else
+ {
+ entry->base[way] = __cache_fill (way, tag);
+ }
+}
+
+void *
+__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
+{
+#ifdef __EA64__
+ unsigned int tag_hi;
+ qword etag_hi;
+#endif
+ unsigned int tag_lo;
+ struct __cache_tag_array *entry;
+
+ qword etag_lo;
+ qword equal;
+ qword bit_mask;
+ qword way;
+
+ /* This first chunk, we merely fill the pointer and tag. */
+
+ entry = GET_ENTRY (ea);
+
+#ifndef __EA64__
+ tag_lo =
+ si_to_uint (si_andc
+ (si_shufb
+ (si_from_uint ((addr) ea), si_from_uint (0),
+ si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
+#else
+ tag_lo =
+ si_to_uint (si_andc
+ (si_shufb
+ (si_from_ullong ((addr) ea), si_from_uint (0),
+ si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));
+
+ tag_hi =
+ si_to_uint (si_shufb
+ (si_from_ullong ((addr) ea), si_from_uint (0),
+ si_from_uint (0x00010203)));
+#endif
+
+ /* Increment LRU in reserved bytes. */
+ si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
+ si_from_ptr (entry), 48);
+
+missreturn:
+ /* Check if the entry's lo_tag is equal to the address' lo_tag. */
+ etag_lo = si_lqd (si_from_ptr (entry), 0);
+ equal = si_ceq (etag_lo, si_from_uint (tag_lo));
+#ifdef __EA64__
+ /* And the high tag too. */
+ etag_hi = si_lqd (si_from_ptr (entry), 16);
+ equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi))));
+#endif
+
+ if ((si_to_uint (si_orx (equal)) == 0))
+ goto misshandler;
+
+ if (n_bytes_dirty)
+ {
+ /* way = 0x40,0x50,0x60,0x70 for each way, which is also the
+ offset of the appropriate dirty bits. */
+ way = si_shli (si_clz (si_gbb (equal)), 2);
+
+ /* To create the bit_mask, we set it to all 1s (uint -1), then we
+ shift it over (128 - n_bytes_dirty) times. */
+
+ bit_mask = si_from_uint (-1);
+
+ bit_mask =
+ si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));
+
+ bit_mask =
+ si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));
+
+ /* Rotate it around to the correct offset. */
+ bit_mask =
+ si_rotqby (bit_mask,
+ si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8));
+
+ bit_mask =
+ si_rotqbi (bit_mask,
+ si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8));
+
+ /* Update the dirty bits. */
+ si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
+ si_from_ptr (entry), way);
+ };
+
+ /* We've definitely found the right entry, set LRU (reserved) to 0
+ maintaining the LS flag (MSB). */
+
+ si_stqd (si_andc
+ (si_lqd (si_from_ptr (entry), 48),
+ si_and (equal, si_from_uint (~(LS_FLAG)))),
+ si_from_ptr (entry), 48);
+
+ return (void *)
+ si_to_uint (si_a
+ (si_orx
+ (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
+ si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));
+
+misshandler:
+ equal = si_ceqi (etag_lo, 1);
+ __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
+ goto missreturn;
+}
+
+void *
+__cache_fetch (__ea void *ea)
+{
+ return __cache_fetch_dirty (ea, 0);
+}
+
+void
+__cache_touch (__ea void *ea __attribute__ ((unused)))
+{
+ /* NO-OP for now. */
+}
+
+void __cache_flush (void) __attribute__ ((destructor));
+void
+__cache_flush (void)
+{
+ struct __cache_tag_array *entry = __cache_tag_array;
+ unsigned int i;
+ int j;
+
+ /* Cycle through each cache entry and evict all used ways. */
+
+ for (i = 0; i < CACHE_LINES / WAYS; i++)
+ {
+ for (j = 0; j < WAYS; j++)
+ if (!CHECK_EMPTY (entry, j))
+ __cache_evict_entry (entry, j);
+
+ entry++;
+ }
+}
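To show how the entry points above fit together, here is a hypothetical caller sketch. It assumes an spu-gcc build with __ea support (-mea32 or -mea64); the prototypes are the ones defined in this file, while load_counter and bump_counter are invented for illustration and are not part of the patch.

void *__cache_fetch (__ea void *ea);
void *__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty);
void __cache_flush (void);

int
load_counter (__ea int *p)
{
  /* Read access: map the effective address to its local-store copy.  */
  return *(int *) __cache_fetch ((__ea void *) p);
}

void
bump_counter (__ea int *p)
{
  /* Write access: fetch the line and mark sizeof (int) bytes dirty so that
     eviction (or an explicit flush) writes them back to system memory.  */
  int *local = (int *) __cache_fetch_dirty ((__ea void *) p, sizeof (int));
  ++*local;
  __cache_flush ();  /* optional: force the write-back now; the destructor
                        above also flushes at program exit */
}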
diff --git a/gcc-4.9/libgcc/config/spu/divmodti4.c b/gcc-4.9/libgcc/config/spu/divmodti4.c
new file mode 100644
index 000000000..79192a987
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/divmodti4.c
@@ -0,0 +1,188 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+typedef int TItype __attribute__ ((mode (TI)));
+TItype __divti3 (TItype u, TItype v);
+TItype __modti3 (TItype u, TItype v);
+UTItype __udivti3 (UTItype u, UTItype v);
+UTItype __umodti3 (UTItype u, UTItype v);
+UTItype __udivmodti4 (UTItype u, UTItype v, UTItype *w);
+
+union qword_UTItype
+ {
+ qword q;
+ UTItype t;
+ };
+
+inline static qword
+si_from_UTItype (UTItype t)
+{
+ union qword_UTItype u;
+ u.t = t;
+ return u.q;
+}
+
+inline static UTItype
+si_to_UTItype (qword q)
+{
+ union qword_UTItype u;
+ u.q = q;
+ return u.t;
+}
+
+inline static unsigned int
+count_leading_zeros (UTItype x)
+{
+ qword c = si_clz (*(qword *) & x);
+ qword cmp0 = si_cgti (c, 31);
+ qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4));
+ qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8));
+ qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4)));
+ s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8)));
+ s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12)));
+ return si_to_uint (s);
+}
+
+/* Based on the implementation of udivmodsi4, which is essentially
+ * an optimized version of libgcc/udivmodsi4.c
+ clz %7,%2
+ clz %4,%1
+ il %5,1
+ fsmbi %0,0
+ sf %7,%4,%7
+ ori %3,%1,0
+ shl %5,%5,%7
+ shl %4,%2,%7
+1: or %8,%0,%5
+ rotmi %5,%5,-1
+ clgt %6,%4,%3
+ sf %7,%4,%3
+ rotmi %4,%4,-1
+ selb %0,%8,%0,%6
+ selb %3,%7,%3,%6
+3: brnz %5,1b
+ */
+
+UTItype
+__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
+{
+ qword shift =
+ si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
+ qword n0 = si_from_UTItype (num);
+ qword d0 = si_from_UTItype (den);
+ qword bit = si_andi (si_fsmbi (1), 1);
+ qword r0 = si_il (0);
+ qword m1 = si_fsmbi (0x000f);
+ qword mask, r1, n1;
+
+ d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
+ bit = si_shlqbybi (si_shlqbi (bit, shift), shift);
+
+ do
+ {
+ r1 = si_or (r0, bit);
+
+ // n1 = n0 - d0 in TImode
+ n1 = si_bg (d0, n0);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_bgx (d0, n0, n1);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_bgx (d0, n0, n1);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_sfx (d0, n0, n1);
+
+ mask = si_fsm (si_cgti (n1, -1));
+ r0 = si_selb (r0, r1, mask);
+ n0 = si_selb (n0, n1, mask);
+ bit = si_rotqmbii (bit, -1);
+ d0 = si_rotqmbii (d0, -1);
+ }
+ while (si_to_uint (si_orx (bit)));
+ if (rp)
+ *rp = si_to_UTItype (n0);
+ return si_to_UTItype (r0);
+}
+
+UTItype
+__udivti3 (UTItype n, UTItype d)
+{
+ return __udivmodti4 (n, d, (UTItype *)0);
+}
+
+UTItype
+__umodti3 (UTItype n, UTItype d)
+{
+ UTItype w;
+ __udivmodti4 (n, d, &w);
+ return w;
+}
+
+TItype
+__divti3 (TItype n, TItype d)
+{
+ int c = 0;
+ TItype w;
+
+ if (n < 0)
+ {
+ c = ~c;
+ n = -n;
+ }
+ if (d < 0)
+ {
+ c = ~c;
+ d = -d;
+ }
+
+ w = __udivmodti4 (n, d, (UTItype *)0);
+ if (c)
+ w = -w;
+ return w;
+}
+
+TItype
+__modti3 (TItype n, TItype d)
+{
+ int c = 0;
+ TItype w;
+
+ if (n < 0)
+ {
+ c = ~c;
+ n = -n;
+ }
+ if (d < 0)
+ {
+ c = ~c;
+ d = -d;
+ }
+
+ __udivmodti4 (n, d, (UTItype *) &w);
+ if (c)
+ w = -w;
+ return w;
+}
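The si_bg/si_bgx/si_sfx sequence inside the loop computes "n1 = n0 - d0 in TImode" (as the inline comment notes) by propagating the borrow across the four word slots, and the surrounding loop is classic restoring shift-and-subtract division. A scalar sketch of the same algorithm, assuming a host compiler with unsigned __int128 (illustration only, not part of the patch; den must be non-zero):

typedef unsigned __int128 uti_sketch;

static int
clz128 (uti_sketch x)
{
  unsigned long long hi = (unsigned long long) (x >> 64);
  if (hi)
    return __builtin_clzll (hi);
  return 64 + __builtin_clzll ((unsigned long long) x);
}

static uti_sketch
udivmodti4_sketch (uti_sketch num, uti_sketch den, uti_sketch *rp)
{
  uti_sketch quo = 0;

  if (num >= den)
    {
      /* Align the divisor's leading one bit with the dividend's.  */
      int shift = clz128 (den) - clz128 (num);
      uti_sketch bit = (uti_sketch) 1 << shift;

      den <<= shift;
      while (bit)
        {
          if (num >= den)
            {
              num -= den;   /* subtract the shifted divisor */
              quo |= bit;   /* record this quotient bit */
            }
          bit >>= 1;
          den >>= 1;
        }
    }
  if (rp)
    *rp = num;              /* what remains is the remainder */
  return quo;
}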
diff --git a/gcc-4.9/libgcc/config/spu/divv2df3.c b/gcc-4.9/libgcc/config/spu/divv2df3.c
new file mode 100644
index 000000000..aca64d2a4
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/divv2df3.c
@@ -0,0 +1,195 @@
+/* Copyright (C) 2009-2014 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+vector double __divv2df3 (vector double a_in, vector double b_in);
+
+/* __divv2df3 divides the vector dividend a by the vector divisor b and
+ returns the resulting vector quotient. Maximum error about 0.5 ulp
+ over entire double range including denorms, compared to true result
+ in round-to-nearest rounding mode. Handles Inf or NaN operands and
+ results correctly. */
+
+vector double
+__divv2df3 (vector double a_in, vector double b_in)
+{
+ /* Variables */
+ vec_int4 exp, exp_bias;
+ vec_uint4 no_underflow, overflow;
+ vec_float4 mant_bf, inv_bf;
+ vec_ullong2 exp_a, exp_b;
+ vec_ullong2 a_nan, a_zero, a_inf, a_denorm, a_denorm0;
+ vec_ullong2 b_nan, b_zero, b_inf, b_denorm, b_denorm0;
+ vec_ullong2 nan;
+ vec_uint4 a_exp, b_exp;
+ vec_ullong2 a_mant_0, b_mant_0;
+ vec_ullong2 a_exp_1s, b_exp_1s;
+ vec_ullong2 sign_exp_mask;
+
+ vec_double2 a, b;
+ vec_double2 mant_a, mant_b, inv_b, q0, q1, q2, mult;
+
+ /* Constants */
+ vec_uint4 exp_mask_u32 = spu_splats((unsigned int)0x7FF00000);
+ vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3, 0,1,2,3, 8, 9,10,11, 8,9,10,11};
+ vec_uchar16 swap_32 = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
+ vec_ullong2 exp_mask = spu_splats(0x7FF0000000000000ULL);
+ vec_ullong2 sign_mask = spu_splats(0x8000000000000000ULL);
+ vec_float4 onef = spu_splats(1.0f);
+ vec_double2 one = spu_splats(1.0);
+ vec_double2 exp_53 = (vec_double2)spu_splats(0x0350000000000000ULL);
+
+ sign_exp_mask = spu_or(sign_mask, exp_mask);
+
+ /* Extract the floating point components from each of the operands including
+ * exponent and mantissa.
+ */
+ a_exp = (vec_uint4)spu_and((vec_uint4)a_in, exp_mask_u32);
+ a_exp = spu_shuffle(a_exp, a_exp, splat_hi);
+ b_exp = (vec_uint4)spu_and((vec_uint4)b_in, exp_mask_u32);
+ b_exp = spu_shuffle(b_exp, b_exp, splat_hi);
+
+ a_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)a_in, sign_exp_mask), 0);
+ a_mant_0 = spu_and(a_mant_0, spu_shuffle(a_mant_0, a_mant_0, swap_32));
+
+ b_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)b_in, sign_exp_mask), 0);
+ b_mant_0 = spu_and(b_mant_0, spu_shuffle(b_mant_0, b_mant_0, swap_32));
+
+ a_exp_1s = (vec_ullong2)spu_cmpeq(a_exp, exp_mask_u32);
+ b_exp_1s = (vec_ullong2)spu_cmpeq(b_exp, exp_mask_u32);
+
+ /* Identify all possible special values that must be accommodated including:
+ * +-denorm, +-0, +-infinity, and NaNs.
+ */
+ a_denorm0= (vec_ullong2)spu_cmpeq(a_exp, 0);
+ a_nan = spu_andc(a_exp_1s, a_mant_0);
+ a_zero = spu_and (a_denorm0, a_mant_0);
+ a_inf = spu_and (a_exp_1s, a_mant_0);
+ a_denorm = spu_andc(a_denorm0, a_zero);
+
+ b_denorm0= (vec_ullong2)spu_cmpeq(b_exp, 0);
+ b_nan = spu_andc(b_exp_1s, b_mant_0);
+ b_zero = spu_and (b_denorm0, b_mant_0);
+ b_inf = spu_and (b_exp_1s, b_mant_0);
+ b_denorm = spu_andc(b_denorm0, b_zero);
+
+ /* Scale denorm inputs into normalized numbers by conditionally scaling the
+ * input parameters.
+ */
+ a = spu_sub(spu_or(a_in, exp_53), spu_sel(exp_53, a_in, sign_mask));
+ a = spu_sel(a_in, a, a_denorm);
+
+ b = spu_sub(spu_or(b_in, exp_53), spu_sel(exp_53, b_in, sign_mask));
+ b = spu_sel(b_in, b, b_denorm);
+
+ /* Extract the divisor and dividend exponent and force parameters into the signed
+ * range [1.0,2.0) or [-1.0,2.0).
+ */
+ exp_a = spu_and((vec_ullong2)a, exp_mask);
+ exp_b = spu_and((vec_ullong2)b, exp_mask);
+
+ mant_a = spu_sel(a, one, (vec_ullong2)exp_mask);
+ mant_b = spu_sel(b, one, (vec_ullong2)exp_mask);
+
+ /* Approximate the single reciprocal of b by using
+ * the single precision reciprocal estimate followed by one
+ * single precision iteration of Newton-Raphson.
+ */
+ mant_bf = spu_roundtf(mant_b);
+ inv_bf = spu_re(mant_bf);
+ inv_bf = spu_madd(spu_nmsub(mant_bf, inv_bf, onef), inv_bf, inv_bf);
+
+ /* Perform 2 more Newton-Raphson iterations in double precision. The
+ * result (q1) is in the range (0.5, 2.0).
+ */
+ inv_b = spu_extend(inv_bf);
+ inv_b = spu_madd(spu_nmsub(mant_b, inv_b, one), inv_b, inv_b);
+ q0 = spu_mul(mant_a, inv_b);
+ q1 = spu_madd(spu_nmsub(mant_b, q0, mant_a), inv_b, q0);
+
+ /* Determine the exponent correction factor that must be applied
+ * to q1 by taking into account the exponent of the normalized inputs
+ * and the scale factors that were applied to normalize them.
+ */
+ exp = spu_rlmaska(spu_sub((vec_int4)exp_a, (vec_int4)exp_b), -20);
+ exp = spu_add(exp, (vec_int4)spu_add(spu_and((vec_int4)a_denorm, -0x34), spu_and((vec_int4)b_denorm, 0x34)));
+
+ /* Bias the quotient exponent depending on the sign of the exponent correction
+ * factor so that a single multiplier will ensure the entire double precision
+ * domain (including denorms) can be achieved.
+ *
+ * exp bias q1 adjust exp
+ * ===== ======== ==========
+ * positive 2^+65 -65
+ * negative 2^-64 +64
+ */
+ exp_bias = spu_xor(spu_rlmaska(exp, -31), 64);
+ exp = spu_sub(exp, exp_bias);
+
+ q1 = spu_sel(q1, (vec_double2)spu_add((vec_int4)q1, spu_sl(exp_bias, 20)), exp_mask);
+
+ /* Compute a multiplier (mult) to be applied to the quotient (q1) to produce the
+ * expected result. On overflow, clamp the multiplier to the maximum non-infinite
+ * number in case the rounding mode is not round-to-nearest.
+ */
+ exp = spu_add(exp, 0x3FF);
+ no_underflow = spu_cmpgt(exp, 0);
+ overflow = spu_cmpgt(exp, 0x7FE);
+ exp = spu_and(spu_sl(exp, 20), (vec_int4)no_underflow);
+ exp = spu_and(exp, (vec_int4)exp_mask);
+
+ mult = spu_sel((vec_double2)exp, (vec_double2)(spu_add((vec_uint4)exp_mask, -1)), (vec_ullong2)overflow);
+
+ /* Handle special value conditions. These include:
+ *
+ * 1) IF either operand is a NaN OR both operands are 0 or INFINITY THEN a NaN
+ * results.
+ * 2) ELSE IF the dividend is an INFINITY OR the divisor is 0 THEN an INFINITY results.
+ * 3) ELSE IF the dividend is 0 OR the divisor is INFINITY THEN a 0 results.
+ */
+ mult = spu_andc(mult, (vec_double2)spu_or(a_zero, b_inf));
+ mult = spu_sel(mult, (vec_double2)exp_mask, spu_or(a_inf, b_zero));
+
+ nan = spu_or(a_nan, b_nan);
+ nan = spu_or(nan, spu_and(a_zero, b_zero));
+ nan = spu_or(nan, spu_and(a_inf, b_inf));
+
+ mult = spu_or(mult, (vec_double2)nan);
+
+ /* Scale the final quotient */
+
+ q2 = spu_mul(q1, mult);
+
+ return (q2);
+}
+
+
+/* We use the same function for vector and scalar division. Provide the
+ scalar entry point as an alias. */
+double __divdf3 (double a, double b)
+ __attribute__ ((__alias__ ("__divv2df3")));
+
+/* Some toolchain builds used the __fast_divdf3 name for this helper function.
+ Provide it as an additional entry point for compatibility. */
+double __fast_divdf3 (double a, double b)
+ __attribute__ ((__alias__ ("__divv2df3")));
+
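Written out in scalar form, the refinement sequence above is: a reciprocal estimate, one single-precision Newton-Raphson step, one double-precision step, and a final correction of the quotient. The sketch below is an illustration under simplifying assumptions (no denorm, infinity, NaN or overflow handling, and 1.0f / b standing in for the spu_re hardware estimate); the real code first normalizes the mantissas and repairs the exponent separately.

static double
div_sketch (double a, double b)
{
  /* Estimate, then x1 = x0 + x0 * (1 - b * x0) in single precision,
     mirroring the spu_madd (spu_nmsub (...)) line above.  */
  float invf = 1.0f / (float) b;
  invf = invf + invf * (1.0f - (float) b * invf);

  /* One more Newton-Raphson step in double precision.  */
  double inv = (double) invf;
  inv = inv + inv * (1.0 - b * inv);

  /* Form the quotient and refine it once: q1 = q0 + inv * (a - b * q0).  */
  double q0 = a * inv;
  return q0 + inv * (a - b * q0);
}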
diff --git a/gcc-4.9/libgcc/config/spu/float_disf.c b/gcc-4.9/libgcc/config/spu/float_disf.c
new file mode 100644
index 000000000..971ea1c01
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/float_disf.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Prototype. */
+float __floatdisf (long long x);
+
+float __floatdisf (long long x)
+{
+ /* The SPU back-end now generates inline code for this conversion.
+ This file is solely used to provide the __floatdisf function
+ for objects generated with prior versions of GCC. */
+ return x;
+}
diff --git a/gcc-4.9/libgcc/config/spu/float_unsdidf.c b/gcc-4.9/libgcc/config/spu/float_unsdidf.c
new file mode 100644
index 000000000..ad760637b
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/float_unsdidf.c
@@ -0,0 +1,54 @@
+/* Copyright (C) 2006-2014 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+const unsigned char __didf_scale[16] __attribute__ ((__aligned__ (16))) = {
+ 0x00, 0x00, 0x04, 0x3e,
+ 0x00, 0x00, 0x04, 0x1e,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+const unsigned char __didf_pat[16] __attribute__ ((__aligned__ (16))) = {
+ 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x80, 0x80,
+ 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x80, 0x80
+};
+
+/* double __float_unsdidf (unsigned long long int)
+ Construct two exact doubles representing the high and low parts (in
+ parallel), then add them. */
+qword __float_unsdidf (qword DI);
+qword
+__float_unsdidf (qword DI)
+{
+ qword t0, t1, t2, t3, t4, t5, t6, t7, t8;
+ t0 = si_clz (DI);
+ t1 = si_shl (DI, t0);
+ t2 = si_ceqi (t0, 32);
+ t3 = si_sf (t0, *(const qword *) __didf_scale);
+ t4 = si_a (t1, t1);
+ t5 = si_andc (t3, t2);
+ t6 = si_shufb (t5, t4, *(const qword *) __didf_pat);
+ t7 = si_shlqbii (t6, 4);
+ t8 = si_shlqbyi (t7, 8);
+ return si_dfa (t7, t8);
+}
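The comment above describes the conversion as building two exact doubles and adding them once. In scalar terms the equivalent computation is the following sketch (illustration only, not part of the patch):

double
float_unsdidf_sketch (unsigned long long x)
{
  /* Each 32-bit half converts to double exactly, and hi is an exact
     multiple of 2^32, so the single addition performs the only rounding.  */
  double hi = (double) (unsigned int) (x >> 32) * 4294967296.0;  /* 2^32 */
  double lo = (double) (unsigned int) x;
  return hi + lo;
}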
diff --git a/gcc-4.9/libgcc/config/spu/float_unsdisf.c b/gcc-4.9/libgcc/config/spu/float_unsdisf.c
new file mode 100644
index 000000000..ae43eecbc
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/float_unsdisf.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Prototype. */
+float __floatundisf (unsigned long long x);
+
+float __floatundisf (unsigned long long x)
+{
+ /* The SPU back-end now generates inline code for this conversion.
+ This file is solely used to provide the __floatundisf function
+ for objects generated with prior versions of GCC. */
+ return x;
+}
diff --git a/gcc-4.9/libgcc/config/spu/float_unssidf.c b/gcc-4.9/libgcc/config/spu/float_unssidf.c
new file mode 100644
index 000000000..54b8f5874
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/float_unssidf.c
@@ -0,0 +1,45 @@
+/* Copyright (C) 2006-2014 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+const unsigned char __sidf_pat[16] __attribute__ ((__aligned__ (16))) = {
+ 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x80, 0x80,
+ 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x80, 0x80
+};
+
+/* double __float_unssidf (unsigned int SI) */
+qword __float_unssidf (qword SI);
+qword
+__float_unssidf (qword SI)
+{
+ qword t0, t1, t2, t3, t4, t5, t6, t7;
+ t0 = si_clz (SI);
+ t1 = si_il (1054);
+ t2 = si_shl (SI, t0);
+ t3 = si_ceqi (t0, 32);
+ t4 = si_sf (t0, t1);
+ t5 = si_a (t2, t2);
+ t6 = si_andc (t4, t3);
+ t7 = si_shufb (t6, t5, *(const qword *) __sidf_pat);
+ return si_shlqbii (t7, 4);
+}
diff --git a/gcc-4.9/libgcc/config/spu/mfc_multi_tag_release.c b/gcc-4.9/libgcc/config/spu/mfc_multi_tag_release.c
new file mode 100644
index 000000000..ceddfd8c0
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/mfc_multi_tag_release.c
@@ -0,0 +1,72 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Release a sequential group of tags from exclusive use. The sequential
+ group of tags is the range starting from <first_tag> through
+ <first_tag>+<number_of_tags>-1. Upon successful release, MFC_DMA_TAG_VALID
+ is returned and the tags become available for future reservation.
+
+ If the specified tags were not previously reserved, no action is
+ taken and MFC_DMA_TAG_INVALID is returned. */
+
+unsigned int
+__mfc_multi_tag_release (unsigned int first_tag, unsigned int number_of_tags)
+{
+ vector unsigned int table_copy, tmp, tmp1;
+ vector unsigned int one = (vector unsigned int)
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+ vector unsigned int is_invalid;
+ unsigned int last_tag;
+ vector unsigned int has_been_reserved;
+
+ last_tag = first_tag + number_of_tags;
+
+ table_copy = spu_sl (one, number_of_tags);
+ table_copy = spu_rl (table_copy, -last_tag);
+ table_copy = spu_xor (table_copy, -1);
+
+ /* Make sure the tags are in range and valid. */
+ tmp = spu_cmpgt (spu_promote(last_tag, 0), 32);
+ tmp1 = spu_cmpgt (spu_promote(number_of_tags, 0), 32);
+ is_invalid = spu_cmpgt (spu_promote(first_tag, 0), 31);
+
+ /* All bits are set to 1 if invalid, 0 if valid. */
+ is_invalid = spu_or (tmp, is_invalid);
+ is_invalid = spu_or (tmp1, is_invalid);
+
+ /* check whether these tags have been reserved */
+ tmp = spu_rlmask (one, (int)-number_of_tags);
+ tmp1 = spu_sl (__mfc_tag_table, first_tag);
+ has_been_reserved = spu_cmpgt(tmp1, tmp);
+
+ is_invalid = spu_or (has_been_reserved, is_invalid);
+
+ table_copy = spu_sel (__mfc_tag_table, table_copy, table_copy);
+ __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_invalid);
+
+ return spu_extract (is_invalid, 0);
+}
+
diff --git a/gcc-4.9/libgcc/config/spu/mfc_multi_tag_reserve.c b/gcc-4.9/libgcc/config/spu/mfc_multi_tag_reserve.c
new file mode 100644
index 000000000..1ced0afbb
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/mfc_multi_tag_reserve.c
@@ -0,0 +1,84 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Reserve a sequential group of tags for exclusive use. The number of
+ tags to be reserved is specified by the <number_of_tags> parameter.
+ This routine returns the first tag ID for a sequential list of
+ available tags and marks them as reserved. The reserved group
+ of tags is in the range starting from the returned tag through
+ the returned tag + <number_of_tags>-1.
+
+ If the number of tags requested exceeds the number of available
+ sequential tags, then MFC_DMA_TAG_INVALID is returned indicating
+ that the request could not be serviced. */
+
+unsigned int
+__mfc_multi_tag_reserve (unsigned int number_of_tags)
+{
+ vector unsigned int table_copy;
+ vector unsigned int one = (vector unsigned int)
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+ vector unsigned int count_busy, is_valid;
+ vector unsigned int count_total;
+ vector unsigned int count_avail = (vector unsigned int) { 0, 0, 0, 0 };
+ vector unsigned int index = (vector unsigned int) { 0, 0, 0, 0 };
+
+ table_copy = __mfc_tag_table;
+
+
+ /* count_busy: number of consecutive busy tags
+ count_avail: number of consecutive free tags
+ table_copy: temporary copy of the tag table
+ count_total: sum of count_busy and count_avail
+ index: index of the current working tag */
+ do
+ {
+ table_copy = spu_sl (table_copy, count_avail);
+
+ count_busy = spu_cntlz (table_copy);
+ table_copy = spu_sl (table_copy, count_busy);
+ count_avail = spu_cntlz (spu_xor(table_copy, -1));
+ count_total = spu_add (count_busy, count_avail);
+ index = spu_add (index, count_total);
+ }
+ while (spu_extract (count_avail, 0) < number_of_tags
+ && spu_extract (table_copy, 0) != 0);
+
+ index = spu_sub (index, count_avail);
+
+ /* is_valid is set to 0xFFFFFFFF if table_copy == 0, 0 otherwise. */
+ is_valid = spu_cmpeq (table_copy, 0);
+ index = spu_sel (index, is_valid, is_valid);
+
+ /* Now I need to actually mark the tags as used. */
+ table_copy = spu_sl (one, number_of_tags);
+ table_copy = spu_rl (table_copy, -number_of_tags - spu_extract (index, 0));
+ table_copy = spu_sel (table_copy, __mfc_tag_table, table_copy);
+ __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_valid);
+
+ return spu_extract (index, 0);
+}
+
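A hypothetical caller sketch for the two multi-tag routines: reserve a block of consecutive tags, issue independent DMAs on them, then release the whole block. The function use_four_tags is invented for illustration and calls the libgcc entry points directly; MFC_DMA_TAG_INVALID is the failure value named in the comments above and is assumed to come from spu_mfcio.h.

#include <spu_mfcio.h>

extern unsigned int __mfc_multi_tag_reserve (unsigned int number_of_tags);
extern unsigned int __mfc_multi_tag_release (unsigned int first_tag,
                                             unsigned int number_of_tags);

void
use_four_tags (void)
{
  unsigned int first = __mfc_multi_tag_reserve (4);

  if (first == MFC_DMA_TAG_INVALID)
    return;                  /* no run of four consecutive free tags */

  /* ... issue independent DMAs on tags first .. first + 3 here ...  */

  __mfc_multi_tag_release (first, 4);
}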
diff --git a/gcc-4.9/libgcc/config/spu/mfc_tag_release.c b/gcc-4.9/libgcc/config/spu/mfc_tag_release.c
new file mode 100644
index 000000000..b3de2da9c
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/mfc_tag_release.c
@@ -0,0 +1,59 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Release the specified DMA tag from exclusive use. Once released, the
+ tag is available for future reservation. Upon successful release,
+ MFC_DMA_TAG_VALID is returned. If the specified tag is not in the
+ range 0 to 31, or had not been reserved, no action is taken and
+ MFC_DMA_TAG_INVALID is returned. */
+
+unsigned int
+__mfc_tag_release (unsigned int tag)
+{
+ vector unsigned int is_invalid;
+ vector unsigned int mask = (vector unsigned int)
+ { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+ vector signed int zero = (vector signed int) { 0, 0, 0, 0 };
+
+ vector signed int has_been_reserved;
+
+ /* Check if the tag is out of range. */
+ is_invalid = spu_cmpgt (spu_promote (tag, 0), 31);
+
+ /* Check whether the tag has been reserved; set to all 1s if it has not
+ been reserved, 0 otherwise. */
+ has_been_reserved = (vector signed int) spu_rl (__mfc_tag_table, tag);
+ has_been_reserved = (vector signed int) spu_cmpgt (zero, has_been_reserved);
+
+ /* Set invalid. */
+ is_invalid = spu_or ((vector unsigned int) has_been_reserved, is_invalid);
+
+ mask = spu_rlmask (mask, (int)(-tag));
+ __mfc_tag_table = spu_or (__mfc_tag_table, mask);
+
+ return spu_extract(is_invalid, 0);
+}
+
diff --git a/gcc-4.9/libgcc/config/spu/mfc_tag_reserve.c b/gcc-4.9/libgcc/config/spu/mfc_tag_reserve.c
new file mode 100644
index 000000000..49626d1c4
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/mfc_tag_reserve.c
@@ -0,0 +1,51 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Reserves a DMA tag for exclusive use. This routine returns an available
+ tag id in the range 0 to 31 and marks the tag as reserved. If no tags
+ are available, MFC_DMA_TAG_INVALID is returned indicating that all tags
+ are already reserved. */
+
+unsigned int
+__mfc_tag_reserve (void)
+{
+ vector unsigned int mask = (vector unsigned int)
+ { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+ vector unsigned int count_zeros, is_valid;
+ vector signed int count_neg;
+
+ count_zeros = spu_cntlz (__mfc_tag_table);
+ count_neg = spu_sub (0, (vector signed int) count_zeros);
+
+ mask = spu_rlmask (mask, (vector signed int) count_neg);
+ __mfc_tag_table = spu_andc (__mfc_tag_table, mask);
+
+ is_valid = spu_cmpeq (count_zeros, 32);
+ count_zeros = spu_sel (count_zeros, is_valid, is_valid);
+
+ return spu_extract (count_zeros, 0);
+}
+
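The single-tag routines support the usual reserve / DMA / wait / release cycle, the same pattern cachemgr.c follows above. A hypothetical sketch (fetch_block and buf are invented for illustration; mfc_tag_reserve and mfc_tag_release are assumed to be the spu_mfcio.h wrappers for the two libgcc routines):

#include <spu_mfcio.h>

static char buf[128] __attribute__ ((aligned (128)));

void
fetch_block (unsigned long long ea)
{
  unsigned int tag = mfc_tag_reserve ();
  if (tag == MFC_DMA_TAG_INVALID)
    return;                                  /* all 32 tags are in use */

  mfc_get (buf, ea, sizeof buf, tag, 0, 0);  /* start the DMA transfer */
  mfc_write_tag_mask (1 << tag);             /* wait for this tag only */
  mfc_read_tag_status_all ();

  mfc_tag_release (tag);
}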
diff --git a/gcc-4.9/libgcc/config/spu/mfc_tag_table.c b/gcc-4.9/libgcc/config/spu/mfc_tag_table.c
new file mode 100644
index 000000000..92e00d9be
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/mfc_tag_table.c
@@ -0,0 +1,39 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The free tag table used by the MFC tag manager, with tag0
+ reserved for the overlay manager. */
+__vector unsigned int
+__mfc_tag_table = (__vector unsigned int) { 0x7FFFFFFF, -1, -1, -1 };
+
+/* Arrange to release tag0 if overlays are not present. */
+static void __mfc_tag_init (void) __attribute__ ((constructor));
+
+static void
+__mfc_tag_init (void)
+{
+ extern void _ovly_table __attribute__ ((weak));
+
+ if (&_ovly_table == 0)
+ __mfc_tag_table = (__vector unsigned int) { -1, -1, -1, -1 };
+}
diff --git a/gcc-4.9/libgcc/config/spu/multi3.c b/gcc-4.9/libgcc/config/spu/multi3.c
new file mode 100644
index 000000000..69097fa0e
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/multi3.c
@@ -0,0 +1,119 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+typedef int TItype __attribute__ ((mode (TI)));
+
+union qword_TItype
+ {
+ qword q;
+ TItype t;
+ };
+
+inline static qword
+si_from_TItype (TItype t)
+{
+ union qword_TItype u;
+ u.t = t;
+ return u.q;
+}
+
+inline static TItype
+si_to_TItype (qword q)
+{
+ union qword_TItype u;
+ u.q = q;
+ return u.t;
+}
+
+/* A straightforward vectorization and unrolling of
+ * short l[8], r[8];
+ * TItype total = 0;
+ * for (i = 0; i < 8; i++)
+ * for (j = 0; j < 8; j++)
+ * total += (TItype)((l[7-i] * r[7-j]) << (16 * (i + j)));
+ */
+TItype
+__multi3 (TItype l, TItype r)
+{
+ qword u = si_from_TItype (l);
+ qword v = si_from_TItype (r);
+ qword splat0 = si_shufb (v, v, si_ilh (0x0001));
+ qword splat1 = si_shufb (v, v, si_ilh (0x0203));
+ qword splat2 = si_shufb (v, v, si_ilh (0x0405));
+ qword splat3 = si_shufb (v, v, si_ilh (0x0607));
+ qword splat4 = si_shufb (v, v, si_ilh (0x0809));
+ qword splat5 = si_shufb (v, v, si_ilh (0x0a0b));
+ qword splat6 = si_shufb (v, v, si_ilh (0x0c0d));
+ qword splat7 = si_shufb (v, v, si_ilh (0x0e0f));
+
+ qword part0l = si_shlqbyi (si_mpyu (u, splat0), 14);
+ qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14);
+ qword part1l = si_shlqbyi (si_mpyu (u, splat1), 12);
+ qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12);
+ qword part2l = si_shlqbyi (si_mpyu (u, splat2), 10);
+ qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10);
+ qword part3l = si_shlqbyi (si_mpyu (u, splat3), 8);
+ qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8);
+ qword part4l = si_shlqbyi (si_mpyu (u, splat4), 6);
+ qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6);
+ qword part5l = si_shlqbyi (si_mpyu (u, splat5), 4);
+ qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4);
+ qword part6l = si_shlqbyi (si_mpyu (u, splat6), 2);
+ qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2);
+ qword part7l = si_mpyu (u, splat7);
+
+ qword carry, total0, total1, total2, total3, total4;
+ qword total5, total6, total7, total8, total9, total10;
+ qword total;
+
+ total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l);
+ total1 = si_a (part2l, part3h);
+ total2 = si_a (part3l, part4h);
+ total3 = si_a (part4l, part5h);
+ total4 = si_a (part5l, part6h);
+ total5 = si_a (part6l, part7h);
+ total6 = si_a (total0, total1);
+ total7 = si_a (total2, total3);
+ total8 = si_a (total4, total5);
+ total9 = si_a (total6, total7);
+ total10 = si_a (total8, total9);
+
+ carry = si_cg (part2l, part3h);
+ carry = si_a (carry, si_cg (part3l, part4h));
+ carry = si_a (carry, si_cg (part4l, part5h));
+ carry = si_a (carry, si_cg (part5l, part6h));
+ carry = si_a (carry, si_cg (part6l, part7h));
+ carry = si_a (carry, si_cg (total0, total1));
+ carry = si_a (carry, si_cg (total2, total3));
+ carry = si_a (carry, si_cg (total4, total5));
+ carry = si_a (carry, si_cg (total6, total7));
+ carry = si_a (carry, si_cg (total8, total9));
+ carry = si_shlqbyi (carry, 4);
+
+ total = si_cg (total10, carry);
+ total = si_shlqbyi (total, 4);
+ total = si_cgx (total10, carry, total);
+ total = si_shlqbyi (total, 4);
+ total = si_addx (total10, carry, total);
+ return si_to_TItype (total);
+}
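A compilable form of the reference loop in the comment above, assuming a host compiler with __int128 (illustration only, not part of the patch). Two's-complement multiplication modulo 2^128 produces the same bits for signed and unsigned operands, so the sketch uses unsigned arithmetic and skips the partial products that fall entirely past bit 127.

typedef unsigned __int128 uti_sketch;

uti_sketch
multi3_reference (uti_sketch l, uti_sketch r)
{
  unsigned short a[8], b[8];
  uti_sketch total = 0;
  int i, j;

  /* Index 0 is the most significant 16-bit limb, index 7 the least,
     matching the l[] and r[] of the comment above.  */
  for (i = 0; i < 8; i++)
    {
      a[7 - i] = (unsigned short) (l >> (16 * i));
      b[7 - i] = (unsigned short) (r >> (16 * i));
    }

  for (i = 0; i < 8; i++)
    for (j = 0; j < 8; j++)
      if (i + j < 8)  /* higher terms wrap past 128 bits and vanish */
        total += (uti_sketch) ((unsigned int) a[7 - i] * b[7 - j])
                 << (16 * (i + j));

  return total;
}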
diff --git a/gcc-4.9/libgcc/config/spu/t-elf b/gcc-4.9/libgcc/config/spu/t-elf
new file mode 100644
index 000000000..29536e881
--- /dev/null
+++ b/gcc-4.9/libgcc/config/spu/t-elf
@@ -0,0 +1,59 @@
+# Don't let CTOR_LIST end up in sdata section.
+# FIXME: This is the default.
+CRTSTUFF_T_CFLAGS =
+
+# We exclude those because the libgcc2.c default versions do not support
+# the SPU single-precision format (round towards zero). We provide our
+# own versions below and/or via direct expansion.
+LIB2FUNCS_EXCLUDE = _floatdisf _floatundisf _floattisf _floatunstisf
+
+LIB2ADD_ST = $(srcdir)/config/spu/float_unssidf.c \
+ $(srcdir)/config/spu/float_unsdidf.c \
+ $(srcdir)/config/spu/float_unsdisf.c \
+ $(srcdir)/config/spu/float_disf.c \
+ $(srcdir)/config/spu/mfc_tag_table.c \
+ $(srcdir)/config/spu/mfc_tag_reserve.c \
+ $(srcdir)/config/spu/mfc_tag_release.c \
+ $(srcdir)/config/spu/mfc_multi_tag_reserve.c \
+ $(srcdir)/config/spu/mfc_multi_tag_release.c \
+ $(srcdir)/config/spu/multi3.c \
+ $(srcdir)/config/spu/divmodti4.c \
+ $(srcdir)/config/spu/divv2df3.c
+
+# Build TImode conversion routines to support Fortran 128-bit
+# integer data types.
+LIB2_SIDITI_CONV_FUNCS = yes
+
+HOST_LIBGCC2_CFLAGS += -mwarn-reloc -D__IN_LIBGCC2
+
+# Neither gcc nor newlib seems to have a standard way to generate multiple
+# crt*.o files. So we don't use the standard crt0.o name anymore.
+
+cachemgr.o: $(srcdir)/config/spu/cachemgr.c
+ $(gcc_compile) -c $<
+
+# Specialised rule to add a -D flag.
+cachemgr_nonatomic.o: $(srcdir)/config/spu/cachemgr.c
+ $(gcc_compile) -DNONATOMIC -c $<
+
+libgcc_%.a: %.o
+ $(AR_FOR_TARGET) -rcs $@ $<
+
+cache8k.o: $(srcdir)/config/spu/cache.S
+ $(gcc_compile) -D__CACHE_SIZE__=8 -c $<
+
+cache16k.o: $(srcdir)/config/spu/cache.S
+ $(gcc_compile) -D__CACHE_SIZE__=16 -c $<
+
+cache32k.o: $(srcdir)/config/spu/cache.S
+ $(gcc_compile) -D__CACHE_SIZE__=32 -c $<
+
+cache64k.o: $(srcdir)/config/spu/cache.S
+ $(gcc_compile) -D__CACHE_SIZE__=64 -c $<
+
+cache128k.o: $(srcdir)/config/spu/cache.S
+ $(gcc_compile) -D__CACHE_SIZE__=128 -c $<
+
+# We provide our own version of __divdf3 that performs better and has
+# better support for non-default rounding modes.
+DPBIT_FUNCS := $(filter-out _div_df, $(DPBIT_FUNCS))