Diffstat (limited to 'gcc-4.9/libgcc/config/epiphany/udivsi3-float.S')
 gcc-4.9/libgcc/config/epiphany/udivsi3-float.S | 88 ++++++++++++++++++++++++
 1 file changed, 88 insertions(+), 0 deletions(-)
diff --git a/gcc-4.9/libgcc/config/epiphany/udivsi3-float.S b/gcc-4.9/libgcc/config/epiphany/udivsi3-float.S
new file mode 100644
index 000000000..f1e2d7b17
--- /dev/null
+++ b/gcc-4.9/libgcc/config/epiphany/udivsi3-float.S
@@ -0,0 +1,88 @@
+/* Unsigned 32 bit division optimized for Epiphany.
+ Copyright (C) 2009-2014 Free Software Foundation, Inc.
+ Contributed by Embecosm on behalf of Adapteva, Inc.
+
+This file is part of GCC.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "epiphany-asm.h"
+
+ FSTAB (__udivsi3,T_UINT)
+ .global SYM(__udivsi3)
+ .balign 4
+ HIDDEN_FUNC(__udivsi3)
+SYM(__udivsi3):
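+; Strategy: estimate log2 of the quotient from the IEEE-754 bit patterns
+; of float(dividend) and float(divisor), subtract the shifted divisor a
+; few times, then finish in an unrolled shift-and-subtract step chain.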
+ sub TMP0,r0,r1 ; if the dividend is smaller than the divisor,
+ bltu .Lret0 ; the quotient is 0
+ float TMP2,r0 ; bit pattern of float(r0); its exponent field holds floor(log2(r0))
+ mov TMP1,%low(0xb0800000) ; ??? this would be faster with small data
+ float TMP3,r1 ; likewise for the divisor; also sets the FPU flags
+ movt TMP1,%high(0xb0800000)
+ asr TMP0,r0,8 ; (r0 >> 8) - 0xb0800000 rebuilds the bit pattern an
+ sub TMP0,TMP0,TMP1 ; unsigned conversion would give for r0 >= 2^31,
+ movt TMP1,%high(0x00810000) ; (TMP1 becomes 0x00810000)
+ movgteu TMP2,TMP0 ; where the signed float instruction is wrong
+ bblt .Lret1 ; FPU flags: the divisor has bit 31 set, so the quotient is 1
+ sub TMP2,TMP2,TMP1 ; the biased difference of the two float bit
+ sub TMP2,TMP2,TMP3 ; patterns, roughly (log2(r0/r1) - 1) << 23, ...
+ mov TMP3,0
+ movltu TMP2,TMP3 ; ... clamped at zero ...
+ lsr TMP2,TMP2,23 ; ... gives a shift count that is never too large
+ lsl r1,r1,TMP2 ; align the divisor under the dividend
+ mov TMP0,1
+ lsl TMP0,TMP0,TMP2 ; TMP0 = 1 << shift, weight of one high quotient bit
+ sub r0,r0,r1 ; subtract the shifted divisor while it fits,
+ bltu .Ladd_back ; collecting the high quotient bits in TMP3
+ add TMP3,TMP3,TMP0
+ sub r0,r0,r1
+ bltu .Ladd_back
+.Lsub_loop:; More than two iterations are rare, so it makes sense to leave
+ ; this label here to reduce average branch penalties.
+ add TMP3,TMP3,TMP0
+ sub r0,r0,r1
+ bgteu .Lsub_loop
+.Ladd_back:
+ add r0,r0,r1 ; restore the remainder after overshooting
+ sub TMP1,r1,1 ; (shifted divisor) - 1, used by the divide steps below
+ mov r1,%low(.L0step)
+ movt r1,%high(.L0step)
+ lsl TMP2,TMP2,3 ; each divide step below occupies 8 bytes, so
+ sub r1,r1,TMP2 ; .L0step - 8 * shift is the first step needed
+ jr r1
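+; 30 unrolled divide steps, 8 bytes each; the computed jump above runs
+; only the last few of them, one per remaining quotient bit.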
+ .rep 30
+ lsl r0,r0,1 ; make room for the next quotient bit at the bottom
+ sub.l r1,r0,TMP1 ; subtracting divisor-1 also shifts in a 1 quotient bit
+ movgteu r0,r1 ; keep the result only if the subtraction did not borrow
+ .endr
+.L0step:sub r1,TMP0,1 ; mask result bits from steps ...
+ and r0,r0,r1
+ orr r0,r0,TMP3 ; ... and combine with first bits.
+ rts
+.Lret0: mov r0,0
+ rts
+.Lret1: mov r0,1
+ rts
+ ENDFUNC(__udivsi3)
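
For reference, a rough C model of the same algorithm follows; it is an
illustrative sketch, not part of the commit, and the names udiv_sketch
and float_bits are invented. The shift count is estimated from the
biased difference of the IEEE-754 bit patterns of float(dividend) and
float(divisor), a short loop produces the high quotient bits, and an
explicit loop stands in for the unrolled 8-byte step chain. C converts
the full unsigned range to float directly, so the assembly's
(r0 >> 8) - 0xb0800000 fixup for dividends with bit 31 set has no
counterpart here; as with __udivsi3 itself, a zero divisor is undefined.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bit pattern of (float)x; memcpy sidesteps strict-aliasing issues.  */
static uint32_t float_bits (uint32_t x)
{
  float f = (float) x;
  uint32_t b;
  memcpy (&b, &f, sizeof b);
  return b;
}

static uint32_t udiv_sketch (uint32_t n, uint32_t d)
{
  if (n < d)
    return 0;                     /* .Lret0 */
  if (d & 0x80000000u)
    return 1;                     /* .Lret1: 2^31 <= d <= n */

  /* Estimate from the difference of the float bit patterns, using the
     same 0x00810000 bias as the assembly: the resulting shift is never
     too large, and the loop below makes up for any underestimate.  */
  int32_t est = (int32_t) (float_bits (n) - float_bits (d)) - 0x00810000;
  uint32_t shift = est < 0 ? 0 : (uint32_t) est >> 23;

  uint32_t ds  = d << shift;      /* divisor aligned under the dividend */
  uint32_t bit = 1u << shift;     /* weight of one high quotient bit */
  uint32_t q   = 0;

  while (n >= ds)                 /* the .Lsub_loop part */
    {
      n -= ds;
      q += bit;
    }

  /* One iteration per remaining quotient bit, like the .rep chain:
     subtracting ds - 1 both subtracts the divisor and shifts a 1 into
     the low bit, so quotient bits collect at the bottom of n.  */
  for (uint32_t i = 0; i < shift; i++)
    {
      n <<= 1;
      if (n >= ds - 1)
        n -= ds - 1;
    }

  return q | (n & (bit - 1));     /* combine low and high quotient bits */
}

int main (void)
{
  /* Spot-check against the native division.  */
  uint32_t tests[][2] = { { 7, 3 }, { 100, 10 }, { 5, 5 }, { 2, 3 },
                          { 1u << 31, 3 }, { 0xffffffffu, 1 },
                          { 0xffffffffu, 0x80000001u } };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    {
      uint32_t n = tests[i][0], d = tests[i][1];
      printf ("%u / %u = %u (expect %u)\n", n, d, udiv_sketch (n, d), n / d);
    }
  return 0;
}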