diff options
Diffstat (limited to 'gcc-4.9/libgcc/config/epiphany/udivsi3-float.S')
-rw-r--r-- | gcc-4.9/libgcc/config/epiphany/udivsi3-float.S | 83 |
1 file changed, 83 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/epiphany/udivsi3-float.S b/gcc-4.9/libgcc/config/epiphany/udivsi3-float.S
new file mode 100644
index 000000000..f1e2d7b17
--- /dev/null
+++ b/gcc-4.9/libgcc/config/epiphany/udivsi3-float.S
@@ -0,0 +1,83 @@
/* Unsigned 32 bit division optimized for Epiphany.
   Copyright (C) 2009-2014 Free Software Foundation, Inc.
   Contributed by Embecosm on behalf of Adapteva, Inc.

This file is part of GCC.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "epiphany-asm.h"

; unsigned int __udivsi3 (unsigned int dividend /* r0 */,
;                         unsigned int divisor  /* r1 */)
; Out:      r0 = quotient.
; Clobbers: r1, TMP0..TMP3 (scratch regs from epiphany-asm.h), flags.
;
; Strategy: use the int->float conversions to read the IEEE exponent
; fields and estimate floor(log2(dividend)) - floor(log2(divisor)),
; align the divisor by that shift, then finish with restoring
; shift-and-subtract steps entered via a computed jump into an
; unrolled sequence so only the needed iterations execute.

	FSTAB (__udivsi3,T_UINT)
	.global SYM(__udivsi3)
	.balign 4
	HIDDEN_FUNC(__udivsi3)
SYM(__udivsi3):
	sub TMP0,r0,r1		; dividend < divisor?
	bltu .Lret0		; yes -> quotient is 0.
	float TMP2,r0		; TMP2 = IEEE bits of (float) dividend.
	mov TMP1,%low(0xb0800000) ; ??? this would be faster with small data
	float TMP3,r1		; TMP3 = IEEE bits of (float) divisor;
				; also sets the FPU flags tested by bblt below.
	asr TMP0,r0,8		; NOTE(review): this looks like a guard that
	sub TMP0,TMP0,TMP1	; substitutes a safe exponent estimate when the
				; dividend is large enough for `float` rounding
				; to overestimate it -- confirm against the ISA.
	movt TMP1,%high(0x00810000)
	movgteu TMP2,TMP0
	bblt .Lret1		; FPU flags: (float) divisor is negative, i.e.
				; divisor >= 2^31; since dividend >= divisor,
				; the quotient can only be 1 (presumably).
	sub TMP2,TMP2,TMP1	; TMP1 is now 0x00810000; after both
	sub TMP2,TMP2,TMP3	; subtractions the exponent field of TMP2
				; holds exp(dividend) - exp(divisor) - 1.
	mov TMP3,0
	movltu TMP2,TMP3	; clamp a negative difference to 0.
	lsr TMP2,TMP2,23	; TMP2 = shift count (exponent field only).
	lsl r1,r1,TMP2		; align divisor with the dividend.
	mov TMP0,1
	lsl TMP0,TMP0,TMP2	; TMP0 = weight of the leading quotient bit.
	sub r0,r0,r1		; first trial subtraction.
	bltu .Ladd_back
	add TMP3,TMP3,TMP0	; TMP3 accumulates the leading quotient bits.
	sub r0,r0,r1
	bltu .Ladd_back
.Lsub_loop:; More than two iterations are rare, so it makes sense to leave
	; this label here to reduce average branch penalties.
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bgteu .Lsub_loop
.Ladd_back:
	add r0,r0,r1		; undo the overshooting subtraction.
	sub TMP1,r1,1		; TMP1 = aligned divisor - 1: subtracting it
				; shifts a 1 quotient bit into r0 when the
				; (shifted) remainder is >= the divisor.
	mov r1,%low(.L0step)
	movt r1,%high(.L0step)
	lsl TMP2,TMP2,3		; each unrolled step below is 8 bytes, so
	sub r1,r1,TMP2		; back up TMP2 steps from .L0step ...
	jr r1			; ... and run exactly TMP2 division steps.
	.rep 30
	lsl r0,r0,1		; r0 carries remainder and quotient bits.
	sub.l r1,r0,TMP1	; .l forces the 32-bit encoding so every
	movgteu r0,r1		; step stays exactly 8 bytes long.
	.endr
.L0step:sub r1,TMP0,1 ; mask result bits from steps ...
	and r0,r0,r1
	orr r0,r0,TMP3 ; ... and combine with first bits.
	rts
.Lret0:	mov r0,0
	rts
.Lret1:	mov r0,1
	rts
	ENDFUNC(__udivsi3)