aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S')
-rw-r--r--gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S274
1 files changed, 274 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S
new file mode 100644
index 000000000..ef54ffd7c
--- /dev/null
+++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S
@@ -0,0 +1,274 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+ on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ - calculate 15..18 bit inverse using a table of approximating polynoms.
+ precision is higher for polynoms used to evaluate input with larger
+ value.
+ - do one newton-raphson iteration step to double the precision,
+ then multiply this with the divisor
+ -> more time to decide if dividend is subnormal
+ - the worst error propagation is on the side of the value range
+ with the least initial defect, thus giving us about 30 bits precision.
+ */
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __divsf3
+ FUNC(__divsf3)
+ .balign 4
+__divsf3:
+ push_s blink
+ push_s r1
+ bl.d __divsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __divsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+#if 1
+ bne abort
+ jeq_s [blink]
+ b abort
+#else
+ bne abort
+ j_s [blink]
+#endif
+ ENDFUNC(__divsf3)
+#define __divsf3 __divsf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divsf3)
+ .balign 4
+.Ldivtab:
+ .long 0xfc0ffff0
+ .long 0xf46ffefd
+ .long 0xed1ffd2a
+ .long 0xe627fa8e
+ .long 0xdf7ff73b
+ .long 0xd917f33b
+ .long 0xd2f7eea3
+ .long 0xcd1fe986
+ .long 0xc77fe3e7
+ .long 0xc21fdddb
+ .long 0xbcefd760
+ .long 0xb7f7d08c
+ .long 0xb32fc960
+ .long 0xae97c1ea
+ .long 0xaa27ba26
+ .long 0xa5e7b22e
+ .long 0xa1cfa9fe
+ .long 0x9ddfa1a0
+ .long 0x9a0f990c
+ .long 0x9667905d
+ .long 0x92df878a
+ .long 0x8f6f7e84
+ .long 0x8c27757e
+ .long 0x88f76c54
+ .long 0x85df630c
+ .long 0x82e759c5
+ .long 0x8007506d
+ .long 0x7d3f470a
+ .long 0x7a8f3da2
+ .long 0x77ef341e
+ .long 0x756f2abe
+ .long 0x72f7212d
+ .long 0x709717ad
+ .long 0x6e4f0e44
+ .long 0x6c1704d6
+ .long 0x69e6fb44
+ .long 0x67cef1d7
+ .long 0x65c6e872
+ .long 0x63cedf18
+ .long 0x61e6d5cd
+ .long 0x6006cc6d
+ .long 0x5e36c323
+ .long 0x5c76b9f3
+ .long 0x5abeb0b7
+ .long 0x5916a79b
+ .long 0x57769e77
+ .long 0x55de954d
+ .long 0x54568c4e
+ .long 0x52d6834d
+ .long 0x51667a7f
+ .long 0x4ffe71b5
+ .long 0x4e9e68f1
+ .long 0x4d466035
+ .long 0x4bf65784
+ .long 0x4aae4ede
+ .long 0x496e4646
+ .long 0x48363dbd
+ .long 0x47063547
+ .long 0x45de2ce5
+ .long 0x44be2498
+ .long 0x43a61c64
+ .long 0x4296144a
+ .long 0x41860c0e
+ .long 0x407e03ee
+.L7f800000:
+ .long 0x7f800000
+ .balign 4
+ .global __divsf3_support
+__divsf3_support:
+.Linf_NaN:
+ bclr.f 0,r0,31 ; 0/0 -> NaN
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ bic_s r0,r0,r1
+ sub.eq r0,r0,1
+ j_s.d [blink]
+ or r0,r0,r9
+.Lret0:
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_s r0,r0,r1
+/* N.B. the spacing between divtab and the sub3 to get its address must
+ be a multiple of 8. */
+__divsf3:
+ lsr r2,r1,17
+ sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3
+ bmsk_s r2,r2,5
+ ld.as r5,[r3,r2]
+ asl r4,r1,9
+ ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
+ mulu64 r5,r4
+ and.f r11,r1,r9
+ asl r6,r1,8
+ bset r6,r6,31
+ beq.d .Ldenorm_fp1
+ asl r5,r5,13
+ breq.d r11,r9,.Linf_nan_fp1
+ and.f r2,r0,r9
+ sub r7,r5,mhi
+ mulu64 r7,r6
+ beq.d .Ldenorm_fp0
+ asl r12,r0,8
+ breq.d r2,r9,.Linf_nan_fp0
+ mulu64 mhi,r7
+.Lpast_denorm_fp1:
+ bset r3,r12,31
+.Lpast_denorm_fp0:
+ cmp_s r3,r6
+ lsr.cc r3,r3,1
+ add_s r2,r2, /* wait for immediate */ \
+ 0x3f000000
+ sub r7,r7,mhi ; u1.31 inverse, about 30 bit
+ mulu64 r3,r7
+ sbc r2,r2,r11
+ xor.f 0,r0,r1
+ and r0,r2,r9
+ bclr r3,r9,23 ; 0x7f000000
+ brhs.d r2,r3,.Linf_denorm
+ bxor.mi r0,r0,31
+.Lpast_denorm:
+ add r3,mhi,0x22 ; round to nearest or higher
+ tst r3,0x3c ; check if rounding was unsafe
+ lsr r3,r3,6
+ jne.d [blink] ; return if rounding was safe.
+ add_s r0,r0,r3
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in single
+ precision. Find the mid-point between the two nearest
+ representable values, multiply with the divisor, and check if
+ the result is larger than the dividend. */
+ add_s r3,r3,r3
+ sub_s r3,r3,1
+ mulu64 r3,r6
+ asr.f 0,r0,1 ; for round-to-even in case this is a denorm
+ rsub r2,r9,25
+ asl_s r12,r12,r2
+ sub.f 0,r12,mlo
+ j_s.d [blink]
+ sub.mi r0,r0,1
+.Linf_nan_fp1:
+ lsr_s r0,r0,31
+ bmsk.f 0,r1,22
+ asl_s r0,r0,31
+ bne_s 0f ; inf/inf -> nan
+ brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
+0: j_s.d [blink]
+ mov r0,-1
+.Lsigned0:
+.Linf_nan_fp0:
+ tst_s r1,r1
+ j_s.d [blink]
+ bxor.mi r0,r0,31
+ .balign 4
+ .global __divsf3
+/* For denormal results, it is possible that an exact result needs
+ rounding, and thus the round-to-even rule has to come into play. */
+.Linf_denorm:
+ brlo r2,0xc0000000,.Linf
+.Ldenorm:
+ asr_s r2,r2,23
+ bic r0,r0,r9
+ neg r9,r2
+ brlo.d r9,25,.Lpast_denorm
+ lsr r3,mlo,r9
+ /* Fall through: return +- 0 */
+ j_s [blink]
+.Linf:
+ j_s.d [blink]
+ or r0,r0,r9
+ .balign 4
+.Ldenorm_fp1:
+ bclr r6,r6,31
+ norm.f r12,r6 ; flag for x/0 -> Inf check
+ add r6,r6,r6
+ rsub r5,r12,16
+ ror r5,r1,r5
+ asl r6,r6,r12
+ bmsk r5,r5,5
+ ld.as r5,[r3,r5]
+ add r4,r6,r6
+ ; load latency
+ mulu64 r5,r4
+ bic.ne.f 0, \
+ 0x60000000,r0 ; large number / denorm -> Inf
+ asl r5,r5,13
+ sub r7,r5,mhi
+ beq.d .Linf_NaN
+ mulu64 r7,r6
+ asl_s r12,r12,23
+ and.f r2,r0,r9
+ add_s r2,r2,r12
+ asl r12,r0,8
+ bne.d .Lpast_denorm_fp1
+.Ldenorm_fp0: mulu64 mhi,r7
+ bclr r12,r12,31
+ norm.f r3,r12 ; flag for 0/x -> 0 check
+ bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
+ beq_s .Lret0
+ asl_s r12,r12,r3
+ asl_s r3,r3,23
+ add_s r12,r12,r12
+ add r11,r11,r3
+ b.d .Lpast_denorm_fp0
+ mov_s r3,r12
+ ENDFUNC(__divsf3)