aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S')
-rw-r--r--gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S235
1 files changed, 235 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S
new file mode 100644
index 000000000..7826fe75f
--- /dev/null
+++ b/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S
@@ -0,0 +1,235 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+ on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* XMAC schedule: directly back-to-back multiplies stall; the third
+ instruction after a multiply stalls unless it is also a multiply. */
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __muldf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __muldf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ b abort
+#define __muldf3 __muldf3_asm
+#endif /* DEBUG */
+/* N.B. This is optimized for ARC700.
+ ARC600 has very different scheduling / instruction selection criteria. */
+/* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx ,
+ we can do:
+ sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx */
+
+__muldf3_support: /* This label makes debugger output saner. */
+/* If one number is denormal, subtract some from the exponent of the other
+ one (if the other exponent is too small, return 0), and normalize the
+ denormal. Then re-run the computation. */
+ .balign 4
+ FUNC(__muldf3)
+.Ldenorm_dbl0:
+ mov_s r12,DBL0L
+ mov_s DBL0L,DBL1L
+ mov_s DBL1L,r12
+ mov_s r12,DBL0H
+ mov_s DBL0H,DBL1H
+ mov_s DBL1H,r12
+ and r11,DBL0H,r9
+.Ldenorm_dbl1:
+ brhs r11,r9,.Linf_nan
+ brhs 0x3ca00001,r11,.Lret0
+ sub_s DBL0H,DBL0H,DBL1H
+ bmsk_s DBL1H,DBL1H,30
+ add_s DBL0H,DBL0H,DBL1H
+ breq_s DBL1H,0,.Ldenorm_2
+ norm r12,DBL1H
+
+ sub_s r12,r12,10
+ asl r5,r12,20
+ asl_s DBL1H,DBL1H,r12
+ sub DBL0H,DBL0H,r5
+ neg r5,r12
+ lsr r6,DBL1L,r5
+ asl_s DBL1L,DBL1L,r12
+ b.d __muldf3
+ add_s DBL1H,DBL1H,r6
+
+ .balign 4
+.Linf_nan:
+ bclr r12,DBL1H,31
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr_s DBL0H,DBL0H,31
+ max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
+ or.f 0,DBL0H,DBL0L
+ mov_s DBL0L,0
+ or.ne.f DBL1L,DBL1L,r12
+ not_s DBL0H,DBL0L ; inf * 0 -> NaN
+ mov.ne DBL0H,r8
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+.Lret0: xor_s DBL0H,DBL0H,DBL1H
+ bclr DBL1H,DBL0H,31
+ xor_s DBL0H,DBL0H,DBL1H
+ j_s.d [blink]
+ mov_l DBL0L,0
+
+ .balign 4
+.Ldenorm_2:
+ breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output
+ norm.f r12,DBL1L
+
+ mov.mi r12,21
+ add.pl r12,r12,22
+ neg r11,r12
+ asl_s r12,r12,20
+ lsr.f DBL1H,DBL1L,r11
+ ror DBL1L,DBL1L,r11
+ sub_s DBL0H,DBL0H,r12
+ mov.eq DBL1H,DBL1L
+ sub_s DBL1L,DBL1L,DBL1H
+ /* Fall through. */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)]
+ mpyhu r4,DBL0L,DBL1L
+ bmsk r6,DBL0H,19
+ bset r6,r6,20
+ mpyu r7,r6,DBL1L
+ and r11,DBL0H,r9
+ breq r11,0,.Ldenorm_dbl0
+ mpyhu r8,r6,DBL1L
+ bmsk r10,DBL1H,19
+ bset r10,r10,20
+ mpyhu r5,r10,DBL0L
+ add.f r4,r4,r7
+ and r12,DBL1H,r9
+ mpyhu r7,r6,r10
+ breq r12,0,.Ldenorm_dbl1
+ adc.f r5,r5,r8
+ mpyu r8,r10,DBL0L
+ breq r11,r9,.Linf_nan
+ breq r12,r9,.Linf_nan
+ mpyu r6,r6,r10
+ add.cs r7,r7,1
+ add.f r4,r4,r8
+ mpyu r10,DBL1L,DBL0L
+ bclr r8,r9,30 ; 0x3ff00000
+ adc.f r5,r5,r6
+ ; XMAC write-back stall / std. mult stall is one cycle later
+ bclr r6,r9,20 ; 0x7fe00000
+ add.cs r7,r7,1 ; fraction product in r7:r5:r4
+ tst r10,r10
+ bset.ne r4,r4,0 ; put least significant word into sticky bit
+ lsr.f r10,r7,9
+ add_l r12,r12,r11 ; add exponents
+ rsub.eq r8,r8,r9 ; 0x40000000
+ sub r12,r12,r8 ; subtract bias + implicit 1
+ brhs.d r12,r6,.Linf_denorm
+ rsub r10,r10,12
+.Lshift_frac:
+ neg r8,r10
+ asl r6,r4,r10
+ lsr DBL0L,r4,r8
+ add.f 0,r6,r6
+ btst.eq DBL0L,0
+ cmp.eq r4,r4 ; round to nearest / round to even
+ asl r4,r5,r10
+ lsr r5,r5,r8
+ adc.f DBL0L,DBL0L,r4
+ xor.f 0,DBL0H,DBL1H
+ asl r7,r7,r10
+ add_s r12,r12,r5
+ adc DBL0H,r12,r7
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+/* We have checked for infinity / NaN input before, and transformed
+ denormalized inputs into normalized inputs. Thus, the worst case
+ exponent overflows are:
+ 1 + 1 - 0x400 == 0xc02 : maximum underflow
+ 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
+ N.B. 0x7e and 0x7f are also values for overflow.
+
+ If (r12 <= -54), we have an underflow to zero. */
+ .balign 4
+.Linf_denorm:
+ brlo r12,0xc0000000,.Linf
+ asr r6,r12,20
+ mov_s r12,0
+ add.f r10,r10,r6
+ brgt r10,0,.Lshift_frac
+ beq_s .Lround_frac
+ add.f r10,r10,32
+.Lshift32_frac:
+ tst r4,r4
+ mov r4,r5
+ bset.ne r4,r4,1
+ mov r5,r7
+ mov r7,0
+ brge r10,1,.Lshift_frac
+ breq r10,0,.Lround_frac
+ add.f r10,r10,32
+ brgt r10,21,.Lshift32_frac
+ b_s .Lret0
+
+.Lround_frac:
+ add.f 0,r4,r4
+ btst.eq r5,0
+ mov_s DBL0L,r5
+ mov_s DBL0H,r7
+ adc.eq.f DBL0L,DBL0L,0
+ j_s.d [blink]
+
+ adc.eq DBL0H,DBL0H,0
+
+.Linf: xor.f DBL1H,DBL1H,DBL0H
+ mov_s DBL0L,0
+ mov_s DBL0H,r9
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+ ENDFUNC(__muldf3)
+
+ .balign 4
+.L7ff00000:
+ .long 0x7ff00000