aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S')
-rw-r--r--gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S524
1 files changed, 524 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S
new file mode 100644
index 000000000..80b6455ac
--- /dev/null
+++ b/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S
@@ -0,0 +1,524 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+ on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "arc-ieee-754.h"
+#if 0 /* DEBUG */
+ .global __adddf3
+ .balign 4
+__adddf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __adddf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __adddf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ bl abort
+ .global __subdf3
+ .balign 4
+__subdf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __subdf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __subdf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ bl abort
+#define __adddf3 __adddf3_asm
+#define __subdf3 __subdf3_asm
+#endif /* DEBUG */
+/* N.B. This is optimized for ARC700.
+ ARC600 has very different scheduling / instruction selection criteria. */
+
+/* inputs: DBL0, DBL1 (r0-r3)
+ output: DBL0 (r0, r1)
+ clobber: r2-r10, r12, flags
+ All NaN highword bits must be 1. NaN low word is random. */
+
+ .balign 4
+ .global __adddf3
+ .global __subdf3
+ .long 0x7ff00000 ; exponent mask
+ FUNC(__adddf3)
+ FUNC(__subdf3)
+__subdf3:
+ bxor_l DBL1H,DBL1H,31
+__adddf3:
+ ld r9,[pcl,-8]
+ bmsk r4,DBL0H,30
+ xor r10,DBL0H,DBL1H
+ and r6,DBL1H,r9
+ sub.f r12,r4,r6
+ asr_s r12,r12,20
+ blo .Ldbl1_gt
+ brhs r4,r9,.Linf_nan
+ brhs r12,32,.Large_shift
+ brne r12,0,.Lsmall_shift
+ brge r10,0,.Ladd_same_exp ; r12 == 0
+
+/* After subtracting, we need to normalize; when shifting to place the
+ leading 1 into position for the implicit 1 and adding that to DBL0H,
+ we increment the exponent. Thus, we have to subtract one more than
+ the shift count from the exponent beforehand. Iff the exponent drops thus
+ below zero (before adding in the fraction with the leading one), we have
+ generated a denormal number. Denormal handling is basicallly reducing the
+ shift count so that we produce a zero exponent instead; however, this way
+ the shift count can become zero (if we started out with exponent 1).
+ Therefore, a simple min operation is not good enough, since we don't
+ want to handle a zero normalizing shift in the main path.
+ On the plus side, we don't need to check for denorm input, the result
+ of subtracing these looks just the same as denormals generated during
+ subtraction. */
+ bmsk r7,DBL1H,30
+ cmp r4,r7
+ cmp.eq DBL0L,DBL1L
+ blo .L_rsub_same_exp
+ sub.f DBL0L,DBL0L,DBL1L
+ bmsk r12,DBL0H,19
+ bic DBL1H,DBL0H,r12
+ sbc.f r4,r4,r7
+ beq_l .Large_cancel
+ norm DBL1L,r4
+ b.d .Lsub_done_same_exp
+ sub r12,DBL1L,9
+
+ .balign 4
+.Linf_nan:
+ ; If both inputs are inf, but with different signs, the result is NaN.
+ asr r12,r10,31
+ or_s DBL1H,DBL1H,r12
+ j_s.d [blink]
+ or.eq DBL0H,DBL0H,DBL1H
+
+ .balign 4
+.L_rsub_same_exp:
+ rsub.f DBL0L,DBL0L,DBL1L
+ bmsk r12,DBL1H,19
+ bic_s DBL1H,DBL1H,r12
+ sbc.f r4,r7,r4
+ beq_l .Large_cancel
+ norm DBL1L,r4
+
+ sub r12,DBL1L,9
+.Lsub_done_same_exp:
+ asl_s r12,r12,20
+ sub_s DBL1L,DBL1L,10
+ sub DBL0H,DBL1H,r12
+ xor.f 0,DBL0H,DBL1H
+ bmi .Ldenorm
+.Lpast_denorm:
+ neg_s r12,DBL1L
+ lsr r7,DBL0L,r12
+ asl r12,r4,DBL1L
+ asl_s DBL0L,DBL0L,DBL1L
+ add_s r12,r12,r7
+ j_s.d [blink]
+ add_l DBL0H,DBL0H,r12
+ .balign 4
+.Ladd_same_exp:
+ /* This is a special case because we can't test for need to shift
+ down by checking if bit 20 of DBL0H changes. OTOH, here we know
+ that we always need to shift down. */
+ ; The implicit 1 of DBL0 is not shifted together with the
+ ; fraction, thus effectively doubled, compensating for not setting
+ ; implicit1 for DBL1
+ add_s r12,DBL0L,DBL1L
+ lsr.f 0,r12,2 ; round to even
+ breq r6,0,.Ldenorm_add
+ adc.f DBL0L,DBL0L,DBL1L
+ sub r7,DBL1H,DBL0H
+ sub1 r7,r7,r9 ; boost exponent by 2/2
+ rrc DBL0L,DBL0L
+ asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++
+ add.cs.f DBL0L,DBL0L,0x80000000
+ add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1
+ add.cs DBL0H,DBL0H,1
+ bic.f 0,r9,DBL0H ; check for overflow -> infinity.
+ jne_l [blink]
+ and DBL0H,DBL0H,0xfff00000
+ j_s.d [blink]
+ mov_s DBL0L,0
+ .balign 4
+.Large_shift:
+ brhs r12,55,.Lret_dbl0
+ bmsk_s DBL1H,DBL1H,19
+ brne r6,0,.Lno_denorm_large_shift
+ brhi.d r12,33,.Lfixed_denorm_large_shift
+ sub_s r12,r12,1
+ breq r12,31, .Lfixed_denorm_small_shift
+.Lshift32:
+ mov_s r12,DBL1L
+ mov_s DBL1L,DBL1H
+ brlt.d r10,0,.Lsub
+ mov_s DBL1H,0
+ b_s .Ladd
+.Ldenorm_add:
+ cmp_s r12,DBL1L
+ mov_s DBL0L,r12
+ j_s.d [blink]
+ adc DBL0H,r4,DBL1H
+
+.Lret_dbl0:
+ j_s [blink]
+ .balign 4
+.Lsmall_shift:
+ breq.d r6,0,.Ldenorm_small_shift
+ bmsk_s DBL1H,DBL1H,19
+ bset_s DBL1H,DBL1H,20
+.Lfixed_denorm_small_shift:
+ neg r8,r12
+ asl r4,DBL1H,r8
+ lsr_l DBL1H,DBL1H,r12
+ lsr r5,DBL1L,r12
+ asl r12,DBL1L,r8
+ brge.d r10,0,.Ladd
+ or DBL1L,r4,r5
+/* subtract, abs(DBL0) > abs(DBL1) */
+/* DBL0H, DBL0L: original values
+ DBL1H, DBL1L: fraction with explicit leading 1, shifted into place
+ r4: orig. DBL0H & 0x7fffffff
+ r6: orig. DBL1H & 0x7ff00000
+ r9: 0x7ff00000
+ r10: orig. DBL0H ^ DBL1H
+ r12: guard bits */
+ .balign 4
+.Lsub:
+ neg.f r12,r12
+ mov_s r7,DBL1H
+ bmsk r5,DBL0H,19
+ sbc.f DBL0L,DBL0L,DBL1L
+ bic DBL1H,DBL0H,r5
+ bset r5,r5,20
+ sbc.f r4,r5,r7
+ beq_l .Large_cancel_sub
+ norm DBL1L,r4
+ bmsk r6,DBL1H,30
+.Lsub_done:
+ sub_s DBL1L,DBL1L,9
+ breq DBL1L,1,.Lsub_done_noshift
+ asl r5,DBL1L,20
+ sub_s DBL1L,DBL1L,1
+ brlo r6,r5,.Ldenorm_sub
+ sub DBL0H,DBL1H,r5
+.Lpast_denorm_sub:
+ neg_s DBL1H,DBL1L
+ lsr r6,r12,DBL1H
+ asl_s r12,r12,DBL1L
+ and r8,r6,1
+ add1.f 0,r8,r12
+ add.ne.f r12,r12,r12
+ asl r8,DBL0L,DBL1L
+ lsr r12,DBL0L,DBL1H
+ adc.f DBL0L,r8,r6
+ asl r5,r4,DBL1L
+ add_s DBL0H,DBL0H,r12
+ j_s.d [blink]
+ adc DBL0H,DBL0H,r5
+
+ .balign 4
+.Lno_denorm_large_shift:
+ breq.d r12,32,.Lshift32
+ bset_l DBL1H,DBL1H,20
+.Lfixed_denorm_large_shift:
+ neg r8,r12
+ asl r4,DBL1H,r8
+ lsr r5,DBL1L,r12
+ asl.f 0,DBL1L,r8
+ lsr DBL1L,DBL1H,r12
+ or r12,r4,r5
+ tst.eq r12,1
+ or.ne r12,r12,2
+ brlt.d r10,0,.Lsub
+ mov_s DBL1H,0
+ b_l .Ladd
+
+ ; If a denorm is produced without shifting, we have an exact result -
+ ; no need for rounding.
+ .balign 4
+.Ldenorm_sub:
+ lsr DBL1L,r6,20
+ xor DBL0H,r6,DBL1H
+ brne.d DBL1L,1,.Lpast_denorm_sub
+ sub_s DBL1L,DBL1L,1
+.Lsub_done_noshift:
+ add.f 0,r12,r12
+ btst.eq DBL0L,0
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ bclr r4,r4,20
+ j_s.d [blink]
+ adc DBL0H,DBL1H,r4
+
+ .balign 4
+.Ldenorm_small_shift:
+ brne.d r12,1,.Lfixed_denorm_small_shift
+ sub_l r12,r12,1
+ brlt r10,0,.Lsub
+.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter
+ add.f DBL0L,DBL0L,DBL1L
+ add_s DBL1H,DBL1H,DBL0H
+ add.cs DBL1H,DBL1H,1
+ xor_l DBL0H,DBL0H,DBL1H
+ bbit0 DBL0H,20,.Lno_shiftdown
+ lsr.f DBL0H,DBL1H
+ and r4,DBL0L,2
+ bmsk DBL0H,DBL0H,18
+ sbc DBL0H,DBL1H,DBL0H
+ rrc.f DBL0L,DBL0L
+ or.f r12,r12,r4
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
+ jne.d [blink] ; ... non-zero fraction
+ add.cs DBL0H,DBL0H,1
+ mov_s DBL0L,0
+ bmsk DBL1H,DBL0H,19
+ j_s.d [blink]
+ bic_s DBL0H,DBL0H,DBL1H
+.Lno_shiftdown:
+ mov_s DBL0H,DBL1H
+ add.f 0,r12,r12
+ btst.eq DBL0L,0
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.cs DBL0H,DBL0H,1
+ .balign 4
+.Ldenorm:
+ bmsk DBL0H,DBL1H,30
+ lsr r12,DBL0H,20
+ xor_s DBL0H,DBL0H,DBL1H
+ sub_l DBL1L,r12,1
+ bgt .Lpast_denorm
+ j_s.d [blink]
+ add_l DBL0H,DBL0H,r4
+
+ .balign 4
+.Large_cancel:
+ ;DBL0L: mantissa DBL1H: sign & exponent
+ norm.f DBL1L,DBL0L
+ bmsk DBL0H,DBL1H,30
+ add_s DBL1L,DBL1L,22
+ mov.mi DBL1L,21
+ add_s r12,DBL1L,1
+ asl_s r12,r12,20
+ beq_s .Lret0
+ brhs.d DBL0H,r12,.Lpast_denorm_large_cancel
+ sub DBL0H,DBL1H,r12
+ bmsk DBL0H,DBL1H,30
+ lsr r12,DBL0H,20
+ xor_s DBL0H,DBL0H,DBL1H
+ sub.f DBL1L,r12,1
+ jle [blink]
+.Lpast_denorm_large_cancel:
+ rsub.f r7,DBL1L,32
+ lsr r7,DBL0L,r7
+ asl_s DBL0L,DBL0L,DBL1L
+ mov.ls r7,DBL0L
+ add_s DBL0H,DBL0H,r7
+ j_s.d [blink]
+ mov.ls DBL0L,0
+.Lret0:
+ j_s.d [blink]
+ mov_l DBL0H,0
+
+/* r4:DBL0L:r12 : unnormalized result fraction
+ DBL1H: result sign and exponent */
+/* When seeing large cancellation, only the topmost guard bit might be set. */
+ .balign 4
+.Large_cancel_sub:
+ norm.f DBL1L,DBL0L
+ bpnz.d 0f
+ bmsk DBL0H,DBL1H,30
+ mov r5,22<<20
+ bne.d 1f
+ mov_s DBL1L,21
+ bset r5,r5,5+20
+ add_s DBL1L,DBL1L,32
+ brne r12,0,1f
+ j_s.d [blink]
+ mov_l DBL0H,0
+ .balign 4
+0: add r5,DBL1L,23
+ asl r5,r5,20
+ add_s DBL1L,DBL1L,22
+1: brlo DBL0H,r5,.Ldenorm_large_cancel_sub
+ sub DBL0H,DBL1H,r5
+.Lpast_denorm_large_cancel_sub:
+ rsub.f r7,DBL1L,32
+ lsr r12,r12,r7
+ lsr r7,DBL0L,r7
+ asl_s DBL0L,DBL0L,DBL1L
+ add.ge DBL0H,DBL0H,r7
+ add_s DBL0L,DBL0L,r12
+ add.lt DBL0H,DBL0H,DBL0L
+ mov.eq DBL0L,r12
+ j_s.d [blink]
+ mov.lt DBL0L,0
+ .balign 4
+.Ldenorm_large_cancel_sub:
+ lsr r5,DBL0H,20
+ xor_s DBL0H,DBL0H,DBL1H
+ brgt.d r5,1,.Lpast_denorm_large_cancel_sub
+ sub DBL1L,r5,1
+ j_l [blink] ; denorm, no shift -> no rounding needed.
+
+/* r4: DBL0H & 0x7fffffff
+ r6: DBL1H & 0x7ff00000
+ r9: 0x7ff00000
+ r10: sign difference
+ r12: shift count (negative) */
+ .balign 4
+.Ldbl1_gt:
+ brhs r6,r9,.Lret_dbl1 ; inf or NaN
+ neg r8,r12
+ brhs r8,32,.Large_shift_dbl0
+.Lsmall_shift_dbl0:
+ breq.d r6,0,.Ldenorm_small_shift_dbl0
+ bmsk_s DBL0H,DBL0H,19
+ bset_s DBL0H,DBL0H,20
+.Lfixed_denorm_small_shift_dbl0:
+ asl r4,DBL0H,r12
+ lsr DBL0H,DBL0H,r8
+ lsr r5,DBL0L,r8
+ asl r12,DBL0L,r12
+ brge.d r10,0,.Ladd_dbl1_gt
+ or DBL0L,r4,r5
+/* subtract, abs(DBL0) < abs(DBL1) */
+/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place
+ DBL1H, DBL1L: original values
+ r6: orig. DBL1H & 0x7ff00000
+ r9: 0x7ff00000
+ r12: guard bits */
+ .balign 4
+.Lrsub:
+ neg.f r12,r12
+ bmsk r7,DBL1H,19
+ mov_s r5,DBL0H
+ sbc.f DBL0L,DBL1L,DBL0L
+ bic DBL1H,DBL1H,r7
+ bset r7,r7,20
+ sbc.f r4,r7,r5
+ beq_l .Large_cancel_sub
+ norm DBL1L,r4
+ b_l .Lsub_done ; note: r6 is already set up.
+
+.Lret_dbl1:
+ mov_s DBL0H,DBL1H
+ j_s.d [blink]
+ mov_l DBL0L,DBL1L
+ .balign 4
+.Ldenorm_small_shift_dbl0:
+ sub.f r8,r8,1
+ bne.d .Lfixed_denorm_small_shift_dbl0
+ add_s r12,r12,1
+ brlt r10,0,.Lrsub
+.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter
+ add.f DBL0L,DBL0L,DBL1L
+ add_s DBL0H,DBL0H,DBL1H
+ add.cs DBL0H,DBL0H,1
+ xor DBL1H,DBL0H,DBL1H
+ bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt
+ lsr.f DBL1H,DBL0H
+ and r4,DBL0L,2
+ bmsk DBL1H,DBL1H,18
+ sbc DBL0H,DBL0H,DBL1H
+ rrc.f DBL0L,DBL0L
+ or.f r12,r12,r4
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
+ jne.d [blink] ; ... non-zero fraction
+ add.cs DBL0H,DBL0H,1
+ mov_s DBL0L,0
+ bmsk DBL1H,DBL0H,19
+ j_s.d [blink]
+ bic_s DBL0H,DBL0H,DBL1H
+.Lno_shiftdown_dbl1_gt:
+ add.f 0,r12,r12
+ btst.eq DBL0L,0
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.cs DBL0H,DBL0H,1
+
+ .balign 4
+.Large_shift_dbl0:
+ brhs r8,55,.Lret_dbl1
+ bmsk_s DBL0H,DBL0H,19
+ brne r6,0,.Lno_denorm_large_shift_dbl0
+ add_s r12,r12,1
+ brne.d r8,33,.Lfixed_denorm_large_shift_dbl0
+ sub r8,r8,1
+ bset_s DBL0H,DBL0H,20
+.Lshift32_dbl0:
+ mov_s r12,DBL0L
+ mov_s DBL0L,DBL0H
+ brlt.d r10,0,.Lrsub
+ mov_s DBL0H,0
+ b_s .Ladd_dbl1_gt
+
+ .balign 4
+.Lno_denorm_large_shift_dbl0:
+ breq.d r8,32,.Lshift32_dbl0
+ bset_l DBL0H,DBL0H,20
+.Lfixed_denorm_large_shift_dbl0:
+ asl r4,DBL0H,r12
+ lsr r5,DBL0L,r8
+ asl.f 0,DBL0L,r12
+ lsr DBL0L,DBL0H,r8
+ or r12,r4,r5
+ tst.eq r12,1
+ or.ne r12,r12,2
+ brlt.d r10,0,.Lrsub
+ mov_s DBL0H,0
+ b_l .Ladd_dbl1_gt
+ ENDFUNC(__adddf3)
+ ENDFUNC(__subdf3)