1 files changed, 524 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S
new file mode 100644
index 000000000..80b6455ac
--- /dev/null
+++ b/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S
@@ -0,0 +1,524 @@
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+		on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "arc-ieee-754.h"
+#if 0 /* DEBUG */
+	.global __adddf3
+	.balign 4
+__adddf3:
+	push_s blink ; keep the return address across the two calls below
+	push_s r2 ; save the second operand (r2, r3) ...
+	push_s r3
+	push_s r0 ; ... and the first operand (r0, r1)
+	bl.d __adddf3_c ; reference implementation, not defined in this file
+	push_s r1 ; delay slot
+	ld_s r2,[sp,12] ; reload the saved r2
+	ld_s r3,[sp,8] ; reload the saved r3
+	st_s r0,[sp,12] ; park the reference result in the two slots just read
+	st_s r1,[sp,8]
+	pop_s r1
+	bl.d __adddf3_asm ; assembly implementation on the same inputs
+	pop_s r0 ; delay slot: restores the saved r0
+	pop_s r3 ; r3 = r1 as returned by __adddf3_c
+	pop_s r2 ; r2 = r0 as returned by __adddf3_c
+	pop_s blink
+	cmp r0,r2
+	cmp.eq r1,r3 ; second word is only compared if the first one matched
+	jeq_s [blink] ; both result words agree: return normally
+	bl abort ; the two implementations disagree
+	.global __subdf3
+	.balign 4
+__subdf3:
+	push_s blink ; same checking wrapper as above, for subtraction
+	push_s r2
+	push_s r3
+	push_s r0
+	bl.d __subdf3_c ; reference implementation, not defined in this file
+	push_s r1 ; delay slot
+	ld_s r2,[sp,12] ; reload the saved r2
+	ld_s r3,[sp,8] ; reload the saved r3
+	st_s r0,[sp,12] ; park the reference result in the two slots just read
+	st_s r1,[sp,8]
+	pop_s r1
+	bl.d __subdf3_asm ; assembly implementation on the same inputs
+	pop_s r0 ; delay slot: restores the saved r0
+	pop_s r3 ; r3 = r1 as returned by __subdf3_c
+	pop_s r2 ; r2 = r0 as returned by __subdf3_c
+	pop_s blink
+	cmp r0,r2
+	cmp.eq r1,r3 ; second word is only compared if the first one matched
+	jeq_s [blink] ; both result words agree: return normally
+	bl abort ; the two implementations disagree
+#define __adddf3 __adddf3_asm
+#define __subdf3 __subdf3_asm
+#endif /* DEBUG */
+/* N.B. This is optimized for ARC700.
+  ARC600 has very different scheduling / instruction selection criteria.  */
+
+/* inputs: DBL0, DBL1 (r0-r3)
+   output: DBL0 (r0, r1)
+   clobber: r2-r10, r12, flags
+   All NaN highword bits must be 1.  NaN low word is random.  */
+
+	.balign 4
+	.global __adddf3
+	.global __subdf3
+	.long 0x7ff00000 ; exponent mask
+	FUNC(__adddf3)
+	FUNC(__subdf3)
+__subdf3:
+	bxor_l DBL1H,DBL1H,31 ; flip the sign of DBL1, then fall through into the addition
+__adddf3:
+	ld r9,[pcl,-8] ; r9 = exponent mask stored by the .long above
+	bmsk r4,DBL0H,30 ; r4 = DBL0H with the sign bit cleared
+	xor r10,DBL0H,DBL1H ; bit 31 of r10 is set iff the operand signs differ
+	and r6,DBL1H,r9 ; r6 = exponent field of DBL1, left in bits 20..30
+	sub.f r12,r4,r6 ; flags of this subtraction are tested by the blo below
+	asr_s r12,r12,20 ; r12 = exponent of DBL0 minus exponent of DBL1
+	blo .Ldbl1_gt ; r4 < r6 (unsigned): DBL1 has the larger exponent
+	brhs r4,r9,.Linf_nan ; exponent field of DBL0 is all ones: inf or NaN
+	brhs r12,32,.Large_shift ; exponent difference of 32 or more
+	brne r12,0,.Lsmall_shift ; exponent difference 1..31
+	brge r10,0,.Ladd_same_exp ; r12 == 0
+
+/* After subtracting, we need to normalize; when shifting to place the
+  leading 1 into position for the implicit 1 and adding that to DBL0H,
+  we increment the exponent.  Thus, we have to subtract one more than
+  the shift count from the exponent beforehand.  Iff the exponent drops thus
+  below zero (before adding in the fraction with the leading one), we have
+  generated a denormal number.  Denormal handling is basically reducing the
+  shift count so that we produce a zero exponent instead; however, this way
+  the shift count can become zero (if we started out with exponent 1).
+  Therefore, a simple min operation is not good enough, since we don't
+  want to handle a zero normalizing shift in the main path.
+  On the plus side, we don't need to check for denorm input, the result
+  of subtracting these looks just the same as denormals generated during
+  subtraction.  */
+	bmsk r7,DBL1H,30 ; r7 = DBL1H with the sign bit cleared
+	cmp r4,r7
+	cmp.eq DBL0L,DBL1L ; together: 64-bit unsigned magnitude compare
+	blo .L_rsub_same_exp ; abs(DBL0) < abs(DBL1): subtract the other way round
+	sub.f DBL0L,DBL0L,DBL1L
+	bmsk r12,DBL0H,19 ; r12 = fraction bits of DBL0H
+	bic DBL1H,DBL0H,r12 ; DBL1H = sign and exponent of DBL0
+	sbc.f r4,r4,r7 ; r4 = high word of the difference, borrow from the low words included
+	beq_l .Large_cancel ; the high word cancelled completely
+	norm DBL1L,r4
+	b.d .Lsub_done_same_exp
+	sub r12,DBL1L,9 ; delay slot: r12 = normalizing shift count + 1
+
+	.balign 4
+.Linf_nan:
+	; If both inputs are inf, but with different signs, the result is NaN.
+	asr r12,r10,31 ; r12 = all ones if the signs differ, else 0
+	or_s DBL1H,DBL1H,r12
+	j_s.d [blink]
+	or.eq DBL0H,DBL0H,DBL1H ; delay slot; .eq still tests the sub.f at entry (r4 == r6), nothing in between sets flags
+
+	.balign 4
+.L_rsub_same_exp:
+	rsub.f DBL0L,DBL0L,DBL1L ; DBL0L = DBL1L - DBL0L
+	bmsk r12,DBL1H,19 ; r12 = fraction bits of DBL1H
+	bic_s DBL1H,DBL1H,r12 ; DBL1H = sign and exponent of DBL1
+	sbc.f r4,r7,r4 ; r4 = high word of abs(DBL1) - abs(DBL0)
+	beq_l .Large_cancel ; the high word cancelled completely
+	norm DBL1L,r4
+
+	sub r12,DBL1L,9 ; r12 = normalizing shift count + 1
+.Lsub_done_same_exp:
+	asl_s r12,r12,20 ; move (shift + 1) to the exponent field position
+	sub_s DBL1L,DBL1L,10 ; DBL1L = normalizing shift count
+	sub DBL0H,DBL1H,r12 ; lower the exponent by shift + 1
+	xor.f 0,DBL0H,DBL1H
+	bmi .Ldenorm ; sign bit changed: the exponent went below zero
+.Lpast_denorm:
+	neg_s r12,DBL1L ; used as a right-shift count of 32 - shift (count taken modulo 32)
+	lsr r7,DBL0L,r12 ; low-word bits that move up into the high word
+	asl r12,r4,DBL1L
+	asl_s DBL0L,DBL0L,DBL1L
+	add_s r12,r12,r7 ; r12 = shifted high fraction word
+	j_s.d [blink]
+	add_l DBL0H,DBL0H,r12 ; delay slot: a leading 1 at bit 20 bumps the exponent by one
+	.balign 4
+.Ladd_same_exp:
+	/* This is a special case because we can't test for need to shift
+	   down by checking if bit 20 of DBL0H changes.  OTOH, here we know
+	   that we always need to shift down.  */
+	; The implicit 1 of DBL0 is not shifted together with the
+	;  fraction, thus effectively doubled, compensating for not setting
+	;  implicit1 for DBL1
+	add_s r12,DBL0L,DBL1L ; r12 = low-word sum
+	lsr.f 0,r12,2 ; round to even
+	breq r6,0,.Ldenorm_add ; exponents are equal on this path, so both fields are zero
+	adc.f DBL0L,DBL0L,DBL1L ; low-word sum plus the rounding carry from the lsr.f
+	sub r7,DBL1H,DBL0H
+	sub1 r7,r7,r9 ; boost exponent by 2/2
+	rrc DBL0L,DBL0L ; halve the low word, shifting in the carry of the adc.f
+	asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++
+	add.cs.f DBL0L,DBL0L,0x80000000 ; the bit shifted out of r7 becomes the low-word MSB
+	add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1
+	add.cs DBL0H,DBL0H,1 ; carry out of the low word
+	bic.f 0,r9,DBL0H ; check for overflow -> infinity.
+	jne_l [blink] ; some exponent bit is clear: finite result, return
+	and DBL0H,DBL0H,0xfff00000 ; clear the fraction to produce infinity
+	j_s.d [blink]
+	mov_s DBL0L,0 ; delay slot
+	.balign 4
+.Large_shift:
+	brhs r12,55,.Lret_dbl0 ; exponent difference of 55 or more: return DBL0 unchanged
+	bmsk_s DBL1H,DBL1H,19 ; keep only the fraction bits of DBL1H
+	brne r6,0,.Lno_denorm_large_shift ; DBL1 has a nonzero exponent field
+	brhi.d r12,33,.Lfixed_denorm_large_shift ; zero exponent field: no implicit 1
+	sub_s r12,r12,1 ; delay slot: one less shift for a zero exponent field
+	breq r12,31, .Lfixed_denorm_small_shift ; reduced shift fits the small-shift code
+.Lshift32:
+	mov_s r12,DBL1L ; shift by exactly 32: the low word becomes the guard bits
+	mov_s DBL1L,DBL1H
+	brlt.d r10,0,.Lsub ; signs differ: subtract
+	mov_s DBL1H,0 ; delay slot
+	b_s .Ladd
+.Ldenorm_add:
+	cmp_s r12,DBL1L ; carry set iff the low-word sum in r12 wrapped around
+	mov_s DBL0L,r12
+	j_s.d [blink]
+	adc DBL0H,r4,DBL1H ; delay slot: high words plus the carry from the low words
+
+.Lret_dbl0:
+	j_s [blink]
+	.balign 4
+.Lsmall_shift:
+	breq.d r6,0,.Ldenorm_small_shift ; exponent field of DBL1 is zero
+	bmsk_s DBL1H,DBL1H,19 ; delay slot: DBL1H = fraction bits only
+	bset_s DBL1H,DBL1H,20 ; make the implicit leading 1 explicit
+.Lfixed_denorm_small_shift:
+	neg r8,r12 ; used as a left-shift count of 32 - r12 (count taken modulo 32)
+	asl r4,DBL1H,r8 ; high-word bits that drop into the low word
+	lsr_l DBL1H,DBL1H,r12
+	lsr r5,DBL1L,r12
+	asl r12,DBL1L,r8 ; r12 = guard bits shifted out of the low word
+	brge.d r10,0,.Ladd ; same sign: add; otherwise fall through into .Lsub
+	or DBL1L,r4,r5 ; delay slot
+/* subtract, abs(DBL0) > abs(DBL1) */
+/* DBL0H, DBL0L: original values
+   DBL1H, DBL1L: fraction with explicit leading 1, shifted into place
+   r4:  orig. DBL0H & 0x7fffffff
+   r6:  orig. DBL1H & 0x7ff00000
+   r9:  0x7ff00000
+   r10: orig. DBL0H ^ DBL1H
+   r12: guard bits */
+	.balign 4
+.Lsub:
+	neg.f r12,r12 ; 0 - guard bits; the borrow feeds the sbc.f below
+	mov_s r7,DBL1H
+	bmsk r5,DBL0H,19 ; r5 = fraction bits of DBL0H
+	sbc.f DBL0L,DBL0L,DBL1L
+	bic DBL1H,DBL0H,r5 ; DBL1H = sign and exponent of DBL0
+	bset r5,r5,20 ; explicit leading 1
+	sbc.f r4,r5,r7 ; r4 = high word of the fraction difference
+	beq_l .Large_cancel_sub ; the high word cancelled completely
+	norm DBL1L,r4
+	bmsk r6,DBL1H,30 ; r6 = exponent field of the larger operand
+.Lsub_done:
+	sub_s DBL1L,DBL1L,9 ; DBL1L = normalizing shift count + 1
+	breq DBL1L,1,.Lsub_done_noshift ; leading 1 is already at bit 20
+	asl r5,DBL1L,20 ; (shift + 1) in exponent field position
+	sub_s DBL1L,DBL1L,1 ; DBL1L = normalizing shift count
+	brlo r6,r5,.Ldenorm_sub ; exponent too small for the full shift
+	sub DBL0H,DBL1H,r5
+.Lpast_denorm_sub:
+	neg_s DBL1H,DBL1L ; used as a right-shift count of 32 - shift (count taken modulo 32)
+	lsr r6,r12,DBL1H ; guard bits that move up into the low word
+	asl_s r12,r12,DBL1L ; guard bits that remain below the result
+	and r8,r6,1 ; r8 = LSB of the shifted result
+	add1.f 0,r8,r12 ; NOTE(review): this and the next line appear to implement round-to-nearest-even ...
+	add.ne.f r12,r12,r12 ; ... leaving the round-up decision in the carry flag
+	asl r8,DBL0L,DBL1L
+	lsr r12,DBL0L,DBL1H ; low-word bits that move up into the high word
+	adc.f DBL0L,r8,r6 ; shifted low word + guard bits + rounding carry
+	asl r5,r4,DBL1L ; shifted high fraction word
+	add_s DBL0H,DBL0H,r12
+	j_s.d [blink]
+	adc DBL0H,DBL0H,r5 ; delay slot: includes the carry out of the low word
+
+	.balign 4
+.Lno_denorm_large_shift:
+	breq.d r12,32,.Lshift32 ; exactly one word: use the word-move code
+	bset_l DBL1H,DBL1H,20 ; delay slot: make the implicit leading 1 explicit
+.Lfixed_denorm_large_shift:
+	neg r8,r12 ; shift counts here rely on being taken modulo 32
+	asl r4,DBL1H,r8
+	lsr r5,DBL1L,r12
+	asl.f 0,DBL1L,r8 ; Z clear iff nonzero low-word bits are shifted out completely
+	lsr DBL1L,DBL1H,r12 ; new low word comes from the high word
+	or r12,r4,r5 ; r12 = guard bits
+	tst.eq r12,1 ; only if nothing was lost above: look at guard bit 0
+	or.ne r12,r12,2 ; fold any lost nonzero bits into guard bit 1
+	brlt.d r10,0,.Lsub ; signs differ: subtract
+	mov_s DBL1H,0 ; delay slot: the high word is shifted out entirely
+	b_l .Ladd
+
+	; If a denorm is produced without shifting, we have an exact result -
+	; no need for rounding.
+	.balign 4
+.Ldenorm_sub:
+	lsr DBL1L,r6,20 ; DBL1L = biased exponent value
+	xor DBL0H,r6,DBL1H ; DBL0H = sign only, exponent field cleared
+	brne.d DBL1L,1,.Lpast_denorm_sub ; reduced shift is nonzero: shift on the main path
+	sub_s DBL1L,DBL1L,1 ; delay slot: reduced shift = exponent - 1
+.Lsub_done_noshift:
+	add.f 0,r12,r12 ; carry = top guard bit, Z = no guard bits below it
+	btst.eq DBL0L,0 ; possible tie: Z = result LSB is even
+	cmp.eq r12,r12 ; tie with even LSB: clears carry, so no round-up
+	add.cs.f DBL0L,DBL0L,1 ; round up
+	bclr r4,r4,20 ; drop the explicit leading 1
+	j_s.d [blink]
+	adc DBL0H,DBL1H,r4 ; delay slot: sign/exponent + fraction + carry from rounding
+
+	.balign 4
+.Ldenorm_small_shift:
+	brne.d r12,1,.Lfixed_denorm_small_shift ; reduced shift is still nonzero
+	sub_l r12,r12,1 ; delay slot: zero exponent field needs one less shift
+	brlt r10,0,.Lsub ; shift is 0 and r12 (guard bits) is 0; signs differ: subtract
+.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter
+	add.f DBL0L,DBL0L,DBL1L
+	add_s DBL1H,DBL1H,DBL0H
+	add.cs DBL1H,DBL1H,1 ; carry from the low words
+	xor_l DBL0H,DBL0H,DBL1H ; bits of DBL0H that the addition changed
+	bbit0 DBL0H,20,.Lno_shiftdown ; exponent LSB unchanged: fraction did not overflow
+	lsr.f DBL0H,DBL1H ; carry = bit 0 of the high-word sum
+	and r4,DBL0L,2 ; r4 = bit 1 of the low-word sum, the LSB after halving
+	bmsk DBL0H,DBL0H,18
+	sbc DBL0H,DBL1H,DBL0H
+	rrc.f DBL0L,DBL0L ; halve the low word, pulling in the high-word bit; carry = bit shifted out
+	or.f r12,r12,r4 ; Z iff no guard bits and the new LSB is clear
+	cmp.eq r12,r12 ; tie with even LSB: clears carry, so no round-up
+	add.cs.f DBL0L,DBL0L,1 ; round up
+	bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
+	jne.d [blink]    ; ... non-zero fraction
+	add.cs DBL0H,DBL0H,1 ; delay slot: carry out of the low-word round-up
+	mov_s DBL0L,0
+	bmsk DBL1H,DBL0H,19
+	j_s.d [blink]
+	bic_s DBL0H,DBL0H,DBL1H ; delay slot: clear the fraction bits, leaving infinity
+.Lno_shiftdown:
+	mov_s DBL0H,DBL1H
+	add.f 0,r12,r12 ; carry = top guard bit, Z = no guard bits below it
+	btst.eq DBL0L,0 ; possible tie: Z = result LSB is even
+	cmp.eq r12,r12 ; tie with even LSB: clears carry, so no round-up
+	add.cs.f DBL0L,DBL0L,1 ; round up
+	j_s.d [blink]
+	add.cs DBL0H,DBL0H,1 ; delay slot: carry out of the low word
+	.balign 4
+.Ldenorm:
+	bmsk DBL0H,DBL1H,30 ; DBL0H = exponent field without the sign
+	lsr r12,DBL0H,20 ; r12 = biased exponent value
+	xor_s DBL0H,DBL0H,DBL1H ; DBL0H = sign only, exponent field zero
+	sub.f DBL1L,r12,1 ; reduced shift = exponent - 1; must set the flags that bgt tests
+	bgt .Lpast_denorm ; a positive shift remains: do it on the main path
+	j_s.d [blink] ; exponent 0 or 1: the fraction is already in place
+	add_l DBL0H,DBL0H,r4 ; delay slot: attach the unshifted high fraction word
+
+	.balign 4
+.Large_cancel:
+	;DBL0L: mantissa DBL1H: sign & exponent
+	norm.f DBL1L,DBL0L ; flags describe DBL0L: Z if zero, N if bit 31 is set
+	bmsk DBL0H,DBL1H,30 ; DBL0H = exponent field without the sign
+	add_s DBL1L,DBL1L,22 ; shift that brings the leading 1 to bit 20 of the high word
+	mov.mi DBL1L,21 ; leading 1 is bit 31 of the low word
+	add_s r12,DBL1L,1
+	asl_s r12,r12,20 ; (shift + 1) in exponent field position
+	beq_s .Lret0 ; the low word is zero as well: exact cancellation
+	brhs.d DBL0H,r12,.Lpast_denorm_large_cancel ; exponent is large enough for the full shift
+	sub DBL0H,DBL1H,r12 ; delay slot: lower the exponent by shift + 1
+	bmsk DBL0H,DBL1H,30 ; denormal result: start again from the exponent
+	lsr r12,DBL0H,20 ; r12 = biased exponent value
+	xor_s DBL0H,DBL0H,DBL1H ; DBL0H = sign only
+	sub.f DBL1L,r12,1 ; reduced shift = exponent - 1
+	jle [blink] ; nothing left to shift: the low word is already in place
+.Lpast_denorm_large_cancel:
+	rsub.f r7,DBL1L,32 ; r7 = 32 - shift
+	lsr r7,DBL0L,r7 ; low-word bits that land in the high word
+	asl_s DBL0L,DBL0L,DBL1L
+	mov.ls r7,DBL0L ; shift >= 32: the shifted low word is the high part
+	add_s DBL0H,DBL0H,r7
+	j_s.d [blink]
+	mov.ls DBL0L,0 ; delay slot: shift >= 32 leaves the low word empty
+.Lret0:
+	j_s.d	[blink]
+	mov_l	DBL0H,0 ; delay slot: DBL0L is already zero on this path
+
+/* r4:DBL0L:r12 : unnormalized result fraction
+   DBL1H: result sign and exponent         */
+/* When seeing large cancellation, only the topmost guard bit might be set.  */
+	.balign 4
+.Large_cancel_sub:
+	norm.f DBL1L,DBL0L ; flags describe DBL0L: Z if zero, N if bit 31 is set
+	bpnz.d 0f ; low word is positive and nonzero: general case
+	bmsk DBL0H,DBL1H,30 ; delay slot: DBL0H = exponent field without the sign
+	mov r5,22<<20 ; (shift + 1) for a leading 1 at bit 31 of the low word
+	bne.d 1f ; low word is nonzero, so bit 31 is set
+	mov_s DBL1L,21 ; delay slot: shift count for that case
+	bset r5,r5,5+20 ; low word is zero: 32 more exponent steps
+	add_s DBL1L,DBL1L,32 ; shift count 53
+	brne r12,0,1f ; the guard bits still hold a value
+	j_s.d	[blink] ; everything cancelled
+	mov_l	DBL0H,0 ; delay slot
+	.balign 4
+0:	add r5,DBL1L,23 ; shift + 1
+	asl r5,r5,20 ; in exponent field position
+	add_s DBL1L,DBL1L,22 ; shift count
+1:	brlo DBL0H,r5,.Ldenorm_large_cancel_sub ; exponent too small for the full shift
+	sub DBL0H,DBL1H,r5 ; lower the exponent by shift + 1
+.Lpast_denorm_large_cancel_sub:
+	rsub.f r7,DBL1L,32 ; r7 = 32 - shift; its flags select the cases below
+	lsr r12,r12,r7 ; guard bits moved into place (count taken modulo 32)
+	lsr r7,DBL0L,r7 ; low-word bits that land in the high word
+	asl_s DBL0L,DBL0L,DBL1L
+	add.ge DBL0H,DBL0H,r7 ; shift <= 32
+	add_s DBL0L,DBL0L,r12
+	add.lt DBL0H,DBL0H,DBL0L ; shift > 32: everything lands in the high word
+	mov.eq DBL0L,r12 ; shift == 32: the low word is just the guard bits
+	j_s.d [blink]
+	mov.lt DBL0L,0 ; delay slot
+	.balign 4
+.Ldenorm_large_cancel_sub:
+	lsr r5,DBL0H,20 ; r5 = biased exponent value
+	xor_s DBL0H,DBL0H,DBL1H ; DBL0H = sign only
+	brgt.d r5,1,.Lpast_denorm_large_cancel_sub ; a positive reduced shift remains
+	sub DBL1L,r5,1 ; delay slot: reduced shift = exponent - 1
+	j_l [blink] ; denorm, no shift -> no rounding needed.
+
+/* r4: DBL0H & 0x7fffffff
+   r6: DBL1H & 0x7ff00000
+   r9: 0x7ff00000
+   r10: sign difference
+   r12: shift count (negative) */
+	.balign 4
+.Ldbl1_gt:
+	brhs r6,r9,.Lret_dbl1 ; inf or NaN
+	neg r8,r12 ; r8 = positive shift count
+	brhs r8,32,.Large_shift_dbl0 ; exponent difference of 32 or more
+.Lsmall_shift_dbl0:
+	bmsk_s DBL0H,DBL0H,19 ; DBL0H = fraction bits only
+	breq r4,DBL0H,.Ldenorm_small_shift_dbl0 ; DBL0 exponent field is zero (r6 is never 0 on this path)
+	bset_s DBL0H,DBL0H,20 ; make the implicit leading 1 explicit
+.Lfixed_denorm_small_shift_dbl0:
+	asl r4,DBL0H,r12 ; high-word bits that drop into the low word (count -r8 taken modulo 32)
+	lsr DBL0H,DBL0H,r8
+	lsr r5,DBL0L,r8
+	asl r12,DBL0L,r12 ; r12 = guard bits shifted out of the low word
+	brge.d r10,0,.Ladd_dbl1_gt ; same sign: add; otherwise fall through into .Lrsub
+	or DBL0L,r4,r5 ; delay slot
+/* subtract, abs(DBL0) < abs(DBL1) */
+/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place
+   DBL1H, DBL1L: original values
+   r6:  orig. DBL1H & 0x7ff00000
+   r9:  0x7ff00000
+   r12: guard bits */
+	.balign 4
+.Lrsub:
+	neg.f r12,r12 ; 0 - guard bits; the borrow feeds the sbc.f below
+	bmsk r7,DBL1H,19 ; r7 = fraction bits of DBL1H
+	mov_s r5,DBL0H
+	sbc.f DBL0L,DBL1L,DBL0L
+	bic DBL1H,DBL1H,r7 ; DBL1H = sign and exponent of DBL1
+	bset r7,r7,20 ; explicit leading 1
+	sbc.f r4,r7,r5 ; r4 = high word of the fraction difference
+	beq_l .Large_cancel_sub ; the high word cancelled completely
+	norm DBL1L,r4
+	b_l .Lsub_done ; note: r6 is already set up.
+
+.Lret_dbl1:
+	mov_s DBL0H,DBL1H ; return DBL1 unchanged
+	j_s.d [blink]
+	mov_l DBL0L,DBL1L ; delay slot
+	.balign 4
+.Ldenorm_small_shift_dbl0:
+	sub.f r8,r8,1 ; zero exponent field needs one less shift
+	bne.d .Lfixed_denorm_small_shift_dbl0 ; reduced shift is still nonzero
+	add_s r12,r12,1 ; delay slot: keep r12 equal to -r8
+	brlt r10,0,.Lrsub ; shift is 0 and r12 (guard bits) is 0; signs differ: subtract
+.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter
+	add.f DBL0L,DBL0L,DBL1L
+	add_s DBL0H,DBL0H,DBL1H
+	add.cs DBL0H,DBL0H,1 ; carry from the low words
+	xor DBL1H,DBL0H,DBL1H ; bits of DBL1H that the addition changed
+	bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt ; exponent LSB unchanged: fraction did not overflow
+	lsr.f DBL1H,DBL0H ; carry = bit 0 of the high-word sum
+	and r4,DBL0L,2 ; r4 = bit 1 of the low-word sum, the LSB after halving
+	bmsk DBL1H,DBL1H,18
+	sbc DBL0H,DBL0H,DBL1H
+	rrc.f DBL0L,DBL0L ; halve the low word, pulling in the high-word bit; carry = bit shifted out
+	or.f r12,r12,r4 ; Z iff no guard bits and the new LSB is clear
+	cmp.eq r12,r12 ; tie with even LSB: clears carry, so no round-up
+	add.cs.f DBL0L,DBL0L,1 ; round up
+	bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
+	jne.d [blink]    ; ... non-zero fraction
+	add.cs DBL0H,DBL0H,1 ; delay slot: carry out of the low-word round-up
+	mov_s DBL0L,0
+	bmsk DBL1H,DBL0H,19
+	j_s.d [blink]
+	bic_s DBL0H,DBL0H,DBL1H ; delay slot: clear the fraction bits, leaving infinity
+.Lno_shiftdown_dbl1_gt:
+	add.f 0,r12,r12 ; carry = top guard bit, Z = no guard bits below it
+	btst.eq DBL0L,0 ; possible tie: Z = result LSB is even
+	cmp.eq r12,r12 ; tie with even LSB: clears carry, so no round-up
+	add.cs.f DBL0L,DBL0L,1 ; round up
+	j_s.d [blink]
+	add.cs DBL0H,DBL0H,1 ; delay slot: carry out of the low word
+
+	.balign 4
+.Large_shift_dbl0:
+	brhs r8,55,.Lret_dbl1 ; exponent difference of 55 or more: return DBL1 unchanged
+	bmsk_s DBL0H,DBL0H,19 ; DBL0H = fraction bits only
+	brne r4,DBL0H,.Lno_denorm_large_shift_dbl0 ; DBL0 exponent field is nonzero (r6 is never 0 on this path)
+	add_s r12,r12,1 ; zero exponent field: one less shift, keep r12 equal to -r8
+	brhi.d r8,33,.Lfixed_denorm_large_shift_dbl0 ; reduced shift is still above 32
+	sub r8,r8,1 ; delay slot
+	breq r8,31,.Lfixed_denorm_small_shift_dbl0 ; reduced shift fits the small-shift code; else it is 32
+.Lshift32_dbl0:
+	mov_s r12,DBL0L ; shift by exactly 32: the low word becomes the guard bits
+	mov_s DBL0L,DBL0H
+	brlt.d r10,0,.Lrsub ; signs differ: subtract
+	mov_s DBL0H,0 ; delay slot
+	b_s .Ladd_dbl1_gt
+
+	.balign 4
+.Lno_denorm_large_shift_dbl0:
+	breq.d r8,32,.Lshift32_dbl0 ; exactly one word: use the word-move code
+	bset_l DBL0H,DBL0H,20 ; delay slot: make the implicit leading 1 explicit
+.Lfixed_denorm_large_shift_dbl0:
+	asl r4,DBL0H,r12 ; shift counts here rely on being taken modulo 32
+	lsr r5,DBL0L,r8
+	asl.f 0,DBL0L,r12 ; Z clear iff nonzero low-word bits are shifted out completely
+	lsr DBL0L,DBL0H,r8 ; new low word comes from the high word
+	or r12,r4,r5 ; r12 = guard bits
+	tst.eq r12,1 ; only if nothing was lost above: look at guard bit 0
+	or.ne r12,r12,2 ; fold any lost nonzero bits into guard bit 1
+	brlt.d r10,0,.Lrsub ; signs differ: subtract
+	mov_s DBL0H,0 ; delay slot: the high word is shifted out entirely
+	b_l .Ladd_dbl1_gt
+	ENDFUNC(__adddf3)
+	ENDFUNC(__subdf3)