diff options
Diffstat (limited to 'gcc-4.9/libgcc/config/arc/ieee-754')
38 files changed, 6629 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S new file mode 100644 index 000000000..80b6455ac --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S @@ -0,0 +1,524 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#include "arc-ieee-754.h" +#if 0 /* DEBUG */ + .global __adddf3 + .balign 4 +__adddf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __adddf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __adddf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort + .global __subdf3 + .balign 4 +__subdf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __subdf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __subdf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort +#define __adddf3 __adddf3_asm +#define __subdf3 __subdf3_asm +#endif /* DEBUG */ +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ + +/* inputs: DBL0, DBL1 (r0-r3) + output: DBL0 (r0, r1) + clobber: r2-r10, r12, flags + All NaN highword bits must be 1. NaN low word is random. */ + + .balign 4 + .global __adddf3 + .global __subdf3 + .long 0x7ff00000 ; exponent mask + FUNC(__adddf3) + FUNC(__subdf3) +__subdf3: + bxor_l DBL1H,DBL1H,31 +__adddf3: + ld r9,[pcl,-8] /* r9 = exponent mask from the .long placed ahead of the entry labels */ + bmsk r4,DBL0H,30 + xor r10,DBL0H,DBL1H + and r6,DBL1H,r9 + sub.f r12,r4,r6 + asr_s r12,r12,20 + blo .Ldbl1_gt + brhs r4,r9,.Linf_nan + brhs r12,32,.Large_shift + brne r12,0,.Lsmall_shift + brge r10,0,.Ladd_same_exp ; r12 == 0 + +/* After subtracting, we need to normalize; when shifting to place the + leading 1 into position for the implicit 1 and adding that to DBL0H, + we increment the exponent. Thus, we have to subtract one more than + the shift count from the exponent beforehand. Iff the exponent drops thus + below zero (before adding in the fraction with the leading one), we have + generated a denormal number.
Denormal handling is basically reducing the + shift count so that we produce a zero exponent instead; however, this way + the shift count can become zero (if we started out with exponent 1). + Therefore, a simple min operation is not good enough, since we don't + want to handle a zero normalizing shift in the main path. + On the plus side, we don't need to check for denorm input, the result + of subtracting these looks just the same as denormals generated during + subtraction. */ + bmsk r7,DBL1H,30 + cmp r4,r7 + cmp.eq DBL0L,DBL1L + blo .L_rsub_same_exp + sub.f DBL0L,DBL0L,DBL1L + bmsk r12,DBL0H,19 + bic DBL1H,DBL0H,r12 + sbc.f r4,r4,r7 + beq_l .Large_cancel + norm DBL1L,r4 + b.d .Lsub_done_same_exp + sub r12,DBL1L,9 + + .balign 4 +.Linf_nan: + ; If both inputs are inf, but with different signs, the result is NaN. + asr r12,r10,31 + or_s DBL1H,DBL1H,r12 + j_s.d [blink] + or.eq DBL0H,DBL0H,DBL1H + + .balign 4 +.L_rsub_same_exp: + rsub.f DBL0L,DBL0L,DBL1L + bmsk r12,DBL1H,19 + bic_s DBL1H,DBL1H,r12 + sbc.f r4,r7,r4 + beq_l .Large_cancel + norm DBL1L,r4 + + sub r12,DBL1L,9 +.Lsub_done_same_exp: + asl_s r12,r12,20 + sub_s DBL1L,DBL1L,10 + sub DBL0H,DBL1H,r12 + xor.f 0,DBL0H,DBL1H + bmi .Ldenorm +.Lpast_denorm: + neg_s r12,DBL1L + lsr r7,DBL0L,r12 + asl r12,r4,DBL1L + asl_s DBL0L,DBL0L,DBL1L + add_s r12,r12,r7 + j_s.d [blink] + add_l DBL0H,DBL0H,r12 + .balign 4 +.Ladd_same_exp: + /* This is a special case because we can't test for need to shift + down by checking if bit 20 of DBL0H changes. OTOH, here we know + that we always need to shift down.
*/ + ; The implicit 1 of DBL0 is not shifted together with the + ; fraction, thus effectively doubled, compensating for not setting + ; implicit1 for DBL1 + add_s r12,DBL0L,DBL1L + lsr.f 0,r12,2 ; round to even + breq r6,0,.Ldenorm_add + adc.f DBL0L,DBL0L,DBL1L + sub r7,DBL1H,DBL0H + sub1 r7,r7,r9 ; boost exponent by 2/2 + rrc DBL0L,DBL0L + asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++ + add.cs.f DBL0L,DBL0L,0x80000000 + add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1 + add.cs DBL0H,DBL0H,1 + bic.f 0,r9,DBL0H ; check for overflow -> infinity. + jne_l [blink] + and DBL0H,DBL0H,0xfff00000 + j_s.d [blink] + mov_s DBL0L,0 + .balign 4 +.Large_shift: + brhs r12,55,.Lret_dbl0 + bmsk_s DBL1H,DBL1H,19 + brne r6,0,.Lno_denorm_large_shift + brhi.d r12,33,.Lfixed_denorm_large_shift + sub_s r12,r12,1 + breq r12,31, .Lfixed_denorm_small_shift +.Lshift32: + mov_s r12,DBL1L + mov_s DBL1L,DBL1H + brlt.d r10,0,.Lsub + mov_s DBL1H,0 + b_s .Ladd +.Ldenorm_add: + cmp_s r12,DBL1L + mov_s DBL0L,r12 + j_s.d [blink] + adc DBL0H,r4,DBL1H + +.Lret_dbl0: + j_s [blink] + .balign 4 +.Lsmall_shift: + breq.d r6,0,.Ldenorm_small_shift + bmsk_s DBL1H,DBL1H,19 + bset_s DBL1H,DBL1H,20 +.Lfixed_denorm_small_shift: + neg r8,r12 + asl r4,DBL1H,r8 + lsr_l DBL1H,DBL1H,r12 + lsr r5,DBL1L,r12 + asl r12,DBL1L,r8 + brge.d r10,0,.Ladd + or DBL1L,r4,r5 +/* subtract, abs(DBL0) > abs(DBL1) */ +/* DBL0H, DBL0L: original values + DBL1H, DBL1L: fraction with explicit leading 1, shifted into place + r4: orig. DBL0H & 0x7fffffff + r6: orig. DBL1H & 0x7ff00000 + r9: 0x7ff00000 + r10: orig. 
DBL0H ^ DBL1H + r12: guard bits */ + .balign 4 +.Lsub: + neg.f r12,r12 + mov_s r7,DBL1H + bmsk r5,DBL0H,19 + sbc.f DBL0L,DBL0L,DBL1L + bic DBL1H,DBL0H,r5 + bset r5,r5,20 + sbc.f r4,r5,r7 + beq_l .Large_cancel_sub + norm DBL1L,r4 + bmsk r6,DBL1H,30 +.Lsub_done: + sub_s DBL1L,DBL1L,9 + breq DBL1L,1,.Lsub_done_noshift + asl r5,DBL1L,20 + sub_s DBL1L,DBL1L,1 + brlo r6,r5,.Ldenorm_sub + sub DBL0H,DBL1H,r5 +.Lpast_denorm_sub: + neg_s DBL1H,DBL1L + lsr r6,r12,DBL1H + asl_s r12,r12,DBL1L + and r8,r6,1 + add1.f 0,r8,r12 + add.ne.f r12,r12,r12 + asl r8,DBL0L,DBL1L + lsr r12,DBL0L,DBL1H + adc.f DBL0L,r8,r6 + asl r5,r4,DBL1L + add_s DBL0H,DBL0H,r12 + j_s.d [blink] + adc DBL0H,DBL0H,r5 + + .balign 4 +.Lno_denorm_large_shift: + breq.d r12,32,.Lshift32 + bset_l DBL1H,DBL1H,20 +.Lfixed_denorm_large_shift: + neg r8,r12 + asl r4,DBL1H,r8 + lsr r5,DBL1L,r12 + asl.f 0,DBL1L,r8 + lsr DBL1L,DBL1H,r12 + or r12,r4,r5 + tst.eq r12,1 + or.ne r12,r12,2 + brlt.d r10,0,.Lsub + mov_s DBL1H,0 + b_l .Ladd + + ; If a denorm is produced without shifting, we have an exact result - + ; no need for rounding. + .balign 4 +.Ldenorm_sub: + lsr DBL1L,r6,20 + xor DBL0H,r6,DBL1H + brne.d DBL1L,1,.Lpast_denorm_sub + sub_s DBL1L,DBL1L,1 +.Lsub_done_noshift: + add.f 0,r12,r12 + btst.eq DBL0L,0 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + bclr r4,r4,20 + j_s.d [blink] + adc DBL0H,DBL1H,r4 + + .balign 4 +.Ldenorm_small_shift: + brne.d r12,1,.Lfixed_denorm_small_shift + sub_l r12,r12,1 + brlt r10,0,.Lsub +.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter + add.f DBL0L,DBL0L,DBL1L + add_s DBL1H,DBL1H,DBL0H + add.cs DBL1H,DBL1H,1 + xor_l DBL0H,DBL0H,DBL1H + bbit0 DBL0H,20,.Lno_shiftdown + lsr.f DBL0H,DBL1H + and r4,DBL0L,2 + bmsk DBL0H,DBL0H,18 + sbc DBL0H,DBL1H,DBL0H + rrc.f DBL0L,DBL0L + or.f r12,r12,r4 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + bic.f 0,r9,DBL0H ; check for generating infinity with possible ... + jne.d [blink] ; ... 
non-zero fraction + add.cs DBL0H,DBL0H,1 + mov_s DBL0L,0 + bmsk DBL1H,DBL0H,19 + j_s.d [blink] + bic_s DBL0H,DBL0H,DBL1H +.Lno_shiftdown: + mov_s DBL0H,DBL1H + add.f 0,r12,r12 + btst.eq DBL0L,0 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + .balign 4 +.Ldenorm: + bmsk DBL0H,DBL1H,30 + lsr r12,DBL0H,20 + xor_s DBL0H,DBL0H,DBL1H + sub_l DBL1L,r12,1 + bgt .Lpast_denorm + j_s.d [blink] + add_l DBL0H,DBL0H,r4 + + .balign 4 +.Large_cancel: + ;DBL0L: mantissa DBL1H: sign & exponent + norm.f DBL1L,DBL0L + bmsk DBL0H,DBL1H,30 + add_s DBL1L,DBL1L,22 + mov.mi DBL1L,21 + add_s r12,DBL1L,1 + asl_s r12,r12,20 + beq_s .Lret0 + brhs.d DBL0H,r12,.Lpast_denorm_large_cancel + sub DBL0H,DBL1H,r12 + bmsk DBL0H,DBL1H,30 + lsr r12,DBL0H,20 + xor_s DBL0H,DBL0H,DBL1H + sub.f DBL1L,r12,1 + jle [blink] +.Lpast_denorm_large_cancel: + rsub.f r7,DBL1L,32 + lsr r7,DBL0L,r7 + asl_s DBL0L,DBL0L,DBL1L + mov.ls r7,DBL0L + add_s DBL0H,DBL0H,r7 + j_s.d [blink] + mov.ls DBL0L,0 +.Lret0: + j_s.d [blink] + mov_l DBL0H,0 + +/* r4:DBL0L:r12 : unnormalized result fraction + DBL1H: result sign and exponent */ +/* When seeing large cancellation, only the topmost guard bit might be set. */ + .balign 4 +.Large_cancel_sub: + norm.f DBL1L,DBL0L + bpnz.d 0f + bmsk DBL0H,DBL1H,30 + mov r5,22<<20 + bne.d 1f + mov_s DBL1L,21 + bset r5,r5,5+20 + add_s DBL1L,DBL1L,32 + brne r12,0,1f + j_s.d [blink] + mov_l DBL0H,0 + .balign 4 +0: add r5,DBL1L,23 + asl r5,r5,20 + add_s DBL1L,DBL1L,22 +1: brlo DBL0H,r5,.Ldenorm_large_cancel_sub + sub DBL0H,DBL1H,r5 +.Lpast_denorm_large_cancel_sub: + rsub.f r7,DBL1L,32 + lsr r12,r12,r7 + lsr r7,DBL0L,r7 + asl_s DBL0L,DBL0L,DBL1L + add.ge DBL0H,DBL0H,r7 + add_s DBL0L,DBL0L,r12 + add.lt DBL0H,DBL0H,DBL0L + mov.eq DBL0L,r12 + j_s.d [blink] + mov.lt DBL0L,0 + .balign 4 +.Ldenorm_large_cancel_sub: + lsr r5,DBL0H,20 + xor_s DBL0H,DBL0H,DBL1H + brgt.d r5,1,.Lpast_denorm_large_cancel_sub + sub DBL1L,r5,1 + j_l [blink] ; denorm, no shift -> no rounding needed. 
+ +/* r4: DBL0H & 0x7fffffff + r6: DBL1H & 0x7ff00000 + r9: 0x7ff00000 + r10: sign difference + r12: shift count (negative) */ + .balign 4 +.Ldbl1_gt: + brhs r6,r9,.Lret_dbl1 ; inf or NaN + neg r8,r12 + brhs r8,32,.Large_shift_dbl0 +.Lsmall_shift_dbl0: + breq.d r6,0,.Ldenorm_small_shift_dbl0 + bmsk_s DBL0H,DBL0H,19 + bset_s DBL0H,DBL0H,20 +.Lfixed_denorm_small_shift_dbl0: + asl r4,DBL0H,r12 + lsr DBL0H,DBL0H,r8 + lsr r5,DBL0L,r8 + asl r12,DBL0L,r12 + brge.d r10,0,.Ladd_dbl1_gt + or DBL0L,r4,r5 +/* subtract, abs(DBL0) < abs(DBL1) */ +/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place + DBL1H, DBL1L: original values + r6: orig. DBL1H & 0x7ff00000 + r9: 0x7ff00000 + r12: guard bits */ + .balign 4 +.Lrsub: + neg.f r12,r12 + bmsk r7,DBL1H,19 + mov_s r5,DBL0H + sbc.f DBL0L,DBL1L,DBL0L + bic DBL1H,DBL1H,r7 + bset r7,r7,20 + sbc.f r4,r7,r5 + beq_l .Large_cancel_sub + norm DBL1L,r4 + b_l .Lsub_done ; note: r6 is already set up. + +.Lret_dbl1: + mov_s DBL0H,DBL1H + j_s.d [blink] + mov_l DBL0L,DBL1L + .balign 4 +.Ldenorm_small_shift_dbl0: + sub.f r8,r8,1 + bne.d .Lfixed_denorm_small_shift_dbl0 + add_s r12,r12,1 + brlt r10,0,.Lrsub +.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter + add.f DBL0L,DBL0L,DBL1L + add_s DBL0H,DBL0H,DBL1H + add.cs DBL0H,DBL0H,1 + xor DBL1H,DBL0H,DBL1H + bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt + lsr.f DBL1H,DBL0H + and r4,DBL0L,2 + bmsk DBL1H,DBL1H,18 + sbc DBL0H,DBL0H,DBL1H + rrc.f DBL0L,DBL0L + or.f r12,r12,r4 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + bic.f 0,r9,DBL0H ; check for generating infinity with possible ... + jne.d [blink] ; ... 
non-zero fraction + add.cs DBL0H,DBL0H,1 + mov_s DBL0L,0 + bmsk DBL1H,DBL0H,19 + j_s.d [blink] + bic_s DBL0H,DBL0H,DBL1H +.Lno_shiftdown_dbl1_gt: + add.f 0,r12,r12 + btst.eq DBL0L,0 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + + .balign 4 +.Large_shift_dbl0: + brhs r8,55,.Lret_dbl1 + bmsk_s DBL0H,DBL0H,19 + brne r6,0,.Lno_denorm_large_shift_dbl0 + add_s r12,r12,1 + brne.d r8,33,.Lfixed_denorm_large_shift_dbl0 + sub r8,r8,1 + bset_s DBL0H,DBL0H,20 +.Lshift32_dbl0: + mov_s r12,DBL0L + mov_s DBL0L,DBL0H + brlt.d r10,0,.Lrsub + mov_s DBL0H,0 + b_s .Ladd_dbl1_gt + + .balign 4 +.Lno_denorm_large_shift_dbl0: + breq.d r8,32,.Lshift32_dbl0 + bset_l DBL0H,DBL0H,20 +.Lfixed_denorm_large_shift_dbl0: + asl r4,DBL0H,r12 + lsr r5,DBL0L,r8 + asl.f 0,DBL0L,r12 + lsr DBL0L,DBL0H,r8 + or r12,r4,r5 + tst.eq r12,1 + or.ne r12,r12,2 + brlt.d r10,0,.Lrsub + mov_s DBL0H,0 + b_l .Ladd_dbl1_gt + ENDFUNC(__adddf3) + ENDFUNC(__subdf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/addsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/addsf3.S new file mode 100644 index 000000000..45ed09321 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/addsf3.S @@ -0,0 +1,344 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +#if 0 /* DEBUG */ + .global __addsf3 + FUNC(__addsf3) + .balign 4 +__addsf3: + push_s blink + push_s r1 + bl.d __addsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __addsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__addsf3) + .global __subsf3 + FUNC(__subsf3) + .balign 4 +__subsf3: + push_s blink + push_s r1 + bl.d __subsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __subsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__subsf3) +#define __addsf3 __addsf3_asm +#define __subsf3 __subsf3_asm +#endif /* DEBUG */ +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ + +/* inputs: r0, r1 + output: r0 + clobber: r1-r10, r12, flags */ + + .balign 4 + .global __addsf3 + .global __subsf3 + FUNC(__addsf3) + FUNC(__subsf3) + .long 0x7f800000 ; exponent mask +__subsf3: + bxor_l r1,r1,31 +__addsf3: + ld r9,[pcl,-8] /* r9 = exponent mask from the .long placed ahead of the entry labels */ + bmsk r4,r0,30 + xor r10,r0,r1 + and r6,r1,r9 + sub.f r12,r4,r6 + asr_s r12,r12,23 + blo .Ldbl1_gt + brhs r4,r9,.Linf_nan + brne r12,0,.Lsmall_shift + brge r10,0,.Ladd_same_exp ; r12 == 0 +/* After subtracting, we need to normalize; when shifting to place the + leading 1 into position for the implicit 1 and adding that to DBL0, + we increment the exponent. Thus, we have to subtract one more than + the shift count from the exponent beforehand.
Iff the exponent drops thus + below zero (before adding in the fraction with the leading one), we have + generated a denormal number. Denormal handling is basically reducing the + shift count so that we produce a zero exponent instead; FWIW, this way + the shift count can become zero (if we started out with exponent 1). + On the plus side, we don't need to check for denorm input, the result + of subtracting these looks just the same as denormals generated during + subtraction. */ + bmsk r7,r1,30 + breq r4,r7,.Lret0 + sub.f r5,r4,r7 + lsr r12,r4,23 + neg.cs r5,r5 + norm r3,r5 + bmsk r2,r0,22 + sub_s r3,r3,6 + min r12,r12,r3 + bic r1,r0,r2 + sub_s r3,r12,1 + asl_s r12,r12,23 + asl r2,r5,r3 + sub_s r1,r1,r12 + add_s r0,r1,r2 + j_s.d [blink] + bxor.cs r0,r0,31 + .balign 4 +.Linf_nan: + ; If both inputs are inf, but with different signs, the result is NaN. + asr r12,r10,31 + or_s r1,r1,r12 + j_s.d [blink] + or.eq r0,r0,r1 + .balign 4 +.Ladd_same_exp: + /* This is a special case because we can't test for need to shift + down by checking if bit 23 of DBL0 changes. OTOH, here we know + that we always need to shift down. */ + ; adding the two floating point numbers together makes the sign + ; cancel out and appear as carry; the exponent is doubled, and the + ; fraction also in need of shifting left by one. The two implicit + ; ones of the sources make an implicit 1 of the result, again + ; non-existent in a place shifted by one. + add.f r0,r0,r1 + btst_s r0,1 + breq r6,0,.Ldenorm_add + add.ne r0,r0,1 ; round to even. + rrc r0,r0 + bmsk r1,r9,23 + add r0,r0,r1 ; increment exponent + bic.f 0,r9,r0; check for overflow -> infinity.
+ jne_l [blink] + mov_s r0,r9 + j_s.d [blink] + bset.cs r0,r0,31 + +.Ldenorm_add: + j_s.d [blink] + add r0,r4,r1 + +.Lret_dbl0: + j_s [blink] + + .balign 4 +.Lsmall_shift: + brhi r12,25,.Lret_dbl0 + breq.d r6,0,.Ldenorm_small_shift + bmsk_s r1,r1,22 + bset_s r1,r1,23 +.Lfixed_denorm_small_shift: + neg r8,r12 + asl r5,r1,r8 + brge.d r10,0,.Ladd + lsr_l r1,r1,r12 +/* subtract, abs(DBL0) > abs(DBL1) */ +/* DBL0: original values + DBL1: fraction with explicit leading 1, shifted into place + r4: orig. DBL0 & 0x7fffffff + r6: orig. DBL1 & 0x7f800000 + r9: 0x7f800000 + r10: orig. DBL0H ^ DBL1H + r5 : guard bits */ + .balign 4 +.Lsub: + neg.f r12,r5 + bmsk r3,r0,22 + bset r5,r3,23 + sbc.f r4,r5,r1 + beq.d .Large_cancel_sub + bic r7,r0,r3 + norm r3,r4 + bmsk r6,r7,30 +.Lsub_done: + sub_s r3,r3,6 + breq r3,1,.Lsub_done_noshift + asl r5,r3,23 + sub_l r3,r3,1 + brlo r6,r5,.Ldenorm_sub + sub r0,r7,r5 + neg_s r1,r3 + lsr.f r2,r12,r1 + asl_s r12,r12,r3 + btst_s r2,0 + bmsk.eq.f r12,r12,30 + asl r5,r4,r3 + add_s r0,r0,r2 + adc.ne r0,r0,0 + j_s.d [blink] + add_l r0,r0,r5 + +.Lret0: + j_s.d [blink] + mov_l r0,0 + + .balign 4 +.Ldenorm_small_shift: + brne.d r12,1,.Lfixed_denorm_small_shift + sub_s r12,r12,1 + brlt.d r10,0,.Lsub + mov_s r5,r12 ; zero r5, and align following code +.Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear. 
+ bmsk r2,r0,22 + add_s r2,r2,r1 + bbit0.d r2,23,.Lno_shiftdown + add_s r0,r0,r1 + bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity + bmsk r1,r2,22 + lsr.ne.f r2,r2,2; cc: even ; hi: might round down + lsr.ne r1,r1,1 + rcmp.hi r5,1; hi : round down + bclr.hi r0,r0,0 + j_l.d [blink] + sub_s r0,r0,r1 + +/* r4: DBL0H & 0x7fffffff + r6: DBL1H & 0x7f800000 + r9: 0x7f800000 + r10: sign difference + r12: shift count (negative) */ + .balign 4 +.Ldbl1_gt: + brhs r6,r9,.Lret_dbl1 ; inf or NaN + neg r8,r12 + brhi r8,25,.Lret_dbl1 +.Lsmall_shift_dbl0: + breq.d r6,0,.Ldenorm_small_shift_dbl0 + bmsk_s r0,r0,22 + bset_s r0,r0,23 +.Lfixed_denorm_small_shift_dbl0: + asl r5,r0,r12 + brge.d r10,0,.Ladd_dbl1_gt + lsr r0,r0,r8 +/* subtract, abs(DBL0) < abs(DBL1) */ +/* DBL0: fraction with explicit leading 1, shifted into place + DBL1: original value + r6: orig. DBL1 & 0x7f800000 + r9: 0x7f800000 + r5: guard bits */ + .balign 4 +.Lrsub: + neg.f r12,r5 + bmsk r5,r1,22 + bic r7,r1,r5 + bset r5,r5,23 + sbc.f r4,r5,r0 + bne.d .Lsub_done ; note: r6 is already set up. + norm r3,r4 + /* Fall through */ + +/* r4:r12 : unnormalized result fraction + r7: result sign and exponent */ +/* When seeing large cancellation, only the topmost guard bit might be set. */ + .balign 4 +.Large_cancel_sub: + breq_s r12,0,.Lret0 + sub r0,r7,24<<23 + xor.f 0,r0,r7 ; test if exponent is negative + tst.pl r9,r0 ; test if exponent is zero + jpnz [blink] ; return if non-denormal result + bmsk r6,r7,30 + lsr r3,r6,23 + xor r0,r6,r7 + sub_s r3,r3,24-22 + j_s.d [blink] + bset r0,r0,r3 + + ; If a denorm is produced, we have an exact result - + ; no need for rounding. 
+ .balign 4 +.Ldenorm_sub: + sub r3,r6,1 + lsr.f r3,r3,23 + xor r0,r6,r7 + neg_s r1,r3 + asl.ne r4,r4,r3 + lsr_s r12,r12,r1 + add_s r0,r0,r4 + j_s.d [blink] + add.ne r0,r0,r12 + + .balign 4 +.Lsub_done_noshift: + add.f 0,r12,r12 + btst.eq r4,0 + bclr r4,r4,23 + add r0,r7,r4 + j_s.d [blink] + adc.ne r0,r0,0 + + .balign 4 +.Lno_shiftdown: + add.f 0,r5,r5 + btst.eq r0,0 + cmp.eq r5,r5 + j_s.d [blink] + add.cs r0,r0,1 + +.Lret_dbl1: + j_s.d [blink] + mov_l r0,r1 + .balign 4 +.Ldenorm_small_shift_dbl0: + sub.f r8,r8,1 + bne.d .Lfixed_denorm_small_shift_dbl0 + add_s r12,r12,1 + brlt.d r10,0,.Lrsub + mov r5,0 +.Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear. + bmsk r2,r1,22 + add_s r2,r2,r0 + bbit0.d r2,23,.Lno_shiftdown_dbl1_gt + add_s r0,r1,r0 + bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity + bmsk r1,r2,22 + lsr.ne.f r2,r2,2; cc: even ; hi: might round down + lsr.ne r1,r1,1 + rcmp.hi r5,1; hi : round down + bclr.hi r0,r0,0 + j_l.d [blink] + sub_s r0,r0,r1 + + .balign 4 +.Lno_shiftdown_dbl1_gt: + add.f 0,r5,r5 + btst.eq r0,0 + cmp.eq r5,r5 + j_s.d [blink] + add.cs r0,r0,1 + ENDFUNC(__addsf3) + ENDFUNC(__subsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc-ieee-754.h b/gcc-4.9/libgcc/config/arc/ieee-754/arc-ieee-754.h new file mode 100644 index 000000000..64e6d2309 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc-ieee-754.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifdef __LITTLE_ENDIAN__ /* DBLnH names the register holding the sign/exponent word of double operand n, DBLnL the other word; the pairing swaps with endianness */ +#define DBL0L r0 +#define DBL0H r1 +#define DBL1L r2 +#define DBL1H r3 +#else +#define DBL0L r1 +#define DBL0H r0 +#define DBL1L r3 +#define DBL1H r2 +#endif +#define add_l add /* the *_l names from here on expand to the same mnemonic without the suffix; bcs_s expands to blo_s */ +#define asr_l asr +#define j_l j +#define jne_l jne +#define jeq_l jeq +#define or_l or +#define mov_l mov +#define b_l b +#define beq_l beq +#define bne_l bne +#define brne_l brne +#define bset_l bset +#define sub_l sub +#define sub1_l sub1 +#define lsr_l lsr +#define xor_l xor +#define bic_l bic +#define bmsk_l bmsk +#define bxor_l bxor +#define bcs_s blo_s diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divdf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divdf3.S new file mode 100644 index 000000000..2381ba67d --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divdf3.S @@ -0,0 +1,421 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details.
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + to calculate a := b/x as b*y, with y := 1/x: + - x is in the range [1..2) + - calculate 15..18 bit inverse y0 using a table of approximating polynomials. + Precision is higher for polynomials used to evaluate input with larger + value. + - Do one Newton-Raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + The truncation error for the either is less than 1 + x/2 ulp. + A 31 bit inverse can be simply calculated by using x with implicit 1 + and chaining the multiplies. For a 32 bit inverse, we multiply y0^2 + with the bare fraction part of x, then add in y0^2 for the implicit + 1 of x. + - If calculating a 31 bit inverse, the systematic error is less than + -1 ulp; likewise, for 32 bit, it is less than -2 ulp. + - If we calculate our seed with a 32 bit fraction, we can achieve a + tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we + only need to take the step to calculate the 2nd stage rest and + rounding adjust 1/32nd of the time. However, if we use a 20 bit + fraction for the seed, the negative error can exceed -2 ulp/128, (2) + thus for a simple add / tst check, we need to do the 2nd stage + rest calculation/ rounding adjust 1/16th of the time. + (1): The inexactness of the 32 bit inverse contributes an error in the + range of (-1 .. +(1+x/2) ) ulp/128.
Leaving out the low word of the + rest contributes an error < +1/x ulp/128 . In the interval [1,2), + x/2 + 1/x <= 1.5 . + (2): Unless proven otherwise. I have not actually looked for an + example where -2 ulp/128 is exceeded, and my calculations indicate + that the excess, if existent, is less than -1/512 ulp. + ??? The algorithm is still based on the ARC700 optimized code. + Maybe we could make better use of 32x16 bit multiply, or 64 bit multiply + results. + */ +#include "../arc-ieee-754.h" +#define mlo acc2 +#define mhi acc1 +#define mul64(b,c) mullw 0,b,c` machlw 0,b,c +#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c + +/* N.B. fp-bit.c does double rounding on denormal numbers. */ +#if 0 /* DEBUG */ + .global __divdf3 + FUNC(__divdf3) + .balign 4 +__divdf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __divdf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __divdf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + and r12,DBL0H,DBL1H + bic.f 0,0x7ff80000,r12 ; both NaN -> OK + jeq_s [blink] + bl abort + ENDFUNC(__divdf3) +#define __divdf3 __divdf3_asm +#endif /* DEBUG */ + + FUNC(__divdf3) + .balign 4 +.L7ff00000: + .long 0x7ff00000 +.Ldivtab: + .long 0xfc0fffe1 + .long 0xf46ffdfb + .long 0xed1ffa54 + .long 0xe61ff515 + .long 0xdf7fee75 + .long 0xd91fe680 + .long 0xd2ffdd52 + .long 0xcd1fd30c + .long 0xc77fc7cd + .long 0xc21fbbb6 + .long 0xbcefaec0 + .long 0xb7efa100 + .long 0xb32f92bf + .long 0xae8f83b7 + .long 0xaa2f7467 + .long 0xa5ef6479 + .long 0xa1cf53fa + .long 0x9ddf433e + .long 0x9a0f3216 + .long 0x965f2091 + .long 0x92df0f11 + .long 0x8f6efd05 + .long 0x8c1eeacc + .long 0x88eed876 + .long 0x85dec615 + .long 0x82eeb3b9 + .long 0x800ea10b + .long 0x7d3e8e0f + .long 0x7a8e7b3f + .long 0x77ee6836 + .long 0x756e5576 + .long 0x72fe4293 + .long 0x709e2f93 + .long 0x6e4e1c7f + .long 0x6c0e095e + .long 0x69edf6c5 + .long 0x67cde3a5 + .long 
0x65cdd125 + .long 0x63cdbe25 + .long 0x61ddab3f + .long 0x600d991f + .long 0x5e3d868c + .long 0x5c6d7384 + .long 0x5abd615f + .long 0x590d4ecd + .long 0x576d3c83 + .long 0x55dd2a89 + .long 0x545d18e9 + .long 0x52dd06e9 + .long 0x516cf54e + .long 0x4ffce356 + .long 0x4e9cd1ce + .long 0x4d3cbfec + .long 0x4becae86 + .long 0x4aac9da4 + .long 0x496c8c73 + .long 0x483c7bd3 + .long 0x470c6ae8 + .long 0x45dc59af + .long 0x44bc4915 + .long 0x43ac3924 + .long 0x428c27fb + .long 0x418c187a + .long 0x407c07bd + +__divdf3_support: /* This label makes debugger output saner. */ + .balign 4 +.Ldenorm_dbl1: + brge r6, \ + 0x43500000,.Linf_NaN ; large number / denorm -> Inf + bmsk.f r12,DBL1H,19 + mov.eq r12,DBL1L + mov.eq DBL1L,0 + sub.eq r7,r7,32 + norm.f r11,r12 ; flag for x/0 -> Inf check + beq_s .Linf_NaN + mov.mi r11,0 + add.pl r11,r11,1 + add_s r12,r12,r12 + asl r8,r12,r11 + rsub r12,r11,31 + lsr r12,DBL1L,r12 + tst_s DBL1H,DBL1H + or r8,r8,r12 + lsr r4,r8,26 + lsr DBL1H,r8,12 + ld.as r4,[r10,r4] + bxor.mi DBL1H,DBL1H,31 + sub r11,r11,11 + asl DBL1L,DBL1L,r11 + sub r11,r11,1 + mulu64 (r4,r8) + sub r7,r7,r11 + b.d .Lpast_denorm_dbl1 + asl r7,r7,20 + +.Linf_NaN: + tst_s DBL0L,DBL0L ; 0/0 -> NaN + xor_s DBL1H,DBL1H,DBL0H + bclr.eq.f DBL0H,DBL0H,31 + bmsk DBL0H,DBL1H,30 + xor_s DBL0H,DBL0H,DBL1H + sub.eq DBL0H,DBL0H,1 + mov_s DBL0L,0 + j_s.d [blink] + or DBL0H,DBL0H,r9 + .balign 4 +.Lret0_2: + xor_s DBL1H,DBL1H,DBL0H + mov_s DBL0L,0 + bmsk DBL0H,DBL1H,30 + j_s.d [blink] + xor_s DBL0H,DBL0H,DBL1H + .balign 4 + .global __divdf3 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. 
*/ +__divdf3: + asl r8,DBL1H,12 + lsr r4,r8,26 + sub3 r10,pcl,51;(.-.Ldivtab) >> 3 + ld.as r9,[pcl,-104]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000 + ld.as r4,[r10,r4] + lsr r12,DBL1L,20 + and.f r7,DBL1H,r9 + or r8,r8,r12 + mulu64 (r4,r8) + beq.d .Ldenorm_dbl1 +.Lpast_denorm_dbl1: + and.f r6,DBL0H,r9 + breq.d r7,r9,.Linf_nan_dbl1 + asl r4,r4,12 + sub r4,r4,mhi + mululw 0,r4,r4 + machulw r5,r4,r4 + bne.d .Lnormal_dbl0 + lsr r8,r8,1 + + .balign 4 +.Ldenorm_dbl0: + bmsk.f r12,DBL0H,19 + ; wb stall + mov.eq r12,DBL0L + sub.eq r6,r6,32 + norm.f r11,r12 ; flag for 0/x -> 0 check + brge r7, \ + 0x43500000, .Lret0_2 ; denorm/large number -> 0 + beq_s .Lret0_2 + mov.mi r11,0 + add.pl r11,r11,1 + asl r12,r12,r11 + sub r6,r6,r11 + add.f 0,r6,31 + lsr r10,DBL0L,r6 + mov.mi r10,0 + add r6,r6,11+32 + neg.f r11,r6 + asl DBL0L,DBL0L,r11 + mov.pl DBL0L,0 + sub r6,r6,32-1 + b.d .Lpast_denorm_dbl0 + asl r6,r6,20 + + .balign 4 +.Linf_nan_dbl1: /* NOTE(review): when the flags end up ne (DBL0 finite and non-zero), DBL0L is cleared and DBL0H is set to DBL1H before the sign fix-up - confirm this really yields 0 for x/Inf as the comment on this line states */ ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN + or.f 0,r6,DBL0L + cmp.ne r6,r9 + not_s DBL0L,DBL1H + sub_s.ne DBL0L,DBL0L,DBL0L + tst_s DBL0H,DBL0H + add_s DBL0H,DBL1H,DBL0L + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + + .balign 4 +.Lnormal_dbl0: + breq.d r6,r9,.Linf_nan_dbl0 + asl r12,DBL0H,11 + lsr r10,DBL0L,21 +.Lpast_denorm_dbl0: + bset r8,r8,31 + mulu64 (r5,r8) + add_s r12,r12,r10 + bset r5,r12,31 + cmp r5,r8 + cmp.eq DBL0L,DBL1L + lsr.cc r5,r5,1 + sub r4,r4,mhi ; u1.31 inverse, about 30 bit + mululw 0,r5,r4 + machulw r11,r5,r4 ; result fraction highpart + lsr r8,r8,2 ; u3.29 + add r5,r6, /* wait for immediate */ \ + 0x3fe00000 + mulu64 (r11,r8) ; u-28.31 + asl_s DBL1L,DBL1L,9 ; u-29.23:9 + sbc r6,r5,r7 + mov r12,mlo ; u-28.31 + mulu64 (r11,DBL1L) ; mhi: u-28.23:9 + add.cs DBL0L,DBL0L,DBL0L + asl_s DBL0L,DBL0L,6 ; u-26.25:7 + asl r10,r11,23 + sub_l DBL0L,DBL0L,r12 + lsr r7,r11,9 + sub r5,DBL0L,mhi ; rest msw ; u-26.31:0 + mul64 (r5,r4) ; mhi: result fraction lowpart + xor.f 0,DBL0H,DBL1H + and DBL0H,r6,r9 + add_s DBL0H,DBL0H,r7 +
bclr r12,r9,20 ; 0x7fe00000 + brhs.d r6,r12,.Linf_denorm + bxor.mi DBL0H,DBL0H,31 + add.f r12,mhi,0x11 + asr r9,r12,5 + sub.mi DBL0H,DBL0H,1 + add.f DBL0L,r9,r10 + tst r12,0x1c + jne.d [blink] + add.cs DBL0H,DBL0H,1 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in double + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. Since we want to know + only the sign bit, it is sufficient to calculate only the + highpart of the lower 64 bits. */ + mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo + sub.f DBL0L,DBL0L,1 + asl r12,r9,2 ; u-22.30:2 + sub.cs DBL0H,DBL0H,1 + sub.f r12,r12,2 + mov r10,mlo ; rest before considering r12 in r5 : -r10 + mululw 0,r12,DBL1L + machulw r7,r12,DBL1L ; mhi: u-51.32 + asl r5,r5,25 ; s-51.7:25 + lsr r10,r10,7 ; u-51.30:2 + mulu64 (r12,r8) ; mlo: u-51.31:1 + sub r5,r5,r10 + add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L + bset r7,r7,0 ; make sure that the result is not zero, and that + sub r5,r5,r7 ; a highpart zero appears negative + sub.f r5,r5,mlo ; rest msw + add.pl.f DBL0L,DBL0L,1 + j_s.d [blink] + add.eq DBL0H,DBL0H,1 + +.Linf_nan_dbl0: + tst_s DBL1H,DBL1H + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + .balign 4 +.Linf_denorm: + lsr r12,r6,28 + brlo.d r12,0xc,.Linf +.Ldenorm: + asr r6,r6,20 + neg r9,r6 + mov_s DBL0H,0 + brhs.d r9,54,.Lret0 + bxor.mi DBL0H,DBL0H,31 + add r12,mhi,1 + and r12,r12,-4 + rsub r7,r6,5 + asr r10,r12,28 + bmsk r4,r12,27 + min r7,r7,31 + asr DBL0L,r4,r7 + add DBL1H,r11,r10 + abs.f r10,r4 + sub.mi r10,r10,1 + add.f r7,r6,32-5 + asl r4,r4,r7 + mov.mi r4,r10 + add.f r10,r6,23 + rsub r7,r6,9 + lsr r7,DBL1H,r7 + asl r10,DBL1H,r10 + or.pnz DBL0H,DBL0H,r7 + or.mi r4,r4,r10 + mov.mi r10,r7 + add.f DBL0L,r10,DBL0L + add.cs.f DBL0H,DBL0H,1 ; carry clear after this point + bxor.f 0,r4,31 + add.pnz.f DBL0L,DBL0L,1 + add.cs.f 
DBL0H,DBL0H,1 + jne_s [blink] + /* Calculation so far was not conclusive; calculate further rest. */ + mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo + asr.f r12,r12,3 + asl r5,r5,25 ; s-51.7:25 + mov r11,mlo ; rest before considering r12 in r5 : -r11 + mulu64 (r12,r8) ; u-51.31:1 + and r9,DBL0L,1 ; tie-breaker: round to even + lsr r11,r11,7 ; u-51.30:2 + mov DBL1H,mlo ; u-51.31:1 + mulu64 (r12,DBL1L) ; u-51.62:2 + sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L + add_s DBL1H,DBL1H,r11 + sub DBL1H,DBL1H,r5 ; -rest msw + add_s DBL1H,DBL1H,mhi ; -rest msw + add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-( + tst_s DBL1H,DBL1H + cmp.eq mlo,r9 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + +.Lret0: + /* return +- 0 */ + j_s.d [blink] + mov_s DBL0L,0 +.Linf: + mov_s DBL0H,r9 + mov_s DBL0L,0 + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + ENDFUNC(__divdf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divsf3.S new file mode 100644 index 000000000..77b257d84 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divsf3.S @@ -0,0 +1,274 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + - calculate 15..18 bit inverse using a table of approximating polynomials. + precision is higher for polynomials used to evaluate input with larger + value. + - do one Newton-Raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + + The names mlo / mhi and mul64 / mulu64 used in the code are macros + defined right after the include: mlo / mhi stand for acc2 / acc1, and + mul64 / mulu64 expand to the mullw+machlw / mululw+machulw pairs. + */ +#include "../arc-ieee-754.h" +#define mlo acc2 +#define mhi acc1 +#define mul64(b,c) mullw 0,b,c` machlw 0,b,c +#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c + +#if 0 /* DEBUG: disabled cross-check wrapper. It calls __divsf3_c, then __divsf3_asm with the saved operands, compares the two r0 results and branches to abort when they differ. */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 +#if 1 + bne abort + jeq_s [blink] + b abort +#else + bne abort + j_s [blink] +#endif + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + FUNC(__divsf3) + .balign 4 +.Ldivtab: /* seed table for the inverse approximation; the __divsf3 entry code finds it with a hard-coded pc-relative sub3, so the layout between this label and that code must stay fixed */ + .long 0xfc0ffff0 + .long 0xf46ffefd + .long 0xed1ffd2a + .long 0xe627fa8e + .long 0xdf7ff73b + .long 0xd917f33b + .long 0xd2f7eea3 + .long 0xcd1fe986 + .long 0xc77fe3e7 + .long 0xc21fdddb + .long 0xbcefd760 + .long 0xb7f7d08c + .long 0xb32fc960 + .long 0xae97c1ea + .long 0xaa27ba26 + .long 0xa5e7b22e + .long 0xa1cfa9fe + .long 0x9ddfa1a0 + .long 0x9a0f990c + .long 0x9667905d + .long 0x92df878a + .long 0x8f6f7e84 + .long 0x8c27757e + .long 0x88f76c54 + .long 0x85df630c + .long 0x82e759c5 + .long 0x8007506d + .long 0x7d3f470a + .long 0x7a8f3da2 + .long 0x77ef341e + .long 0x756f2abe + .long 0x72f7212d + .long 0x709717ad + .long 0x6e4f0e44 + .long 0x6c1704d6 + .long 0x69e6fb44 + .long
0x67cef1d7 + .long 0x65c6e872 + .long 0x63cedf18 + .long 0x61e6d5cd + .long 0x6006cc6d + .long 0x5e36c323 + .long 0x5c76b9f3 + .long 0x5abeb0b7 + .long 0x5916a79b + .long 0x57769e77 + .long 0x55de954d + .long 0x54568c4e + .long 0x52d6834d + .long 0x51667a7f + .long 0x4ffe71b5 + .long 0x4e9e68f1 + .long 0x4d466035 + .long 0x4bf65784 + .long 0x4aae4ede + .long 0x496e4646 + .long 0x48363dbd + .long 0x47063547 + .long 0x45de2ce5 + .long 0x44be2498 + .long 0x43a61c64 + .long 0x4296144a + .long 0x41860c0e + .long 0x407e03ee +.L7f800000: + .long 0x7f800000 + .balign 4 + .global __divsf3_support +__divsf3_support: +.Linf_NaN: + bclr.f 0,r0,31 ; 0/0 -> NaN + xor_s r0,r0,r1 + bmsk r1,r0,30 + bic_s r0,r0,r1 + sub.eq r0,r0,1 + j_s.d [blink] + or r0,r0,r9 +.Lret0: + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. */ +__divsf3: + ld.as r9,[pcl,-9]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000 + sub3 r3,pcl,37;(.-.Ldivtab) >> 3 + lsr r2,r1,17 + and.f r11,r1,r9 + bmsk r5,r2,5 + beq.d .Ldenorm_fp1 + asl r6,r1,8 + and.f r2,r0,r9 + ld.as r5,[r3,r5] + asl r4,r1,9 + bset r6,r6,31 + breq.d r11,r9,.Linf_nan_fp1 +.Lpast_denorm_fp1: + mululw 0,r5,r4 + machulw r8,r5,r4 + breq.d r2,r9,.Linf_nan_fp0 + asl r5,r5,13 + sub r7,r5,r8 + mululw 0,r7,r6 + machulw r8,r7,r6 + beq.d .Ldenorm_fp0 + asl r12,r0,8 + mulu64 (r8,r7) + bset r3,r12,31 +.Lpast_denorm_fp0: + cmp_s r3,r6 + lsr.cc r3,r3,1 + add_s r2,r2, /* wait for immediate */ \ + 0x3f000000 + sub r7,r7,mhi ; u1.31 inverse, about 30 bit + mulu64 (r3,r7) + sbc r2,r2,r11 + xor.f 0,r0,r1 + and r0,r2,r9 + bclr r3,r9,23 ; 0x7f000000 + brhs.d r2,r3,.Linf_denorm + bxor.mi r0,r0,31 +.Lpast_denorm: + add r3,mhi,0x22 ; round to nearest or higher + tst r3,0x3c ; check if rounding was unsafe + lsr r3,r3,6 + jne.d [blink] ; return if rounding was safe. + add_s r0,r0,r3 + /* work out exact rounding if we fall through here. 
*/ + /* We know that the exact result cannot be represented in single + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. */ + add_s r3,r3,r3 + sub_s r3,r3,1 + mulu64 (r3,r6) + asr.f 0,r0,1 ; for round-to-even in case this is a denorm + rsub r2,r9,25 + asl_s r12,r12,r2 + sub.f 0,r12,mlo + j_s.d [blink] + sub.mi r0,r0,1 +.Linf_nan_fp1: + lsr_s r0,r0,31 + bmsk.f 0,r1,22 + asl_s r0,r0,31 + bne_s 0f ; inf/inf -> nan + brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan +0: j_s.d [blink] + mov r0,-1 +.Lsigned0: +.Linf_nan_fp0: + tst_s r1,r1 + j_s.d [blink] + bxor.mi r0,r0,31 + .balign 4 + .global __divsf3 +/* For denormal results, it is possible that an exact result needs + rounding, and thus the round-to-even rule has to come into play. */ +.Linf_denorm: + brlo r2,0xc0000000,.Linf +.Ldenorm: + asr_s r2,r2,23 + bic r0,r0,r9 + neg r9,r2 + brlo.d r9,25,.Lpast_denorm + lsr r3,mlo,r9 + /* Fall through: return +- 0 */ + j_s [blink] +.Linf: + j_s.d [blink] + or r0,r0,r9 + .balign 4 +.Ldenorm_fp1: + norm.f r12,r6 ; flag for x/0 -> Inf check + add r6,r6,r6 + rsub r5,r12,16 + ror r5,r1,r5 + bmsk r5,r5,5 + bic.ne.f 0, \ + 0x60000000,r0 ; large number / denorm -> Inf + ld.as r5,[r3,r5] + asl r6,r6,r12 + beq.d .Linf_NaN + and.f r2,r0,r9 + add r4,r6,r6 + asl_s r12,r12,23 + bne.d .Lpast_denorm_fp1 + add_s r2,r2,r12 +.Ldenorm_fp0: + mulu64 (r8,r7) + bclr r12,r12,31 + norm.f r3,r12 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0 + asl_s r12,r12,r3 + asl_s r3,r3,23 + add_s r12,r12,r12 + add r11,r11,r3 + b.d .Lpast_denorm_fp0 + mov_s r3,r12 + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/muldf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/muldf3.S new file mode 100644 index 000000000..59df77253 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/muldf3.S @@ -0,0 +1,231 @@ +/* 
Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __muldf3 + .balign 4 +__muldf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __muldf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __muldf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + b abort +#define __muldf3 __muldf3_asm +#endif /* DEBUG */ + +__muldf3_support: /* This label makes debugger output saner. */ + .balign 4 + FUNC(__muldf3) +.Ldenorm_2: + breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output + norm.f r12,DBL1L + mov.mi r12,21 + add.pl r12,r12,22 + neg r11,r12 + asl_s r12,r12,20 + lsr.f DBL1H,DBL1L,r11 + ror DBL1L,DBL1L,r11 + sub_s DBL0H,DBL0H,r12 + mov.eq DBL1H,DBL1L + sub_l DBL1L,DBL1L,DBL1H + /* Fall through. 
*/ + .global __muldf3 + .balign 4 +__muldf3: + mululw 0,DBL0L,DBL1L + machulw r4,DBL0L,DBL1L + ld.as r9,[pcl,0x67] ; ((.L7ff00000-.+2)/4)] + bmsk r6,DBL0H,19 + bset r6,r6,20 + mov r8,acc2 + mululw 0,r4,1 + and r11,DBL0H,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,DBL1H,r9 + breq.d r12,0,.Ldenorm_dbl1 + maclw 0,r6,DBL1L + machulw 0,r6,DBL1L + breq.d r11,r9,.Linf_nan + bmsk r10,DBL1H,19 + breq.d r12,r9,.Linf_nan + bset r10,r10,20 + maclw 0,r10,DBL0L + machulw r5,r10,DBL0L + add_s r12,r12,r11 ; add exponents + mov r4,acc2 + mululw 0,r5,1 + maclw 0,r6,r10 + machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8 + tst r8,r8 + bclr r8,r9,30 ; 0x3ff00000 + bset.ne r4,r4,0 ; put least significant word into sticky bit + bclr r6,r9,20 ; 0x7fe00000 + lsr.f r10,r7,9 + rsub.eq r8,r8,r9 ; 0x40000000 + sub r12,r12,r8 ; subtract bias + implicit 1 + brhs.d r12,r6,.Linf_denorm + rsub r10,r10,12 +.Lshift_frac: + neg r8,r10 + asl r6,r4,r10 + lsr DBL0L,r4,r8 + add.f 0,r6,r6 + btst.eq DBL0L,0 + cmp.eq r4,r4 ; round to nearest / round to even + asl r4,acc2,r10 + lsr r5,acc2,r8 + adc.f DBL0L,DBL0L,r4 + xor.f 0,DBL0H,DBL1H + asl r7,r7,r10 + add_s r12,r12,r5 + adc DBL0H,r12,r7 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ + +/* If one number is denormal, subtract some from the exponent of the other + one (if the other exponent is too small, return 0), and normalize the + denormal. Then re-run the computation. 
*/ +.Lret0_2: /* return zero, keeping only bit 31 of DBL0H as the sign */ + lsr_s DBL0H,DBL0H,31 + asl_s DBL0H,DBL0H,31 + j_s.d [blink] + mov_s DBL0L,0 + .balign 4 +.Ldenorm_dbl0: /* exchange DBL0 and DBL1 through r12, then recompute r11 from the new DBL0H */ + mov_s r12,DBL0L + mov_s DBL0L,DBL1L + mov_s DBL1L,r12 + mov_s r12,DBL0H + mov_s DBL0H,DBL1H + mov_s DBL1H,r12 + and r11,DBL0H,r9 +.Ldenorm_dbl1: /* here DBL1 is the operand with a zero exponent field and r11 = DBL0H & r9 */ + brhs r11,r9,.Linf_nan /* exponent field of DBL0 is all ones */ + brhs 0x3ca00001,r11,.Lret0 /* exponent of DBL0 too small: return zero */ + sub_s DBL0H,DBL0H,DBL1H + bmsk.f DBL1H,DBL1H,30 + add_s DBL0H,DBL0H,DBL1H /* net effect of the last three insns: bit 31 of DBL1H is cleared and folded into bit 31 of DBL0H */ + beq.d .Ldenorm_2 /* taken when DBL1H without its sign bit is zero (Z from bmsk.f) */ + norm r12,DBL1H + sub_s r12,r12,10 + asl r5,r12,20 + asl_s DBL1H,DBL1H,r12 + sub DBL0H,DBL0H,r5 + neg r5,r12 + lsr r6,DBL1L,r5 + asl_s DBL1L,DBL1L,r12 + b.d __muldf3 /* re-run the multiply with the adjusted operands */ + add_s DBL1H,DBL1H,r6 + +.Lret0: xor_s DBL0H,DBL0H,DBL1H /* this xor/bclr/xor sequence leaves only the xor of both sign bits in DBL0H; DBL0L is then cleared */ + bclr DBL1H,DBL0H,31 + xor_s DBL0H,DBL0H,DBL1H + j_s.d [blink] + mov_s DBL0L,0 + + .balign 4 +.Linf_nan: + bclr r12,DBL1H,31 + xor_s DBL1H,DBL1H,DBL0H + bclr_s DBL0H,DBL0H,31 + max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf + or.f 0,DBL0H,DBL0L + mov_s DBL0L,0 + or.ne.f DBL1L,DBL1L,r12 + not_s DBL0H,DBL0L ; inf * 0 -> NaN + mov.ne DBL0H,r8 + tst_s DBL1H,DBL1H + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* We have checked for infinity / NaN input before, and transformed + denormalized inputs into normalized inputs. Thus, the worst case + exponent overflows are: + 1 + 1 - 0x400 == 0xc02 : maximum underflow + 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow + N.B. 0x7e and 0x7f are also values for overflow. + + If (r12 <= -54), we have an underflow to zero.
*/ + .balign 4 +.Linf_denorm: + lsr r6,r12,28 + brlo.d r6,0xc,.Linf + asr r6,r12,20 + add.f r10,r10,r6 + brgt.d r10,0,.Lshift_frac + mov_s r12,0 + beq.d .Lround_frac + add r10,r10,32 +.Lshift32_frac: + tst r4,r4 + mov r4,acc2 + bset.ne r4,r4,1 + mululw 0,r7,1 + brge.d r10,1,.Lshift_frac + mov r7,0 + breq.d r10,0,.Lround_frac + add r10,r10,32 + brgt r10,21,.Lshift32_frac + b_s .Lret0 + +.Lround_frac: + add.f 0,r4,r4 + btst.eq acc2,0 + mov_s DBL0L,acc2 + mov_s DBL0H,r7 + adc.eq.f DBL0L,DBL0L,0 + j_s.d [blink] + adc.eq DBL0H,DBL0H,0 + +.Linf: mov_s DBL0L,0 + xor.f DBL1H,DBL1H,DBL0H + mov_s DBL0H,r9 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + ENDFUNC(__muldf3) + + .balign 4 +.L7ff00000: + .long 0x7ff00000 diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/mulsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/mulsf3.S new file mode 100644 index 000000000..41681f542 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/mulsf3.S @@ -0,0 +1,176 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __mulsf3 + FUNC(__mulsf3) + .balign 4 +__mulsf3: + push_s blink + push_s r1 + bl.d __mulsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __mulsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__mulsf3) +#define __mulsf3 __mulsf3_asm +#endif /* DEBUG */ + + .balign 4 + .global __mulsf3 + FUNC(__mulsf3) +__mulsf3: + ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)] + bmsk r4,r1,22 + bset r2,r0,23 + asl_s r2,r2,8 + bset r3,r4,23 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,r1,r9 + breq.d r12,0,.Ldenorm_dbl1 + xor_s r0,r0,r1 + mululw 0,r2,r3 + machulw r6,r2,r3 + breq.d r11,r9,.Linf_nan_dbl0 + ld.as r4,[pcl,69]; [pcl,((.L7fffffff-.+2)/4)] + breq.d r12,r9,.Linf_nan_dbl1 +.Lpast_denorm: + asl.f 0,r6,8 + mov r7,acc2 + add.pl r6,r6,r6 + bclr.pl r6,r6,23 + add.pl.f r7,r7,r7 + add.cs r6,r6,1 + lsr.f 0,r6,1 + add_s r12,r12,r11 + adc.f 0,r7,r4 + add_s r12,r12, \ + -0x3f800000 + adc.f r8,r6,r12 + tst.pl r8,r9 + bic r0,r0,r4 + min r3,r8,r9 + jpnz.d [blink] + add.pnz r0,r0,r3 +; infinity or denormal number + add.ne.f r3,r3,r3 + asr_s r3,r3,23+1 + bset r6,r6,23 + bpnz.d .Linfinity + sub_s r3,r3,1 + neg_s r2,r3 + brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0 + lsr r2,r6,r2 + asl r9,r6,r3 + lsr.f 0,r2,1 + tst r7,r7 + add_s r0,r0,r2 + bset.ne r9,r9,0 + adc.f 0,r9,r4 + j_s.d [blink] + add.cs r0,r0,1 +.Linfinity: + j_s.d [blink] + add_s r0,r0,r9 + +.Lret_r0: j_s [blink] + + .balign 4 +.Ldenorm_dbl0: + bclr_s r2,r2,31 + norm.f r4,r2 + add_s r2,r2,r2 + asl r2,r2,r4 + breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 + asl r4,r4,23 + mululw 0,r2,r3 + machulw r6,r2,r3 + sub.ne.f r12,r12,r4 + ld.as r4,[pcl,28]; [pcl,((.L7fffffff-.+2)/4)] + bhi.d .Lpast_denorm + xor_s r0,r0,r1 + bmsk r1,r0,30 + 
j_s.d [blink] + bic_s r0,r0,r1 + + .balign 4 +.Ldenorm_dbl0_inf_nan_dbl1: + bmsk.f 0,r0,30 + mov.eq r1,-1 +.Linf_nan_dbl1: + xor_s r1,r1,r0 +.Linf_nan_dbl0: + bclr_s r1,r1,31 + j_s.d [blink] + xor_s r0,r0,r1 + + .balign 4 +.Ldenorm_dbl1: + breq.d r11,r9,.Linf_nan_dbl0_2 + norm.f r3,r4 + sub_s r3,r3,7 + asl r4,r4,r3 + mululw 0,r2,r4 + machulw r6,r2,r4 + sub_s r3,r3,1 + asl_s r3,r3,23 + sub.ne.f r11,r11,r3 + ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)] + bhi.d .Lpast_denorm + bmsk r8,r0,30 + j_s.d [blink] + bic r0,r0,r8 + + .balign 4 +.Linf_nan_dbl0_2: + bclr_s r1,r1,31 + xor_s r0,r0,r1 + sub.eq r1,r1,1 ; inf/nan * 0 -> nan + bic.f 0,r9,r1 + j_s.d [blink] + or.eq r0,r0,r1 ; r1 nan -> result nan + + .balign 4 +.L7f800000: + .long 0x7f800000 +.L7fffffff: + .long 0x7fffffff + ENDFUNC(__mulsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S new file mode 100644 index 000000000..100c8bc6c --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S @@ -0,0 +1,410 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + to calculate a := b/x as b*y, with y := 1/x: + - x is in the range [1..2) + - calculate 15..18 bit inverse y0 using a table of approximating polynomials. + Precision is higher for polynomials used to evaluate input with larger + value. + - Do one Newton-Raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + The truncation error for either is less than 1 + x/2 ulp. + A 31 bit inverse can be simply calculated by using x with implicit 1 + and chaining the multiplies. For a 32 bit inverse, we multiply y0^2 + with the bare fraction part of x, then add in y0^2 for the implicit + 1 of x. + - If calculating a 31 bit inverse, the systematic error is less than + -1 ulp; likewise, for 32 bit, it is less than -2 ulp. + - If we calculate our seed with a 32 bit fraction, we can achieve a + tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we + only need to take the step to calculate the 2nd stage rest and + rounding adjust 1/32nd of the time. However, if we use a 20 bit + fraction for the seed, the negative error can exceed -2 ulp/128, (2) + thus for a simple add / tst check, we need to do the 2nd stage + rest calculation/ rounding adjust 1/16th of the time. + (1): The inexactness of the 32 bit inverse contributes an error in the + range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the + rest contributes an error < +1/x ulp/128 . In the interval [1,2), + x/2 + 1/x <= 1.5 . + (2): Unless proven otherwise.
I have not actually looked for an + example where -2 ulp/128 is exceeded, and my calculations indicate + that the excess, if existent, is less than -1/512 ulp. + ??? The algorithm is still based on the ARC700 optimized code. + Maybe we could make better use of 64 bit multiply results and/or mmed . + */ +#include "../arc-ieee-754.h" + +/* N.B. fp-bit.c does double rounding on denormal numbers. */ +#if 0 /* DEBUG */ + .global __divdf3 + FUNC(__divdf3) + .balign 4 +__divdf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __divdf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __divdf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + and r12,DBL0H,DBL1H + bic.f 0,0x7ff80000,r12 ; both NaN -> OK + jeq_s [blink] + bl abort + ENDFUNC(__divdf3) +#define __divdf3 __divdf3_asm +#endif /* DEBUG */ + + FUNC(__divdf3) + .balign 4 +.L7ff00000: + .long 0x7ff00000 +.Ldivtab: + .long 0xfc0fffe1 + .long 0xf46ffdfb + .long 0xed1ffa54 + .long 0xe61ff515 + .long 0xdf7fee75 + .long 0xd91fe680 + .long 0xd2ffdd52 + .long 0xcd1fd30c + .long 0xc77fc7cd + .long 0xc21fbbb6 + .long 0xbcefaec0 + .long 0xb7efa100 + .long 0xb32f92bf + .long 0xae8f83b7 + .long 0xaa2f7467 + .long 0xa5ef6479 + .long 0xa1cf53fa + .long 0x9ddf433e + .long 0x9a0f3216 + .long 0x965f2091 + .long 0x92df0f11 + .long 0x8f6efd05 + .long 0x8c1eeacc + .long 0x88eed876 + .long 0x85dec615 + .long 0x82eeb3b9 + .long 0x800ea10b + .long 0x7d3e8e0f + .long 0x7a8e7b3f + .long 0x77ee6836 + .long 0x756e5576 + .long 0x72fe4293 + .long 0x709e2f93 + .long 0x6e4e1c7f + .long 0x6c0e095e + .long 0x69edf6c5 + .long 0x67cde3a5 + .long 0x65cdd125 + .long 0x63cdbe25 + .long 0x61ddab3f + .long 0x600d991f + .long 0x5e3d868c + .long 0x5c6d7384 + .long 0x5abd615f + .long 0x590d4ecd + .long 0x576d3c83 + .long 0x55dd2a89 + .long 0x545d18e9 + .long 0x52dd06e9 + .long 0x516cf54e + .long 0x4ffce356 + .long 0x4e9cd1ce + .long 0x4d3cbfec + .long 
0x4becae86 + .long 0x4aac9da4 + .long 0x496c8c73 + .long 0x483c7bd3 + .long 0x470c6ae8 + .long 0x45dc59af + .long 0x44bc4915 + .long 0x43ac3924 + .long 0x428c27fb + .long 0x418c187a + .long 0x407c07bd + +__divdf3_support: /* This label makes debugger output saner. */ + .balign 4 +.Ldenorm_dbl1: + brge r6, \ + 0x43500000,.Linf_NaN ; large number / denorm -> Inf + bmsk.f r12,DBL1H,19 + mov.eq r12,DBL1L + mov.eq DBL1L,0 + sub.eq r7,r7,32 + norm.f r11,r12 ; flag for x/0 -> Inf check + beq_s .Linf_NaN + mov.mi r11,0 + add.pl r11,r11,1 + add_s r12,r12,r12 + asl r8,r12,r11 + rsub r12,r11,31 + lsr r12,DBL1L,r12 + tst_s DBL1H,DBL1H + or r8,r8,r12 + lsr r4,r8,26 + lsr DBL1H,r8,12 + ld.as r4,[r10,r4] + bxor.mi DBL1H,DBL1H,31 + sub r11,r11,11 + asl DBL1L,DBL1L,r11 + sub r11,r11,1 + mulu64 r4,r8 + sub r7,r7,r11 + b.d .Lpast_denorm_dbl1 + asl r7,r7,20 + + .balign 4 +.Ldenorm_dbl0: + bmsk.f r12,DBL0H,19 + ; wb stall + mov.eq r12,DBL0L + sub.eq r6,r6,32 + norm.f r11,r12 ; flag for 0/x -> 0 check + brge r7, \ + 0x43500000, .Lret0_2 ; denorm/large number -> 0 + beq_s .Lret0_2 + mov.mi r11,0 + add.pl r11,r11,1 + asl r12,r12,r11 + sub r6,r6,r11 + add.f 0,r6,31 + lsr r10,DBL0L,r6 + mov.mi r10,0 + add r6,r6,11+32 + neg.f r11,r6 + asl DBL0L,DBL0L,r11 + mov.pl DBL0L,0 + sub r6,r6,32-1 + b.d .Lpast_denorm_dbl0 + asl r6,r6,20 + +.Linf_NaN: + tst_s DBL0L,DBL0L ; 0/0 -> NaN + xor_s DBL1H,DBL1H,DBL0H + bclr.eq.f DBL0H,DBL0H,31 + bmsk DBL0H,DBL1H,30 + xor_s DBL0H,DBL0H,DBL1H + sub.eq DBL0H,DBL0H,1 + mov_s DBL0L,0 + j_s.d [blink] + or DBL0H,DBL0H,r9 + .balign 4 +.Lret0_2: + xor_s DBL1H,DBL1H,DBL0H + mov_s DBL0L,0 + bmsk DBL0H,DBL1H,30 + j_s.d [blink] + xor_s DBL0H,DBL0H,DBL1H + .balign 4 + .global __divdf3 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. 
*/ +__divdf3: /* entry point; below, r9 is loaded with 0x7ff00000, r7 = DBL1H & r9 and r6 = DBL0H & r9 */ + asl r8,DBL1H,12 + lsr r4,r8,26 /* r4 = top six fraction bits of DBL1H, used as index into .Ldivtab */ + sub3 r10,pcl,61 /* hard-coded distance: keep in sync with the code and data layout */; (.-.Ldivtab) >> 3 + ld.as r9,[pcl,-124] /* hard-coded distance as well */; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000 + ld.as r4,[r10,r4] + lsr r12,DBL1L,20 + and.f r7,DBL1H,r9 + or r8,r8,r12 + mulu64 r4,r8 + beq.d .Ldenorm_dbl1 /* taken when DBL1H & r9 is zero (Z from and.f r7) */ +.Lpast_denorm_dbl1: + and.f r6,DBL0H,r9 + breq.d r7,r9,.Linf_nan_dbl1 /* exponent field of DBL1 is all ones */ + asl r4,r4,12 + sub r4,r4,mhi + mulu64 r4,r4 + beq.d .Ldenorm_dbl0 /* taken when DBL0H & r9 is zero (Z from and.f r6) */ + lsr r8,r8,1 + breq.d r6,r9,.Linf_nan_dbl0 /* exponent field of DBL0 is all ones */ + asl r12,DBL0H,11 + lsr r10,DBL0L,21 +.Lpast_denorm_dbl0: + bset r8,r8,31 + mulu64 mhi,r8 + add_s r12,r12,r10 + bset r5,r12,31 + cmp r5,r8 + cmp.eq DBL0L,DBL1L + lsr.cc r5,r5,1 + sub r4,r4,mhi ; u1.31 inverse, about 30 bit + mulu64 r5,r4 ; result fraction highpart + lsr r8,r8,2 ; u3.29 + add r5,r6, /* wait for immediate */ \ + 0x3fe00000 + mov r11,mhi ; result fraction highpart + mulu64 r11,r8 ; u-28.31 + asl_s DBL1L,DBL1L,9 ; u-29.23:9 + sbc r6,r5,r7 + mov r12,mlo ; u-28.31 + mulu64 r11,DBL1L ; mhi: u-28.23:9 + add.cs DBL0L,DBL0L,DBL0L + asl_s DBL0L,DBL0L,6 ; u-26.25:7 + asl r10,r11,23 + sub_l DBL0L,DBL0L,r12 + lsr r7,r11,9 + sub r5,DBL0L,mhi ; rest msw ; u-26.31:0 + mul64 r5,r4 ; mhi: result fraction lowpart + xor.f 0,DBL0H,DBL1H /* N flag = xor of the operand sign bits, consumed by bxor.mi below */ + and DBL0H,r6,r9 + add_s DBL0H,DBL0H,r7 + bclr r12,r9,20 ; 0x7fe00000 + brhs.d r6,r12,.Linf_denorm /* unsigned compare: r6 is outside the range below 0x7fe00000 */ + bxor.mi DBL0H,DBL0H,31 + add.f r12,mhi,0x11 + asr r9,r12,5 + sub.mi DBL0H,DBL0H,1 + add.f DBL0L,r9,r10 + tst r12,0x1c + jne.d [blink] /* return unless bits 2..4 of r12 are all zero */ + add.cs DBL0H,DBL0H,1 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in double + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. Since we want to know + only the sign bit, it is sufficient to calculate only the + highpart of the lower 64 bits.
*/ + mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo + sub.f DBL0L,DBL0L,1 + asl r12,r9,2 ; u-22.30:2 + sub.cs DBL0H,DBL0H,1 + sub.f r12,r12,2 + mov r10,mlo ; rest before considering r12 in r5 : -r10 + mulu64 r12,DBL1L ; mhi: u-51.32 + asl r5,r5,25 ; s-51.7:25 + lsr r10,r10,7 ; u-51.30:2 + mov r7,mhi ; u-51.32 + mulu64 r12,r8 ; mlo: u-51.31:1 + sub r5,r5,r10 + add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L + bset r7,r7,0 ; make sure that the result is not zero, and that + sub r5,r5,r7 ; a highpart zero appears negative + sub.f r5,r5,mlo ; rest msw + add.pl.f DBL0L,DBL0L,1 + j_s.d [blink] + add.eq DBL0H,DBL0H,1 + +.Linf_nan_dbl1: ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN + or.f 0,r6,DBL0L + cmp.ne r6,r9 + not_s DBL0L,DBL1H + sub_s.ne DBL0L,DBL0L,DBL0L + tst_s DBL0H,DBL0H + add_s DBL0H,DBL1H,DBL0L + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 +.Linf_nan_dbl0: + tst_s DBL1H,DBL1H + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + .balign 4 +.Linf_denorm: + lsr r12,r6,28 + brlo.d r12,0xc,.Linf +.Ldenorm: + asr r6,r6,20 + neg r9,r6 + mov_s DBL0H,0 + brhs.d r9,54,.Lret0 + bxor.mi DBL0H,DBL0H,31 + add r12,mhi,1 + and r12,r12,-4 + rsub r7,r6,5 + asr r10,r12,28 + bmsk r4,r12,27 + min r7,r7,31 + asr DBL0L,r4,r7 + add DBL1H,r11,r10 + abs.f r10,r4 + sub.mi r10,r10,1 + add.f r7,r6,32-5 + asl r4,r4,r7 + mov.mi r4,r10 + add.f r10,r6,23 + rsub r7,r6,9 + lsr r7,DBL1H,r7 + asl r10,DBL1H,r10 + or.pnz DBL0H,DBL0H,r7 + or.mi r4,r4,r10 + mov.mi r10,r7 + add.f DBL0L,r10,DBL0L + add.cs.f DBL0H,DBL0H,1 ; carry clear after this point + bxor.f 0,r4,31 + add.pnz.f DBL0L,DBL0L,1 + add.cs.f DBL0H,DBL0H,1 + jne_s [blink] + /* Calculation so far was not conclusive; calculate further rest. 
*/ + mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo + asr.f r12,r12,3 + asl r5,r5,25 ; s-51.7:25 + mov r11,mlo ; rest before considering r12 in r5 : -r11 + mulu64 r12,r8 ; u-51.31:1 + and r9,DBL0L,1 ; tie-breaker: round to even + lsr r11,r11,7 ; u-51.30:2 + mov DBL1H,mlo ; u-51.31:1 + mulu64 r12,DBL1L ; u-51.62:2 + sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L + add_s DBL1H,DBL1H,r11 + sub DBL1H,DBL1H,r5 ; -rest msw + add_s DBL1H,DBL1H,mhi ; -rest msw + add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-( + tst_s DBL1H,DBL1H + cmp.eq mlo,r9 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + +.Lret0: + /* return +- 0 */ + j_s.d [blink] + mov_s DBL0L,0 +.Linf: + mov_s DBL0H,r9 + mov_s DBL0L,0 + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + ENDFUNC(__divdf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S new file mode 100644 index 000000000..ef54ffd7c --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S @@ -0,0 +1,274 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + - calculate 15..18 bit inverse using a table of approximating polynomials. + precision is higher for polynomials used to evaluate input with larger + value. + - do one Newton-Raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + */ +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG: disabled cross-check wrapper. It calls __divsf3_c, then __divsf3_asm with the saved operands, compares the two r0 results and branches to abort when they differ. */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 +#if 1 + bne abort + jeq_s [blink] + b abort +#else + bne abort + j_s [blink] +#endif + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + FUNC(__divsf3) + .balign 4 +.Ldivtab: /* seed table for the inverse approximation; the __divsf3 entry code finds it with a hard-coded pc-relative sub3, so the layout between this label and that code must stay fixed */ + .long 0xfc0ffff0 + .long 0xf46ffefd + .long 0xed1ffd2a + .long 0xe627fa8e + .long 0xdf7ff73b + .long 0xd917f33b + .long 0xd2f7eea3 + .long 0xcd1fe986 + .long 0xc77fe3e7 + .long 0xc21fdddb + .long 0xbcefd760 + .long 0xb7f7d08c + .long 0xb32fc960 + .long 0xae97c1ea + .long 0xaa27ba26 + .long 0xa5e7b22e + .long 0xa1cfa9fe + .long 0x9ddfa1a0 + .long 0x9a0f990c + .long 0x9667905d + .long 0x92df878a + .long 0x8f6f7e84 + .long 0x8c27757e + .long 0x88f76c54 + .long 0x85df630c + .long 0x82e759c5 + .long 0x8007506d + .long 0x7d3f470a + .long 0x7a8f3da2 + .long 0x77ef341e + .long 0x756f2abe + .long 0x72f7212d + .long 0x709717ad + .long 0x6e4f0e44 + .long 0x6c1704d6 + .long 0x69e6fb44 + .long 0x67cef1d7 + .long 0x65c6e872 + .long 0x63cedf18 + .long 0x61e6d5cd + .long 0x6006cc6d + .long 0x5e36c323 + .long 0x5c76b9f3 + .long
0x5abeb0b7 + .long 0x5916a79b + .long 0x57769e77 + .long 0x55de954d + .long 0x54568c4e + .long 0x52d6834d + .long 0x51667a7f + .long 0x4ffe71b5 + .long 0x4e9e68f1 + .long 0x4d466035 + .long 0x4bf65784 + .long 0x4aae4ede + .long 0x496e4646 + .long 0x48363dbd + .long 0x47063547 + .long 0x45de2ce5 + .long 0x44be2498 + .long 0x43a61c64 + .long 0x4296144a + .long 0x41860c0e + .long 0x407e03ee +.L7f800000: + .long 0x7f800000 + .balign 4 + .global __divsf3_support +__divsf3_support: +.Linf_NaN: + bclr.f 0,r0,31 ; 0/0 -> NaN + xor_s r0,r0,r1 + bmsk r1,r0,30 + bic_s r0,r0,r1 + sub.eq r0,r0,1 + j_s.d [blink] + or r0,r0,r9 +.Lret0: + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. */ +__divsf3: + lsr r2,r1,17 + sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3 + bmsk_s r2,r2,5 + ld.as r5,[r3,r2] + asl r4,r1,9 + ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000 + mulu64 r5,r4 + and.f r11,r1,r9 + asl r6,r1,8 + bset r6,r6,31 + beq.d .Ldenorm_fp1 + asl r5,r5,13 + breq.d r11,r9,.Linf_nan_fp1 + and.f r2,r0,r9 + sub r7,r5,mhi + mulu64 r7,r6 + beq.d .Ldenorm_fp0 + asl r12,r0,8 + breq.d r2,r9,.Linf_nan_fp0 + mulu64 mhi,r7 +.Lpast_denorm_fp1: + bset r3,r12,31 +.Lpast_denorm_fp0: + cmp_s r3,r6 + lsr.cc r3,r3,1 + add_s r2,r2, /* wait for immediate */ \ + 0x3f000000 + sub r7,r7,mhi ; u1.31 inverse, about 30 bit + mulu64 r3,r7 + sbc r2,r2,r11 + xor.f 0,r0,r1 + and r0,r2,r9 + bclr r3,r9,23 ; 0x7f000000 + brhs.d r2,r3,.Linf_denorm + bxor.mi r0,r0,31 +.Lpast_denorm: + add r3,mhi,0x22 ; round to nearest or higher + tst r3,0x3c ; check if rounding was unsafe + lsr r3,r3,6 + jne.d [blink] ; return if rounding was safe. + add_s r0,r0,r3 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in single + precision. 
Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. */ + add_s r3,r3,r3 + sub_s r3,r3,1 + mulu64 r3,r6 + asr.f 0,r0,1 ; for round-to-even in case this is a denorm + rsub r2,r9,25 + asl_s r12,r12,r2 + sub.f 0,r12,mlo + j_s.d [blink] + sub.mi r0,r0,1 +.Linf_nan_fp1: + lsr_s r0,r0,31 + bmsk.f 0,r1,22 + asl_s r0,r0,31 + bne_s 0f ; inf/inf -> nan + brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan +0: j_s.d [blink] + mov r0,-1 +.Lsigned0: +.Linf_nan_fp0: + tst_s r1,r1 + j_s.d [blink] + bxor.mi r0,r0,31 + .balign 4 + .global __divsf3 +/* For denormal results, it is possible that an exact result needs + rounding, and thus the round-to-even rule has to come into play. */ +.Linf_denorm: + brlo r2,0xc0000000,.Linf +.Ldenorm: + asr_s r2,r2,23 + bic r0,r0,r9 + neg r9,r2 + brlo.d r9,25,.Lpast_denorm + lsr r3,mlo,r9 + /* Fall through: return +- 0 */ + j_s [blink] +.Linf: + j_s.d [blink] + or r0,r0,r9 + .balign 4 +.Ldenorm_fp1: + bclr r6,r6,31 + norm.f r12,r6 ; flag for x/0 -> Inf check + add r6,r6,r6 + rsub r5,r12,16 + ror r5,r1,r5 + asl r6,r6,r12 + bmsk r5,r5,5 + ld.as r5,[r3,r5] + add r4,r6,r6 + ; load latency + mulu64 r5,r4 + bic.ne.f 0, \ + 0x60000000,r0 ; large number / denorm -> Inf + asl r5,r5,13 + sub r7,r5,mhi + beq.d .Linf_NaN + mulu64 r7,r6 + asl_s r12,r12,23 + and.f r2,r0,r9 + add_s r2,r2,r12 + asl r12,r0,8 + bne.d .Lpast_denorm_fp1 +.Ldenorm_fp0: mulu64 mhi,r7 + bclr r12,r12,31 + norm.f r3,r12 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0 + asl_s r12,r12,r3 + asl_s r3,r3,23 + add_s r12,r12,r12 + add r11,r11,r3 + b.d .Lpast_denorm_fp0 + mov_s r3,r12 + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S new file mode 100644 index 000000000..132b0093f --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S @@ 
-0,0 +1,234 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __muldf3 + .balign 4 +__muldf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __muldf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __muldf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + and r12,DBL0H,DBL1H + bic.f 0,0x7ff80000,r12 ; both NaN -> OK + jeq_s [blink] + b abort +#define __muldf3 __muldf3_asm +#endif /* DEBUG */ + +__muldf3_support: /* This label makes debugger output saner. */ + .balign 4 + FUNC(__muldf3) +.Ldenorm_2: + breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output + norm.f r12,DBL1L + mov.mi r12,21 + add.pl r12,r12,22 + neg r11,r12 + asl_s r12,r12,20 + lsr.f DBL1H,DBL1L,r11 + ror DBL1L,DBL1L,r11 + sub_s DBL0H,DBL0H,r12 + mov.eq DBL1H,DBL1L + sub_l DBL1L,DBL1L,DBL1H + /* Fall through. 
*/ + .global __muldf3 + .balign 4 +__muldf3: + mulu64 DBL0L,DBL1L + ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)] + bmsk r6,DBL0H,19 + bset r6,r6,20 + and r11,DBL0H,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,DBL1H,r9 + breq.d r12,0,.Ldenorm_dbl1 + mov r8,mlo + mov r4,mhi + mulu64 r6,DBL1L + breq.d r11,r9,.Linf_nan + bmsk r10,DBL1H,19 + breq.d r12,r9,.Linf_nan + bset r10,r10,20 + add.f r4,r4,mlo + adc r5,mhi,0 + mulu64 r10,DBL0L + add_s r12,r12,r11 ; add exponents + add.f r4,r4,mlo + adc r5,r5,mhi + mulu64 r6,r10 + tst r8,r8 + bclr r8,r9,30 ; 0x3ff00000 + bset.ne r4,r4,0 ; put least significant word into sticky bit + bclr r6,r9,20 ; 0x7fe00000 + add.f r5,r5,mlo + adc r7,mhi,0 ; fraction product in r7:r5:r4 + lsr.f r10,r7,9 + rsub.eq r8,r8,r9 ; 0x40000000 + sub r12,r12,r8 ; subtract bias + implicit 1 + brhs.d r12,r6,.Linf_denorm + rsub r10,r10,12 +.Lshift_frac: + neg r8,r10 + asl r6,r4,r10 + lsr DBL0L,r4,r8 + add.f 0,r6,r6 + btst.eq DBL0L,0 + cmp.eq r4,r4 ; round to nearest / round to even + asl r4,r5,r10 + lsr r5,r5,r8 + adc.f DBL0L,DBL0L,r4 + xor.f 0,DBL0H,DBL1H + asl r7,r7,r10 + add_s r12,r12,r5 + adc DBL0H,r12,r7 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ + +/* If one number is denormal, subtract some from the exponent of the other + one (if the other exponent is too small, return 0), and normalize the + denormal. Then re-run the computation. 
*/ +.Lret0_2: + lsr_s DBL0H,DBL0H,31 + asl_s DBL0H,DBL0H,31 + j_s.d [blink] + mov_s DBL0L,0 + .balign 4 +.Ldenorm_dbl0: + mov_s r12,DBL0L + mov_s DBL0L,DBL1L + mov_s DBL1L,r12 + mov_s r12,DBL0H + mov_s DBL0H,DBL1H + mov_s DBL1H,r12 + and r11,DBL0H,r9 +.Ldenorm_dbl1: + brhs r11,r9,.Linf_nan + brhs 0x3ca00001,r11,.Lret0 + sub_s DBL0H,DBL0H,DBL1H + bmsk.f DBL1H,DBL1H,30 + add_s DBL0H,DBL0H,DBL1H + beq.d .Ldenorm_2 + norm r12,DBL1H + sub_s r12,r12,10 + asl r5,r12,20 + asl_s DBL1H,DBL1H,r12 + sub DBL0H,DBL0H,r5 + neg r5,r12 + lsr r6,DBL1L,r5 + asl_s DBL1L,DBL1L,r12 + b.d __muldf3 + add_s DBL1H,DBL1H,r6 + +.Lret0: xor_s DBL0H,DBL0H,DBL1H + bclr DBL1H,DBL0H,31 + xor_s DBL0H,DBL0H,DBL1H + j_s.d [blink] + mov_s DBL0L,0 + + .balign 4 +.Linf_nan: + bclr r12,DBL1H,31 + xor_s DBL1H,DBL1H,DBL0H + bclr_s DBL0H,DBL0H,31 + max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf + or.f 0,DBL0H,DBL0L + mov_s DBL0L,0 + or.ne.f DBL1L,DBL1L,r12 + not_s DBL0H,DBL0L ; inf * 0 -> NaN + mov.ne DBL0H,r8 + tst_s DBL1H,DBL1H + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* We have checked for infinity / NaN input before, and transformed + denormalized inputs into normalized inputs. Thus, the worst case + exponent overflows are: + 1 + 1 - 0x400 == 0xc02 : maximum underflow + 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow + N.B. 0x7e and 0x7f are also values for overflow. + + If (r12 <= -54), we have an underflow to zero. 
*/ + .balign 4 +.Linf_denorm: + lsr r6,r12,28 + brlo.d r6,0xc,.Linf + asr r6,r12,20 + add.f r10,r10,r6 + brgt.d r10,0,.Lshift_frac + mov_s r12,0 + beq.d .Lround_frac + add r10,r10,32 +.Lshift32_frac: + tst r4,r4 + mov r4,r5 + bset.ne r4,r4,1 + mov r5,r7 + brge.d r10,1,.Lshift_frac + mov r7,0 + breq.d r10,0,.Lround_frac + add r10,r10,32 + brgt r10,21,.Lshift32_frac + b_s .Lret0 + +.Lround_frac: + add.f 0,r4,r4 + btst.eq r5,0 + mov_s DBL0L,r5 + mov_s DBL0H,r7 + adc.eq.f DBL0L,DBL0L,0 + j_s.d [blink] + adc.eq DBL0H,DBL0H,0 + +.Linf: mov_s DBL0L,0 + xor.f DBL1H,DBL1H,DBL0H + mov_s DBL0H,r9 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + ENDFUNC(__muldf3) + + .balign 4 +.L7ff00000: + .long 0x7ff00000 diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S new file mode 100644 index 000000000..ac346de25 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S @@ -0,0 +1,180 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __mulsf3 + FUNC(__mulsf3) + .balign 4 +__mulsf3: + push_s blink + push_s r1 + bl.d __mulsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __mulsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__mulsf3) +#define __mulsf3 __mulsf3_asm +#endif /* DEBUG */ + + .balign 4 + .global __mulsf3 + FUNC(__mulsf3) +__mulsf3: + ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)] + bmsk r4,r1,22 + bset r2,r0,23 + asl_s r2,r2,8 + bset r3,r4,23 + mulu64 r2,r3 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,r1,r9 + breq.d r12,0,.Ldenorm_dbl1 + xor_s r0,r0,r1 + breq.d r11,r9,.Linf_nan_dbl0 + ld.as r4,[pcl,70]; [pcl,((.L7fffffff-.+2)/4)] + breq.d r12,r9,.Linf_nan_dbl1 +.Lpast_denorm: + asl.f 0,mhi,8 + mov r6,mhi + mov r7,mlo + add.pl r6,r6,r6 + bclr.pl r6,r6,23 + add.pl.f r7,r7,r7 + add.cs r6,r6,1 + lsr.f 0,r6,1 + add_s r12,r12,r11 + adc.f 0,r7,r4 + add_s r12,r12, \ + -0x3f800000 + adc.f r8,r6,r12 + tst.pl r8,r9 + bic r0,r0,r4 + min r3,r8,r9 + jpnz.d [blink] + add.pnz r0,r0,r3 +; infinity or denormal number + add.ne.f r3,r3,r3 + asr_s r3,r3,23+1 + bset r6,r6,23 + bpnz.d .Linfinity + sub_s r3,r3,1 + neg_s r2,r3 + brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0 + lsr r2,r6,r2 + asl r9,r6,r3 + lsr.f 0,r2,1 + tst r7,r7 + add_s r0,r0,r2 + bset.ne r9,r9,0 + adc.f 0,r9,r4 + j_s.d [blink] + add.cs r0,r0,1 +.Linfinity: + j_s.d [blink] + add_s r0,r0,r9 + +.Lret_r0: j_s [blink] + + .balign 4 +.Ldenorm_dbl0: + bclr_s r2,r2,31 + norm.f r4,r2 + add_s r2,r2,r2 + asl r2,r2,r4 + mulu64 r2,r3 + breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 + asl r4,r4,23 + sub.ne.f r12,r12,r4 + ld.as r4,[pcl,29]; [pcl,((.L7fffffff-.+2)/4)] + bhi.d .Lpast_denorm + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 + 
+ .balign 4 +.Ldenorm_dbl0_inf_nan_dbl1: + bmsk.f 0,r0,30 + beq_s .Lretnan + xor_s r0,r0,r1 +.Linf_nan_dbl1: + xor_s r1,r1,r0 +.Linf_nan_dbl0: + bclr_s r1,r1,31 + cmp_s r1,r9 + jls.d [blink] + xor_s r0,r0,r1 +; r1 NaN -> result NaN +.Lretnan: + j_s.d [blink] + mov r0,-1 + + .balign 4 +.Ldenorm_dbl1: + breq.d r11,r9,.Linf_nan_dbl0_2 + norm.f r3,r4 + sub_s r3,r3,7 + asl r4,r4,r3 + mulu64 r2,r4 + sub_s r3,r3,1 + asl_s r3,r3,23 + sub.ne.f r11,r11,r3 + ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)] + bhi.d .Lpast_denorm + bmsk r8,r0,30 + j_s.d [blink] + bic r0,r0,r8 + + .balign 4 +.Linf_nan_dbl0_2: + bclr_s r1,r1,31 + xor_s r0,r0,r1 + sub.eq r1,r1,1 ; inf/nan * 0 -> nan + bic.f 0,r9,r1 + j_s.d [blink] + or.eq r0,r0,r1 ; r1 nan -> result nan + + .balign 4 +.L7f800000: + .long 0x7f800000 +.L7fffffff: + .long 0x7fffffff + ENDFUNC(__mulsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600/divsf3.S new file mode 100644 index 000000000..d8ea88183 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600/divsf3.S @@ -0,0 +1,227 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 ; both NaN -> OK + jeq_s [blink] + bl abort + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + .balign 4 +__divdf3_support: /* This label makes debugger output saner. */ + FUNC(__divsf3) +.Ldenorm_fp0: + norm.f r12,r2 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0_NaN + tst r1,r9 + add_s r2,r2,r2 + sub_s r12,r12,8 + asl_s r2,r2,r12 + asl_l r12,r12,23 + bne.d .Lpast_denorm_fp0 + add r5,r5,r12 +/* r0 is subnormal, r1 is subnormal or 0. 
*/ + + .balign 4 +.Ldenorm_fp1: + norm.f r12,r3 ; flag for x/0 -> Inf check + bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf + beq_s .Linf + add_s r3,r3,r3 + sub_s r12,r12,8 + asl_s r3,r3,r12 + asl_s r12,r12,23 + b.d .Lpast_denorm_fp1 + add r4,r4,r12 + +.Lret0_NaN: + bclr.f 0,r1,31 ; 0/0 -> NaN + bic r0,r10,r9 + j_s.d [blink] + sub.eq r0,r0,1 + + .balign 4 +.Linf_nan_fp0: + bic.f 0,r9,r1 ; fp1 Inf -> result NaN + bic r1,r5,r9 ; fp1 sign + sub.eq r1,r1,1 + j_s.d [blink] + xor_s r0,r0,r1 +.Linf_nan_fp1: + bic r0,r4,r9 ; fp0 sign + bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan + xor.eq r1,r1,r9 + j_s.d [blink] + xor_s r0,r0,r1 + + .global __divsf3 + .balign 4 + .long 0x7f800000 ; exponent mask +__divsf3: + ld r9,[pcl,-4] + bmsk r2,r0,22 + xor r4,r0,r2 + bmsk r3,r1,22 + xor r5,r1,r3 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_fp0 + xor r10,r4,r5 + breq r11,r9,.Linf_nan_fp0 + bset_s r2,r2,23 + and r11,r1,r9 + breq r11,0,.Ldenorm_fp1 + breq r11,r9,.Linf_nan_fp1 +.Lpast_denorm_fp0: + bset_s r3,r3,23 +.Lpast_denorm_fp1: + cmp r2,r3 + asl_s r2,r2,6+1 + asl_s r3,r3,7 + add.lo r2,r2,r2 + bclr r8,r9,30 ; exponent bias + bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted + sub r4,r4,r5 + add r4,r4,r8 + xor.f 0,r10,r4 + bmi .Linf_denorm + and.f r12,r4,r9 + beq .Ldenorm + sub_s r2,r2,r3 ; discard implicit 1 + rsub r3,r3,1 ; prime r3 for two-insn divide-step use +.Ldiv_23bit: + .rep 6 + add1.f r2,r3,r2 + sub.cc r2,r2,r3 + .endr + breq r12,r9,.Linf + bmsk r0,r2,6 + xor_s r2,r2,r0 +.Ldiv_17bit: + .rep 7 + add1.f r2,r3,r2 + sub.cc r2,r2,r3 + .endr + asl_s r0,r0,7 + bmsk r1,r2,6 + xor_s r2,r2,r1 + or_s r0,r0,r1 +.Ldiv_10bit: + .rep 7 + add1.f r2,r3,r2 + sub.cc r2,r2,r3 + .endr + asl_s r0,r0,7 + bmsk r1,r2,6 + xor_s r2,r2,r1 + or_s r0,r0,r1 +.Ldiv_3bit: + .rep 3 + add1.f r2,r3,r2 + sub.cc r2,r2,r3 + .endr + asl_s r0,r0,3 +.Ldiv_0bit: + add1.f r1,r3,r2 + sub.cc r1,r1,r3 + bmsk_s r2,r2,2 + tst r1,-0x7e ; 0xffffff82, test for rest or odd + bmsk_s r1,r1,0 + add_s r0,r0,r2 ; 
assemble fraction + add_s r0,r0,r4 ; add in sign & exponent + j_s.d [blink] + add.ne r0,r0,r1 ; round to nearest / even + + .balign 4 +.Linf: + j_s.d [blink] + or r0,r10,r9 + +.Lret_r4: + j_s.d [blink] + mov_s r0,r4 + .balign 4 +.Linf_denorm: + add.f r12,r4,r4 + asr_l r12,r12,24 + bpl .Linf + max r12,r12,-24 +.Ldenorm: + rsub r3,r3,1 + add r1,pcl,68; .Ldenorm_tab-. + ldw.as r12,[r1,r12] + mov_s r0,0 + lsr_s r2,r2 + sub_s r1,r1,r12 + j_s.d [r1] + bic r4,r10,r9 +/* Jump table for denormal results. Each entry holds .Ldenorm_tab minus the target address; .Ldenorm loads the entry selected by r12 (clamped to >= -24 in .Linf_denorm) relative to .Ldenorm_tab and jumps to .Ldenorm_tab minus that value. Within one .Ldiv_Nbit group, consecutive entries must differ by exactly one divide step (the N*8 terms), so that each index runs one step more than the previous one. */ + .short .Ldenorm_tab-.Lret_r4 + .short .Ldenorm_tab-.Ldiv_0bit + .short .Ldenorm_tab-.Ldiv_3bit-2*8 + .short .Ldenorm_tab-.Ldiv_3bit-1*8 + .short .Ldenorm_tab-.Ldiv_3bit + .short .Ldenorm_tab-.Ldiv_10bit-6*8 + .short .Ldenorm_tab-.Ldiv_10bit-5*8 + .short .Ldenorm_tab-.Ldiv_10bit-4*8 /* skip 4 of the 7 steps; must not repeat the -3*8 entry below */ + .short .Ldenorm_tab-.Ldiv_10bit-3*8 + .short .Ldenorm_tab-.Ldiv_10bit-2*8 + .short .Ldenorm_tab-.Ldiv_10bit-1*8 + .short .Ldenorm_tab-.Ldiv_10bit + .short .Ldenorm_tab-.Ldiv_17bit-6*8 + .short .Ldenorm_tab-.Ldiv_17bit-5*8 + .short .Ldenorm_tab-.Ldiv_17bit-4*8 + .short .Ldenorm_tab-.Ldiv_17bit-3*8 + .short .Ldenorm_tab-.Ldiv_17bit-2*8 + .short .Ldenorm_tab-.Ldiv_17bit-1*8 + .short .Ldenorm_tab-.Ldiv_17bit + .short .Ldenorm_tab-.Ldiv_23bit-5*8 + .short .Ldenorm_tab-.Ldiv_23bit-4*8 + .short .Ldenorm_tab-.Ldiv_23bit-3*8 + .short .Ldenorm_tab-.Ldiv_23bit-2*8 + .short .Ldenorm_tab-.Ldiv_23bit-1*8 +.Ldenorm_tab: + .short .Ldenorm_tab-.Ldiv_23bit + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600/mulsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600/mulsf3.S new file mode 100644 index 000000000..4a0736db0 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600/mulsf3.S @@ -0,0 +1,179 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __mulsf3 + FUNC(__mulsf3) + .balign 4 +__mulsf3: + push_s blink + push_s r1 + bl.d __mulsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __mulsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__mulsf3) +#define __mulsf3 __mulsf3_asm +#endif /* DEBUG */ + + .balign 4 + .global __mulsf3 + FUNC(__mulsf3) +__mulsf3: + ld.as r9,[pcl,76]; [pcl,((.L7f800000-.+2)/4)] + bmsk r4,r1,22 + bset r3,r4,23 + bmsk r2,r0,22 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,r1,r9 + xor_s r0,r0,r1 + breq.d r11,r9,.Linf_nan_dbl0 + bset_s r2,r2,23 + breq r12,0,.Ldenorm_dbl1 + breq r12,r9,.Linf_nan_dbl1 +.Lpast_denorm: + mov r6,0 + lsr.f r7,r2 +; We could do this a bit faster here with a 32 bit shift register and +; inserting the r2 factor / retrieving the low result a byte at a time, +; but that'd increase code size. 
+ mov lp_count,24 + .balign 4 + lp 0f + add.cs r6,r6,r3 + lsr.f r6,r6 + rrc.f r7,r7 +0: + ld.as r4,[pcl,59]; [pcl,((.L7fffffff-.+2)/4)] + asl.f 0,r6,8 + add.pl r6,r6,r6 + bclr.pl r6,r6,23 + add.pl.f r7,r7,r7 + add.cs r6,r6,1 + lsr.f 0,r6,1 + add_s r12,r12,r11 + adc.f 0,r7,r4 + add_s r12,r12, \ + -0x3f800000 + adc.f r8,r6,r12 + tst.pl r8,r9 + bic r0,r0,r4 + min r3,r8,r9 + jpnz.d [blink] + add.pnz r0,r0,r3 +; infinity or denormal number + add.ne.f r3,r3,r3 + asr_s r3,r3,23+1 + bset r6,r6,23 + bpnz.d .Linfinity + sub_s r3,r3,1 + neg_s r2,r3 + brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0 + lsr r2,r6,r2 + asl r9,r6,r3 + lsr.f 0,r2,1 + tst r7,r7 + add_s r0,r0,r2 + bset.ne r9,r9,0 + adc.f 0,r9,r4 + j_s.d [blink] + add.cs r0,r0,1 +.Linfinity: + j_s.d [blink] + add_s r0,r0,r9 + +.Lret_r0: j_s [blink] + + .balign 4 +.Ldenorm_dbl0: + asl_s r2,r2,8 + norm.f r4,r2 + lsr_s r2,r2,7 + asl r2,r2,r4 + breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 + asl r4,r4,23 + sub.ne.f r12,r12,r4 + bhi.d .Lpast_denorm + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 + + .balign 4 +.Ldenorm_dbl0_inf_nan_dbl1: + bmsk.f 0,r0,30 + beq_s .Lretnan + xor_s r0,r0,r1 +.Linf_nan_dbl1: + xor_s r1,r1,r0 + bclr_s r1,r1,31 + j_s.d [blink] + xor_s r0,r0,r1 +.Linf_nan_dbl0: + sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf) + bic.f 0,r9,r2 + xor_s r0,r0,r1 + bclr_s r1,r1,31 + xor_s r0,r0,r1 + jne_s [blink] +.Lretnan: + j_s.d [blink] + mov r0,-1 + .balign 4 +.Ldenorm_dbl1: + norm.f r3,r4 + sub_s r3,r3,7 + asl r4,r4,r3 + sub_s r3,r3,1 + asl_s r3,r3,23 + sub.ne.f r11,r11,r3 + bhi.d .Lpast_denorm + mov_s r3,r4 + bmsk r3,r0,30 + j_s.d [blink] + bic_s r0,r0,r3 + + .balign 4 +.L7f800000: + .long 0x7f800000 +.L7fffffff: + .long 0x7fffffff + ENDFUNC(__mulsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divdf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/divdf3.S new file mode 100644 index 000000000..dd74ba67c --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divdf3.S 
@@ -0,0 +1,416 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + to calculate a := b/x as b*y, with y := 1/x: + - x is in the range [1..2) + - calculate 15..18 bit inverse y0 using a table of approximating polynoms. + Precision is higher for polynoms used to evaluate input with larger + value. + - Do one newton-raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + The truncation error for the either is less than 1 + x/2 ulp. + A 31 bit inverse can be simply calculated by using x with implicit 1 + and chaining the multiplies. For a 32 bit inverse, we multiply y0^2 + with the bare fraction part of x, then add in y0^2 for the implicit + 1 of x. 
+ - If calculating a 31 bit inverse, the systematic error is less than + -1 ulp; likewise, for 32 bit, it is less than -2 ulp. + - If we calculate our seed with a 32 bit fraction, we can achieve a + tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we + only need to take the step to calculate the 2nd stage rest and + rounding adjust 1/32th of the time. However, if we use a 20 bit + fraction for the seed, the negative error can exceed -2 ulp/128, (2) + thus for a simple add / tst check, we need to do the 2nd stage + rest calculation/ rounding adjust 1/16th of the time. + (1): The inexactness of the 32 bit inverse contributes an error in the + range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the + rest contributes an error < +1/x ulp/128 . In the interval [1,2), + x/2 + 1/x <= 1.5 . + (2): Unless proven otherwise. I have not actually looked for an + example where -2 ulp/128 is exceeded, and my calculations indicate + that the excess, if existent, is less than -1/512 ulp. + */ +#include "arc-ieee-754.h" + +/* N.B. fp-bit.c does double rounding on denormal numbers. */ +#if 0 /* DEBUG */ + .global __divdf3 + FUNC(__divdf3) + .balign 4 +__divdf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __divdf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __divdf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + and r12,DBL0H,DBL1H + bic.f 0,0x7ff80000,r12 ; both NaN -> OK + jeq_s [blink] + bl abort + ENDFUNC(__divdf3) +#define __divdf3 __divdf3_asm +#endif /* DEBUG */ + + FUNC(__divdf3) +__divdf3_support: /* This label makes debugger output saner. 
*/ + .balign 4 +.Ldenorm_dbl1: + brge r6, \ + 0x43500000,.Linf_NaN ; large number / denorm -> Inf + bmsk.f r12,DBL1H,19 + mov.eq r12,DBL1L + mov.eq DBL1L,0 + sub.eq r7,r7,32 + norm.f r11,r12 ; flag for x/0 -> Inf check + beq_s .Linf_NaN + mov.mi r11,0 + add.pl r11,r11,1 + add_s r12,r12,r12 + asl r8,r12,r11 + rsub r12,r11,31 + lsr r12,DBL1L,r12 + tst_s DBL1H,DBL1H + or r8,r8,r12 + lsr r4,r8,26 + lsr DBL1H,r8,12 + ld.as r4,[r10,r4] + bxor.mi DBL1H,DBL1H,31 + sub r11,r11,11 + asl DBL1L,DBL1L,r11 + sub r11,r11,1 + mpyhu r5,r4,r8 + sub r7,r7,r11 + asl r4,r4,12 + b.d .Lpast_denorm_dbl1 + asl r7,r7,20 + ; wb stall + + .balign 4 +.Ldenorm_dbl0: + bmsk.f r12,DBL0H,19 + ; wb stall + mov.eq r12,DBL0L + sub.eq r6,r6,32 + norm.f r11,r12 ; flag for 0/x -> 0 check + brge r7, \ + 0x43500000, .Lret0_NaN ; denorm/large number -> 0 + beq_s .Lret0_NaN + mov.mi r11,0 + add.pl r11,r11,1 + asl r12,r12,r11 + sub r6,r6,r11 + add.f 0,r6,31 + lsr r10,DBL0L,r6 + mov.mi r10,0 + add r6,r6,11+32 + neg.f r11,r6 + asl DBL0L,DBL0L,r11 + mov.pl DBL0L,0 + sub r6,r6,32-1 + b.d .Lpast_denorm_dbl0 + asl r6,r6,20 + +.Linf_NaN: + tst_s DBL0L,DBL0L ; 0/0 -> NaN + xor_s DBL1H,DBL1H,DBL0H + bclr.eq.f DBL0H,DBL0H,31 + bmsk DBL0H,DBL1H,30 + xor_s DBL0H,DBL0H,DBL1H + sub.eq DBL0H,DBL0H,1 + mov_s DBL0L,0 + j_s.d [blink] + or DBL0H,DBL0H,r9 + .balign 4 +.Lret0_NaN: + xor_s DBL1H,DBL1H,DBL0H + cmp_s r12,r9 + mov_s DBL0L,0 + bmsk DBL0H,DBL1H,30 + xor_s DBL0H,DBL0H,DBL1H + j_s.d [blink] + sub.hi DBL0H,DBL0H,1 +.Linf_nan_dbl1: ; Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN + not_s DBL0L,DBL1H + cmp r6,r9 + sub_s.ne DBL0L,DBL0L,DBL0L + tst_s DBL0H,DBL0H + add_s DBL0H,DBL1H,DBL0L + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 +.Linf_nan_dbl0: + tst_s DBL1H,DBL1H + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + .balign 4 + .global __divdf3 +/* N.B. the spacing between divtab and the add3 to get its address must + be a multiple of 8. */ +__divdf3: + asl r8,DBL1H,12 + lsr r12,DBL1L,20 + lsr r4,r8,26 + add3 r10,pcl,59 ; (.Ldivtab-.) 
>> 3 + ld.as r4,[r10,r4] + ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000 + or r8,r8,r12 + mpyhu r5,r4,r8 + and.f r7,DBL1H,r9 + asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline. + beq.d .Ldenorm_dbl1 + and r6,DBL0H,r9 +.Lpast_denorm_dbl1: ; wb stall + sub r4,r4,r5 + mpyhu r5,r4,r4 + breq.d r6,0,.Ldenorm_dbl0 + lsr r8,r8,1 + asl r12,DBL0H,11 + lsr r10,DBL0L,21 +.Lpast_denorm_dbl0: ; wb stall + bset r8,r8,31 + mpyhu r11,r5,r8 + add_s r12,r12,r10 + bset r5,r12,31 + cmp r5,r8 + cmp.eq DBL0L,DBL1L + ; wb stall + lsr.cc r5,r5,1 + sub r4,r4,r11 ; u1.31 inverse, about 30 bit + mpyhu r11,r5,r4 ; result fraction highpart + breq r7,r9,.Linf_nan_dbl1 + lsr r8,r8,2 ; u3.29 + add r5,r6, /* wait for immediate / XMAC wb stall */ \ + 0x3fe00000 + ; wb stall (not for XMAC) + breq r6,r9,.Linf_nan_dbl0 + mpyu r12,r11,r8 ; u-28.31 + asl_s DBL1L,DBL1L,9 ; u-29.23:9 + sbc r6,r5,r7 + ; resource conflict (not for XMAC) + mpyhu r5,r11,DBL1L ; u-28.23:9 + add.cs DBL0L,DBL0L,DBL0L + asl_s DBL0L,DBL0L,6 ; u-26.25:7 + asl r10,r11,23 + sub_l DBL0L,DBL0L,r12 + ; wb stall (before 'and' for XMAC) + lsr r7,r11,9 + sub r5,DBL0L,r5 ; rest msw ; u-26.31:0 + mpyh r12,r5,r4 ; result fraction lowpart + xor.f 0,DBL0H,DBL1H + and DBL0H,r6,r9 + add_s DBL0H,DBL0H,r7 ; (XMAC wb stall) + bxor.mi DBL0H,DBL0H,31 + brhs r6, /* wb stall / wait for immediate */ \ + 0x7fe00000,.Linf_denorm + add.f r12,r12,0x11 + asr r9,r12,5 + sub.mi DBL0H,DBL0H,1 + add.f DBL0L,r9,r10 + tst r12,0x1c + jne.d [blink] + add.cs DBL0H,DBL0H,1 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in double + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. Since we want to know + only the sign bit, it is sufficient to calculate only the + highpart of the lower 64 bits. 
*/ + sub.f DBL0L,DBL0L,1 + asl r12,r9,2 ; u-22.30:2 + mpyu r10,r11,DBL1L ; rest before considering r12 in r5 : -r10 + sub.cs DBL0H,DBL0H,1 + sub.f r12,r12,2 + ; resource conflict (not for XMAC) + mpyhu r7,r12,DBL1L ; u-51.32 + asl r5,r5,25 ; s-51.7:25 + lsr r10,r10,7 ; u-51.30:2 + ; resource conflict (not for XMAC) + ; resource conflict (not for XMAC) + mpyu r9,r12,r8 ; u-51.31:1 + sub r5,r5,r10 + add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L + bset r7,r7,0 ; make sure that the result is not zero, and that + ; wb stall (one earlier for XMAC) + sub r5,r5,r7 ; a highpart zero appears negative + sub.f r5,r5,r9 ; rest msw + add.pl.f DBL0L,DBL0L,1 + j_s.d [blink] + add.eq DBL0H,DBL0H,1 + + .balign 4 +.Linf_denorm: + brlo r6,0xc0000000,.Linf +.Ldenorm: + asr r6,r6,20 + neg r9,r6 + mov_s DBL0H,0 + brhs.d r9,54,.Lret0 + bxor.mi DBL0H,DBL0H,31 + add_l r12,r12,1 + and r12,r12,-4 + rsub r7,r6,5 + asr r10,r12,28 + bmsk r4,r12,27 + asrs DBL0L,r4,r7 + add DBL1H,r11,r10 + add.f r7,r6,32-5 + abss r10,r4 + asl r4,r4,r7 + mov.mi r4,r10 + add.f r10,r6,23 + rsub r7,r6,9 + lsr r7,DBL1H,r7 + asl r10,DBL1H,r10 + or.pnz DBL0H,DBL0H,r7 + or.mi r4,r4,r10 + mov.mi r10,r7 + add.f DBL0L,r10,DBL0L + add.cs.f DBL0H,DBL0H,1 ; carry clear after this point + bxor.f 0,r4,31 + add.pnz.f DBL0L,DBL0L,1 + add.cs.f DBL0H,DBL0H,1 + jne_l [blink] + /* Calculation so far was not conclusive; calculate further rest. 
*/ + mpyu r11,r11,DBL1L ; rest before considering r12 in r5 : -r11 + asr.f r12,r12,3 + asl r5,r5,25 ; s-51.7:25 + ; resource conflict (not for XMAC) + mpyu DBL1H,r12,r8 ; u-51.31:1 + and r9,DBL0L,1 ; tie-breaker: round to even + lsr r11,r11,7 ; u-51.30:2 + ; resource conflict (not for XMAC) + mpyhu r8,r12,DBL1L ; u-51.32 + sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L + add_s DBL1H,DBL1H,r11 + ; resource conflict (not for XMAC) + ; resource conflict (not for XMAC) + mpyu r12,r12,DBL1L ; u-83.30:2 + sub DBL1H,DBL1H,r5 ; -rest msw + add_s DBL1H,DBL1H,r8 ; -rest msw + add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-( + ; wb stall (XMAC: Before add.f) + tst_s DBL1H,DBL1H + cmp.eq r12,r9 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + +.Lret0: + /* return +- 0 */ + j_s.d [blink] + mov_s DBL0L,0 +.Linf: + mov_s DBL0H,r9 + mov_s DBL0L,0 + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + + .balign 4 +.Ldivtab: + .long 0xfc0fffe1 + .long 0xf46ffdfb + .long 0xed1ffa54 + .long 0xe61ff515 + .long 0xdf7fee75 + .long 0xd91fe680 + .long 0xd2ffdd52 + .long 0xcd1fd30c + .long 0xc77fc7cd + .long 0xc21fbbb6 + .long 0xbcefaec0 + .long 0xb7efa100 + .long 0xb32f92bf + .long 0xae8f83b7 + .long 0xaa2f7467 + .long 0xa5ef6479 + .long 0xa1cf53fa + .long 0x9ddf433e + .long 0x9a0f3216 + .long 0x965f2091 + .long 0x92df0f11 + .long 0x8f6efd05 + .long 0x8c1eeacc + .long 0x88eed876 + .long 0x85dec615 + .long 0x82eeb3b9 + .long 0x800ea10b + .long 0x7d3e8e0f + .long 0x7a8e7b3f + .long 0x77ee6836 + .long 0x756e5576 + .long 0x72fe4293 + .long 0x709e2f93 + .long 0x6e4e1c7f + .long 0x6c0e095e + .long 0x69edf6c5 + .long 0x67cde3a5 + .long 0x65cdd125 + .long 0x63cdbe25 + .long 0x61ddab3f + .long 0x600d991f + .long 0x5e3d868c + .long 0x5c6d7384 + .long 0x5abd615f + .long 0x590d4ecd + .long 0x576d3c83 + .long 0x55dd2a89 + .long 0x545d18e9 + .long 0x52dd06e9 + .long 0x516cf54e + .long 0x4ffce356 + .long 0x4e9cd1ce + .long 0x4d3cbfec + .long 0x4becae86 + .long 0x4aac9da4 + .long 0x496c8c73 + 
.long 0x483c7bd3 + .long 0x470c6ae8 + .long 0x45dc59af + .long 0x44bc4915 + .long 0x43ac3924 + .long 0x428c27fb + .long 0x418c187a + .long 0x407c07bd +.L7ff00000: + .long 0x7ff00000 + ENDFUNC(__divdf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divsf3-stdmul.S b/gcc-4.9/libgcc/config/arc/ieee-754/divsf3-stdmul.S new file mode 100644 index 000000000..620209d1f --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divsf3-stdmul.S @@ -0,0 +1,281 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + - calculate 15..18 bit inverse using a table of approximating polynoms. + precision is higher for polynoms used to evaluate input with larger + value. + - do one newton-raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. 
+ */ +#include "arc-ieee-754.h" + +#if 0 /* DEBUG: run __divsf3_c and __divsf3_asm on the same operands and abort if the two results differ. */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 +#if 1 + bne abort + jeq_s [blink] + b abort +#else + bne abort + j_s [blink] +#endif + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + FUNC(__divsf3) + .balign 4 +.L7f800000: + .long 0x7f800000 /* exponent field mask; __divsf3 loads it into r9 */ +.Ldivtab: /* 64 entries; __divsf3 indexes this with bits 17..22 of r1 */ + .long 0xfc0ffff0 + .long 0xf46ffefd + .long 0xed1ffd2a + .long 0xe627fa8e + .long 0xdf7ff73b + .long 0xd917f33b + .long 0xd2f7eea3 + .long 0xcd1fe986 + .long 0xc77fe3e7 + .long 0xc21fdddb + .long 0xbcefd760 + .long 0xb7f7d08c + .long 0xb32fc960 + .long 0xae97c1ea + .long 0xaa27ba26 + .long 0xa5e7b22e + .long 0xa1cfa9fe + .long 0x9ddfa1a0 + .long 0x9a0f990c + .long 0x9667905d + .long 0x92df878a + .long 0x8f6f7e84 + .long 0x8c27757e + .long 0x88f76c54 + .long 0x85df630c + .long 0x82e759c5 + .long 0x8007506d + .long 0x7d3f470a + .long 0x7a8f3da2 + .long 0x77ef341e + .long 0x756f2abe + .long 0x72f7212d + .long 0x709717ad + .long 0x6e4f0e44 + .long 0x6c1704d6 + .long 0x69e6fb44 + .long 0x67cef1d7 + .long 0x65c6e872 + .long 0x63cedf18 + .long 0x61e6d5cd + .long 0x6006cc6d + .long 0x5e36c323 + .long 0x5c76b9f3 + .long 0x5abeb0b7 + .long 0x5916a79b + .long 0x57769e77 + .long 0x55de954d + .long 0x54568c4e + .long 0x52d6834d + .long 0x51667a7f + .long 0x4ffe71b5 + .long 0x4e9e68f1 + .long 0x4d466035 + .long 0x4bf65784 + .long 0x4aae4ede + .long 0x496e4646 + .long 0x48363dbd + .long 0x47063547 + .long 0x45de2ce5 + .long 0x44be2498 + .long 0x43a61c64 + .long 0x4296144a + .long 0x41860c0e + .long 0x407e03ee +__divsf3_support: /* This label makes debugger output saner.
*/ +.Ldenorm_fp1: + bclr r6,r6,31 + norm.f r12,r6 ; flag for x/0 -> Inf check + add r6,r6,r6 + rsub r5,r12,16 + ror r5,r1,r5 + asl r6,r6,r12 + bmsk r5,r5,5 + ld.as r5,[r3,r5] + add r4,r6,r6 + ; load latency + mpyhu r7,r5,r4 + bic.ne.f 0, \ + 0x60000000,r0 ; large number / denorm -> Inf + beq_s .Linf_NaN + asl r5,r5,13 + ; wb stall + ; slow track + sub r7,r5,r7 + mpyhu r8,r7,r6 + asl_s r12,r12,23 + and.f r2,r0,r9 + add r2,r2,r12 + asl r12,r0,8 + ; wb stall + bne.d .Lpast_denorm_fp1 +.Ldenorm_fp0: + mpyhu r8,r8,r7 + bclr r12,r12,31 + norm.f r3,r12 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0 + asl_s r12,r12,r3 + asl_s r3,r3,23 + add_s r12,r12,r12 + add r11,r11,r3 + b.d .Lpast_denorm_fp0 + mov_s r3,r12 + .balign 4 +.Linf_NaN: + bclr.f 0,r0,31 ; 0/0 -> NaN + xor_s r0,r0,r1 + bmsk r1,r0,30 + bic_s r0,r0,r1 + sub.eq r0,r0,1 + j_s.d [blink] + or r0,r0,r9 +.Lret0: + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 +.Linf_nan_fp1: + lsr_s r0,r0,31 + bmsk.f 0,r1,22 + asl_s r0,r0,31 + bne_s 0f ; inf/inf -> nan + brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan +0: j_s.d [blink] + mov r0,-1 +.Lsigned0: +.Linf_nan_fp0: + tst_s r1,r1 + j_s.d [blink] + bxor.mi r0,r0,31 + .balign 4 + .global __divsf3 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8.
*/ +__divsf3: /* r0: dividend, r1: divisor; the quotient is returned in r0. */ + lsr r2,r1,17 + sub3 r3,pcl,55;(.-.Ldivtab) >> 3 + bmsk_s r2,r2,5 /* r2 = bits 17..22 of r1, used as the .Ldivtab index */ + ld.as r5,[r3,r2] + asl r4,r1,9 + ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000 + mpyhu r7,r5,r4 + asl r6,r1,8 + and.f r11,r1,r9 + bset r6,r6,31 + asl r5,r5,13 + ; wb stall + beq .Ldenorm_fp1 + sub r7,r5,r7 + mpyhu r8,r7,r6 + breq.d r11,r9,.Linf_nan_fp1 + and.f r2,r0,r9 + beq.d .Ldenorm_fp0 + asl r12,r0,8 + ; wb stall + breq r2,r9,.Linf_nan_fp0 + mpyhu r8,r8,r7 +.Lpast_denorm_fp1: + bset r3,r12,31 +.Lpast_denorm_fp0: + cmp_s r3,r6 + lsr.cc r3,r3,1 + add_s r2,r2, /* wait for immediate */ \ + /* wb stall */ \ + 0x3f000000 + sub r7,r7,r8 ; u1.31 inverse, about 30 bit + mpyhu r3,r3,r7 + sbc r2,r2,r11 + xor.f 0,r0,r1 + and r0,r2,r9 + bxor.mi r0,r0,31 + brhs r2, /* wb stall / wait for immediate */ \ + 0x7f000000,.Linf_denorm +.Lpast_denorm: + add_s r3,r3,0x22 ; round to nearest or higher + tst r3,0x3c ; check if rounding was unsafe + lsr r3,r3,6 + jne.d [blink] ; return if rounding was safe. + add_s r0,r0,r3 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in single + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. */ + add_s r3,r3,r3 + sub_s r3,r3,1 + mpyu r3,r3,r6 + asr.f 0,r0,1 ; for round-to-even in case this is a denorm + rsub r2,r9,25 + asl_s r12,r12,r2 + ; wb stall + ; slow track + sub.f 0,r12,r3 + j_s.d [blink] + sub.mi r0,r0,1 +/* For denormal results, it is possible that an exact result needs + rounding, and thus the round-to-even rule has to come into play.
*/ +.Linf_denorm: + brlo r2,0xc0000000,.Linf +.Ldenorm: + asr_s r2,r2,23 + bic r0,r0,r9 + neg r9,r2 + brlo.d r9,25,.Lpast_denorm + lsr r3,r3,r9 + /* Fall through: return +- 0 */ + j_s [blink] +.Linf: + j_s.d [blink] + or r0,r0,r9 /* exponent field all ones, fraction zero: +-Inf */ + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/divsf3.S new file mode 100644 index 000000000..edc16a849 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divsf3.S @@ -0,0 +1,221 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>.
*/ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG: run __divsf3_c and __divsf3_asm on the same operands and abort if the two results differ. */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 +#if 1 + bne abort + jeq_s [blink] + b abort +#else + bne abort + j_s [blink] +#endif + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + .balign 4 +__divsf3_support: /* This label makes debugger output saner. */ + FUNC(__divsf3) +.Ldenorm_fp0: + norm.f r12,r2 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0_NaN + tst r1,r9 + add_s r2,r2,r2 + sub_s r12,r12,8 + asl_s r2,r2,r12 + asl_l r12,r12,23 + bne.d .Lpast_denorm_fp0 + add r5,r5,r12 +/* r0 is subnormal, r1 is subnormal or 0. */ + + .balign 4 +.Ldenorm_fp1: + norm.f r12,r3 ; flag for x/0 -> Inf check + bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf + beq_s .Linf + add_s r3,r3,r3 + sub_s r12,r12,8 + asl_s r3,r3,r12 + asl_s r12,r12,23 + b.d .Lpast_denorm_fp1 + add r4,r4,r12 + +.Lret0_NaN: + bclr.f 0,r1,31 ; 0/0 -> NaN + bic r0,r10,r9 + j_s.d [blink] + sub.eq r0,r0,1 + + .global __divsf3 + .balign 4 + .long 0x7f800000 ; exponent mask +__divsf3: /* r0: dividend, r1: divisor; the quotient is returned in r0. */ + ld r9,[pcl,-4] + bmsk r2,r0,22 + xor r4,r0,r2 + bmsk r3,r1,22 + xor r5,r1,r3 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_fp0 + xor r10,r4,r5 + breq r11,r9,.Linf_nan_fp0 + bset_s r2,r2,23 + and r11,r1,r9 + breq r11,0,.Ldenorm_fp1 + breq r11,r9,.Linf_nan_fp1 +.Lpast_denorm_fp0: + bset_s r3,r3,23 +.Lpast_denorm_fp1: + cmp r2,r3 + asl_s r2,r2,6+1 + asl_s r3,r3,7 + add.lo r2,r2,r2 + bclr r8,r9,30 ; exponent bias + bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted + sub r4,r4,r5 + add r4,r4,r8 + xor.f 0,r10,r4 + bmi .Linf_denorm + and r12,r4,r9 + breq r12,0,.Ldenorm + sub_s r2,r2,r3 ; discard implicit 1 +.Ldiv_23bit: + .rep 6 + divaw r2,r2,r3 + .endr + breq r12,r9,.Linf + bmsk r0,r2,6 + xor_s r2,r2,r0 +.Ldiv_17bit: + .rep 7 + divaw
r2,r2,r3 + .endr + asl_s r0,r0,7 + bmsk r1,r2,6 + xor_s r2,r2,r1 + or_s r0,r0,r1 +.Ldiv_10bit: + .rep 7 + divaw r2,r2,r3 + .endr + asl_s r0,r0,7 + bmsk r1,r2,6 + xor_s r2,r2,r1 + or_s r0,r0,r1 +.Ldiv_3bit: + .rep 3 + divaw r2,r2,r3 + .endr + asl_s r0,r0,3 +.Ldiv_0bit: + divaw r1,r2,r3 + bmsk_s r2,r2,2 + tst r1,-0x7e ; 0xffffff82, test for rest or odd + bmsk_s r1,r1,0 + add_s r0,r0,r2 ; assemble fraction + add_s r0,r0,r4 ; add in sign & exponent + j_s.d [blink] + add.ne r0,r0,r1 ; round to nearest / even + + .balign 4 +.Linf_nan_fp0: + bic.f 0,r9,r1 ; fp1 Inf -> result NaN + bic r1,r5,r9 ; fp1 sign + sub.eq r1,r1,1 + j_s.d [blink] + xor_s r0,r0,r1 +.Linf_nan_fp1: + bic r0,r4,r9 ; fp0 sign + bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan + xor.eq r1,r1,r9 + j_s.d [blink] + xor_s r0,r0,r1 +.Linf: + j_s.d [blink] + or r0,r10,r9 + +.Lret_r4: + j_s.d [blink] + mov_s r0,r4 + .balign 4 +.Linf_denorm: + add.f r12,r4,r4 + asr_l r12,r12,24 + bpl .Linf + max r12,r12,-24 +.Ldenorm: + add r1,pcl,42; .Ldenorm_tab-.
+ ldb_s r12,[r12,r1] + mov_s r0,0 + lsr_s r2,r2 + sub_s r1,r1,r12 + j_s.d [r1] + bic r4,r10,r9 /* each byte below is .Ldenorm_tab minus the entry point that is jumped to */ + .byte .Ldenorm_tab-.Lret_r4 + .byte .Ldenorm_tab-.Ldiv_0bit + .byte .Ldenorm_tab-.Ldiv_3bit-8 + .byte .Ldenorm_tab-.Ldiv_3bit-4 + .byte .Ldenorm_tab-.Ldiv_3bit + .byte .Ldenorm_tab-.Ldiv_10bit-24 + .byte .Ldenorm_tab-.Ldiv_10bit-20 + .byte .Ldenorm_tab-.Ldiv_10bit-16 + .byte .Ldenorm_tab-.Ldiv_10bit-12 + .byte .Ldenorm_tab-.Ldiv_10bit-8 + .byte .Ldenorm_tab-.Ldiv_10bit-4 + .byte .Ldenorm_tab-.Ldiv_10bit + .byte .Ldenorm_tab-.Ldiv_17bit-24 + .byte .Ldenorm_tab-.Ldiv_17bit-20 + .byte .Ldenorm_tab-.Ldiv_17bit-16 + .byte .Ldenorm_tab-.Ldiv_17bit-12 + .byte .Ldenorm_tab-.Ldiv_17bit-8 + .byte .Ldenorm_tab-.Ldiv_17bit-4 + .byte .Ldenorm_tab-.Ldiv_17bit + .byte .Ldenorm_tab-.Ldiv_23bit-20 + .byte .Ldenorm_tab-.Ldiv_23bit-16 + .byte .Ldenorm_tab-.Ldiv_23bit-12 + .byte .Ldenorm_tab-.Ldiv_23bit-8 + .byte .Ldenorm_tab-.Ldiv_23bit-4 +.Ldenorm_tab: + .byte .Ldenorm_tab-.Ldiv_23bit + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-df.c b/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-df.c new file mode 100644 index 000000000..9142b4541 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-df.c @@ -0,0 +1,161 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details.
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* We use a polynom similar to a Tchebycheff polynom to get an initial + seed, and then use a newton-raphson iteration step to get an + approximate result + If this result can't be rounded to the exact result with confidence, we + round to the value between the two closest representable values, and + test if the correctly rounded value is above or below this value. + + Because of the Newton-raphson iteration step, an error in the seed at X + is amplified by X. Therefore, we don't want a Tchebycheff polynom + or a polynom that is close to optimal according to the maximum norm + on the errro of the seed value; we want one that is close to optimal + according to the maximum norm on the error of the result, i.e. we + want the maxima of the polynom to increase linearily. + Given an interval [X0,X2) over which to approximate, + with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have, + like for Tchebycheff polynoms: + P(0) := 1 + but then we have: + P(1) := X + S*D + P(2) := 2 * X^2 + S*D * X - D^2 + Then again: + P(n+1) := 2 * X * P(n) - D^2 * P (n-1) + */ + +static long double merr = 42.; + +double +err (long double a0, long double a1, long double x) +{ + long double y0 = a0 + (x-1)*a1; + + long double approx = 2. * y0 - y0 * x * y0; + long double true = 1./x; + long double err = approx - true; + + if (err <= -1./65536./16384.) 
+ printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n", + (double)x, (double)approx, (double)true); + if (merr > err) + merr = err; + return err; +} + +int +main (void) +{ + long double T[5]; /* Taylor polynom */ + long double P[5][5]; + int i, j; + long double X0, X1, X2, S; + long double inc = 1./64; + long double D = inc*0.5; + long i0, i1, i2, io; + + memset (P, 0, sizeof (P)); + P[0][0] = 1.; + for (i = 1; i < 5; i++) + P[i][i] = 1 << i-1; + P[2][0] = -D*D; + for (X0 = 1.; X0 < 2.; X0 += inc) + { + X1 = X0 + inc * 0.5; + X2 = X0 + inc; + S = D / X1; + T[0] = 1./X1; + for (i = 1; i < 5; i++) + T[i] = T[i-1] * -T[0]; +#if 0 + printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2], +(double)T[3], (double)T[4]); +#endif + P[1][0] = S*D; + P[2][1] = S*D; + for (i = 3; i < 5; i++) + { + P[i][0] = -D*D*P[i-2][0]; + for (j = 1; j < i; j++) + P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j]; + } +#if 0 + printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2], +(double)P[3][3], (double)P[3][4]); + printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2], +(double)P[4][3], (double)P[4][4]); +#endif + for (i = 4; i > 1; i--) + { + long double a = T[i]/P[i][i]; + + for (j = 0; j < i; j++) + T[j] -= a * P[i][j]; + } +#if 0 + printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]); +#endif +#if 0 + i2 = T[2]*1024; + long double a = (T[2]-i/1024.)/P[2][2]; + for (j = 0; j < 2; j++) + T[j] -= a * P[2][j]; +#else + i2 = 0; +#endif + long double T0, Ti1; + for (i = 0, i0 = 0; i < 4; i++) + { + + i1 = T[1]*4096. 
+ i0 / (long double)(1 << 20) - 0.5; + i1 = - (-i1 & 0x0fff); + Ti1 = ((unsigned)(-i1 << 20) | i0) /-(long double)(1LL<<32LL); + T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1; + i0 = T0 * 1024 * 1024 + 0.5; + i0 &= 0xfffff; + } +#if 0 + printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]); +#endif + io = (unsigned)(-i1 << 20) | i0; + long double A1 = (unsigned)io/-65536./65536.; + long double A0 = (unsigned)(io << 12)/65536./65536.; + long double Xm0 = 1./sqrt (-A1); + long double Xm1 = 0.5+0.5*-A0/A1; +#if 0 + printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0); + printf ("%.12f %.12f %.12f\n", + err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2)); + printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1); + printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1)); +#endif + printf ("\t.long 0x%x\n", io); + } +#if 0 + printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2)); +#endif + return 0; +} diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-sf.c b/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-sf.c new file mode 100644 index 000000000..ff0f08ba4 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-sf.c @@ -0,0 +1,127 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* We use a polynom similar to a Tchebycheff polynom to get an initial + seed, and then use a newton-raphson iteration step to get an + approximate result + If this result can't be rounded to the exact result with confidence, we + round to the value between the two closest representable values, and + test if the correctly rounded value is above or below this value. + + Because of the Newton-raphson iteration step, an error in the seed at X + is amplified by X. Therefore, we don't want a Tchebycheff polynom + or a polynom that is close to optimal according to the maximum norm + on the errro of the seed value; we want one that is close to optimal + according to the maximum norm on the error of the result, i.e. we + want the maxima of the polynom to increase linearily. 
+ Given an interval [X0,X2) over which to approximate, + with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have, + like for Tchebycheff polynoms: + P(0) := 1 + but then we have: + P(1) := X + S*D + P(2) := 2 * X^2 + S*D * X - D^2 + Then again: + P(n+1) := 2 * X * P(n) - D^2 * P (n-1) + */ + +int +main (void) +{ + long double T[5]; /* Taylor polynom */ + long double P[5][5]; + int i, j; + long double X0, X1, X2, S; + long double inc = 1./64; + long double D = inc*0.5; + long i0, i1, i2; + + memset (P, 0, sizeof (P)); + P[0][0] = 1.; + for (i = 1; i < 5; i++) + P[i][i] = 1 << i-1; + P[2][0] = -D*D; + for (X0 = 1.; X0 < 2.; X0 += inc) + { + X1 = X0 + inc * 0.5; + X2 = X1 + inc; + S = D / X1; + T[0] = 1./X1; + for (i = 1; i < 5; i++) + T[i] = T[i-1] * -T[0]; +#if 0 + printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2], +(double)T[3], (double)T[4]); +#endif + P[1][0] = S*D; + P[2][1] = S*D; + for (i = 3; i < 5; i++) + { + P[i][0] = -D*D*P[i-2][0]; + for (j = 1; j < i; j++) + P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j]; + } +#if 0 + printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2], +(double)P[3][3], (double)P[3][4]); + printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2], +(double)P[4][3], (double)P[4][4]); +#endif + for (i = 4; i > 1; i--) + { + long double a = T[i]/P[i][i]; + + for (j = 0; j < i; j++) + T[j] -= a * P[i][j]; + } +#if 0 + printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]); +#endif +#if 0 + i2 = T[2]*512; + long double a = (T[2]-i/512.)/P[2][2]; + for (j = 0; j < 2; j++) + T[j] -= a * P[2][j]; +#else + i2 = 0; +#endif + for (i = 0, i0 = 0; i < 4; i++) + { + long double T0, Ti1; + + i1 = T[1]*8192. 
+ i0 / (long double)(1 << 19) - 0.5; + i1 = - (-i1 & 0x1fff); + Ti1 = ((unsigned)(-i1 << 19) | i0) /-(long double)(1LL<<32LL); + T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1; + i0 = T0 * 512 * 1024 + 0.5; + i0 &= 0x7ffff; + } +#if 0 + printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]); +#endif + printf ("\t.long 0x%x\n", (-i1 << 19) | i0); + } + return 0; +} diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/eqdf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/eqdf2.S new file mode 100644 index 000000000..10401534a --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/eqdf2.S @@ -0,0 +1,76 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 + output: z flag + clobber: r12, flags + For NaNs, bit 19.. bit 30 of the high word must be set. 
*/ +#if 0 /* DEBUG: cross-check the z flag left by __eqdf2_asm against the value returned by __eqdf2_c; abort on disagreement. */ + .global __eqdf2 + .balign 4 + FUNC(__eqdf2) +__eqdf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __eqdf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __eqdf2_asm` ld.ab r10,[sp,4] + pop_s blink + breq.d r11,0,0f + ld.ab r11,[sp,4] + jne_s [blink] + bl abort +0: jeq_s [blink] + bl abort + ENDFUNC(__eqdf2) +#define __eqdf2 __eqdf2_asm +#endif /* DEBUG */ + .global __eqdf2 + .balign 4 + HIDDEN_FUNC(__eqdf2) + /* Good performance as long as the difference in high word is + well predictable (as seen from the branch predictor). */ +__eqdf2: + brne.d DBL0H,DBL1H,.Lhighdiff + bmsk r12,DBL0H,20 +#ifdef DPFP_COMPAT + or.f 0,DBL0L,DBL1L + bset.ne r12,r12,21 +#endif /* DPFP_COMPAT */ + add1.f r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN. */ + j_s.d [blink] + cmp.cc DBL0L,DBL1L + .balign 4 +.Lhighdiff: /* high words differ: z ends up set only if both operands are +-0 */ + or r12,DBL0H,DBL1H + or.f 0,DBL0L,DBL1L + j_s.d [blink] + bmsk.eq.f r12,r12,30 + ENDFUNC(__eqdf2) +/* ??? could we do better by speeding up some 'common' case of inequality? */ diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/eqsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/eqsf2.S new file mode 100644 index 000000000..8a56132c1 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/eqsf2.S @@ -0,0 +1,69 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version.
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: z flag + clobber: r12, flags + For NaNs, bit 22 .. bit 30 must be set. */ +#if 0 /* DEBUG: cross-check the z flag left by __eqsf2_asm against the value returned by __eqsf2_c; abort on disagreement. */ + .global __eqsf2 + .balign 4 + FUNC(__eqsf2) +__eqsf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __eqsf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __eqsf2_asm` ld.ab r10,[sp,4] + pop_s blink + breq.d r11,0,0f + ld.ab r11,[sp,4] + jne_s [blink] + bl abort +0: jeq_s [blink] + bl abort + ENDFUNC(__eqsf2) +#define __eqsf2 __eqsf2_asm +#endif /* DEBUG */ + /* Good performance as long as the binary difference is + well predictable (as seen from the branch predictor). */ + .global __eqsf2 + .balign 4 + HIDDEN_FUNC(__eqsf2) +__eqsf2: + breq r0, r1,.Lno_bdiff + or r12,r0,r1 + j_s.d [blink] + bmsk.f 0,r12,30 /* bit patterns differ: z is set only when both operands are +-0 */ +.Lno_bdiff: + bmsk r12,r0,23 + add1.f r12,r12,r0 /* set c iff NaN; also, clear z if NaN.
*/ + j_s.d [blink] + cmp.cc r0,r1 + ENDFUNC(__eqsf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/extendsfdf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/extendsfdf2.S new file mode 100644 index 000000000..cf6c98d23 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/extendsfdf2.S @@ -0,0 +1,122 @@ +/* Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG: run __extendsfdf2_c and __extendsfdf2_asm on the same operand and abort if the two results differ. */ + .global __extendsfdf2 + .balign 4 + FUNC(__extendsfdf2) +__extendsfdf2: + push_s blink + bl.d __extendsfdf2_c + push_s r0 + ld_s r2,[sp] + st_s r1,[sp] + push_s r0 + bl.d __extendsfdf2_asm + mov_s r0,r2 + pop_s r2 + pop_s r3 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort + ENDFUNC(__extendsfdf2) +#define __extendsfdf2 __extendsfdf2_asm +#endif /* DEBUG */ +#if 0 /* ARC600 */ +__extendsfdf2: + lsr r2,r0,23 + tst r2,0xff + bic.ne.f r2,0xff + beq_s .Linf_nan_denorm_0 +..
+.Linf_nan_denorm: + bbit1 r0,30,.Linf_nan +#endif + .global __extendsfdf2 + .balign 4 + FUNC(__extendsfdf2) +__extendsfdf2: + add.f r1,r0,r0 /* r1 = r0 << 1; carry = sign bit of the input */ + norm r3,r1 +#ifdef __LITTLE_ENDIAN__ + lsr_s DBL0H,r1,4 + brhs r3,7,.Linf_nan_denorm_0 + asl_s DBL0L,r0,29 + add_s DBL0H,DBL0H, \ + 0x38000000 /* 0x380 << 20, i.e. (0x3ff - 0x7f) in the double exponent field */ +#else + lsr r2,r1,4 + brhs r3,7,.Linf_nan_denorm_0 + asl_s DBL0L,r1,28 + add DBL0H,r2, \ + 0x38000000 +#endif + j_s.d [blink] + bxor.cs DBL0H,DBL0H,31 + .balign 4 +.Linf_nan_denorm_0: +#ifdef __LITTLE_ENDIAN__ + mov_s DBL0H,r0 + jeq.d [blink] + mov.eq DBL0L,0 +#else + jeq_s [blink] +#endif + bmi .Linf_nan + asl_s r0,r0,r3 + rsub r3,r3,0x380+6 +#ifdef __LITTLE_ENDIAN__ + asl_s r3,r3,20 + lsr DBL0H,r0,9 + asl_s DBL0L,r0,23 + add_s DBL0H,DBL0H,r3 + j_s.d [blink] + bxor.cs DBL0H,DBL0H,31 +#else + asl DBL0L,r0,23 + lsr_s DBL0H,r0,9 + asl_s r3,r3,20 + bxor.cs DBL0H,DBL0H,31 + j_s.d [blink] + add_l DBL0H,DBL0H,r3 +#endif +.Linf_nan: +#ifdef __LITTLE_ENDIAN__ + lsr DBL0H,r0,3 + + or_s DBL0H,DBL0H,r0 + j_s.d [blink] + mov_l DBL0L,0 +#else + lsr r3,r0,3 + mov_s DBL0L,0 + j_s.d [blink] + or_l DBL0H,r0,r3 +#endif + ENDFUNC(__extendsfdf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/fixdfsi.S b/gcc-4.9/libgcc/config/arc/ieee-754/fixdfsi.S new file mode 100644 index 000000000..82c2c02b8 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/fixdfsi.S @@ -0,0 +1,85 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details.
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG: run __fixdfsi_c and __fixdfsi_asm on the same operand and abort if the two results differ. */ + FUNC(__fixdfsi) + .global __fixdfsi + .balign 4 +__fixdfsi: + push_s blink + push_s r0 + bl.d __fixdfsi_c + push_s r1 + mov_s r2,r0 + pop_s r1 + ld r0,[sp] + bl.d __fixdfsi_asm + st r2,[sp] + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__fixdfsi) +#define __fixdfsi __fixdfsi_asm +#endif /* DEBUG */ + +/* If the fraction has to be shifted left by a positive non-zero amount, + we have to combine bits from DBL0L and DBL0H. If we shift right, + or shift by zero, we only want to have the bits from DBL0H in r0. */ + + .global __fixdfsi + FUNC(__fixdfsi) + .balign 4 +__fixdfsi: + bbit0 DBL0H,30,.Lret0or1 /* top exponent bit clear: |x| < 2 */ + asr r2,DBL0H,20 + bmsk_s DBL0H,DBL0H,19 + sub_s r2,r2,19; 0x3ff+20-0x400 + neg_s r3,r2 + asr.f 0,r3,11 + bset_s DBL0H,DBL0H,20 +#ifdef __LITTLE_ENDIAN__ + mov.cs DBL0L,DBL0H + asl DBL0H,DBL0H,r2 +#else + asl.cc DBL0H,DBL0H,r2 + lsr.cs DBL0H,DBL0H,r3 +#endif + lsr_s DBL0L,DBL0L,r3 + + add.cc r0,r0,r1 + j_s.d [blink] + neg.pl r0,r0 +.Lret0or1: /* |x| < 2: the result is 0, or +-1 when the exponent field is 0x3ff */ + add.f r0,DBL0H,0x100000 + lsr_s r0,r0,30 + + bmsk_s r0,r0,0 + j_s.d [blink] + neg.mi r0,r0 + ENDFUNC(__fixdfsi) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/fixsfsi.S b/gcc-4.9/libgcc/config/arc/ieee-754/fixsfsi.S new file mode 100644 index 000000000..56ab2fd09 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/fixsfsi.S @@ -0,0 +1,71 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __fixsfsi + FUNC(__fixsfsi) + .balign 4 +__fixsfsi: + push_s blink + bl.d __fixsfsi_c + push_s r0 + ld_s r1,[sp] + st_s r0,[sp] + bl.d __fixsfsi_asm + mov_s r0,r1 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__fixsfsi) +#define __fixsfsi __fixsfsi_asm +#endif /* DEBUG */ + + .global __fixsfsi + FUNC(__fixsfsi) + .balign 4 +__fixsfsi: /* Convert the single-precision value in r0 to a signed 32-bit integer returned in r0; fraction bits below the units place are shifted out. */ + bbit0 r0,30,.Lret0or1 + lsr r2,r0,23 + bmsk_s r0,r0,22 + bset_s r0,r0,23 + sub_s r2,r2,22;0x7f+23-0x80 + asl.f 0,r2,24 + neg r3,r2 + asl.mi r0,r0,r2 + lsr.pl r0,r0,r3 + j_s.d [blink] + neg.cs r0,r0 +.Lret0or1: /* bit 30 is clear, so the biased exponent is <= 0x7f and |value| < 2: the result is 1 only for exponent 0x7f, negated when the sign bit is set, else 0 */ + add.f r0,r0,0x800000 + lsr_s r0,r0,30 + + bmsk_s r0,r0,0 + j_s.d [blink] + neg.mi r0,r0 + ENDFUNC(__fixsfsi) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/fixunsdfsi.S b/gcc-4.9/libgcc/config/arc/ieee-754/fixunsdfsi.S new file mode 100644 index 000000000..13af5dc6e --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/fixunsdfsi.S @@ -0,0 +1,80 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + FUNC(__fixunsdfsi) + .global __fixunsdfsi + .balign 4 +__fixunsdfsi: + push_s blink + push_s r0 + bl.d __fixunsdfsi_c + push_s r1 + mov_s r2,r0 + pop_s r1 + ld r0,[sp] + bl.d __fixunsdfsi_asm + st r2,[sp] + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__fixunsdfsi) +#define __fixunsdfsi __fixunsdfsi_asm +#endif /* DEBUG */ + + .global __fixunsdfsi + FUNC(__fixunsdfsi) + .balign 4 +__fixunsdfsi: /* Convert the double in DBL0 to an unsigned 32-bit integer returned in r0. */ + bbit0 DBL0H,30,.Lret0or1 + lsr r2,DBL0H,20 + bmsk_s DBL0H,DBL0H,19 + sub_s r2,r2,19; 0x3ff+20-0x400 + neg_s r3,r2 + btst_s r3,10 + bset_s DBL0H,DBL0H,20 +#ifdef __LITTLE_ENDIAN__ + mov.ne DBL0L,DBL0H + asl DBL0H,DBL0H,r2 +#else + asl.eq DBL0H,DBL0H,r2 + lsr.ne DBL0H,DBL0H,r3 +#endif + lsr DBL0L,DBL0L,r3 + j_s.d [blink] + add.eq r0,r0,r1 +.Lret0: /* NOTE(review): no branch to .Lret0 is visible in this function; it looks unused - confirm before removing */ + j_s.d [blink] + mov_l r0,0 +.Lret0or1: /* bit 30 of the high word is clear: r0 = 1 when the biased exponent is 0x3ff, otherwise 0 (the sign bit is not examined on this path) */ + add_s DBL0H,DBL0H,0x100000 + lsr_s DBL0H,DBL0H,30 + j_s.d [blink] + bmsk_l r0,DBL0H,0 + ENDFUNC(__fixunsdfsi) diff --git
a/gcc-4.9/libgcc/config/arc/ieee-754/floatsidf.S b/gcc-4.9/libgcc/config/arc/ieee-754/floatsidf.S new file mode 100644 index 000000000..2e2363aeb --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/floatsidf.S @@ -0,0 +1,77 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __floatsidf + .balign 4 + FUNC(__floatsidf) +__floatsidf: + push_s blink + bl.d __floatsidf_c + push_s r0 + ld_s r2,[sp] + st_s r1,[sp] + push_s r0 + bl.d __floatsidf_asm + mov_s r0,r2 + pop_s r2 + pop_s r3 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort + ENDFUNC(__floatsidf) +#define __floatsidf __floatsidf_asm +#endif /* DEBUG */ + + .global __floatsidf + .balign 4 + FUNC(__floatsidf) +__floatsidf: /* Convert the signed 32-bit integer in r0 to a double returned in DBL0H:DBL0L. */ + abs.f r1,r0 + jeq_s [blink] /* zero input: r0 and r1 are both 0 here, which is the bit pattern of +0.0 */ + lsr r2,r1 + mov r12,-0x41d ; -(0x3ff+31-1) + norm r2,r2 + bclr.cs r12,r12,11 + rsub.f r3,r2,11 + add_s r12,r2,r12 + add_s r2,r2,21 +#ifdef __LITTLE_ENDIAN__ + asl DBL0L,r1,r2 + lsr_s DBL0H,r1,r3 +#else + lsr DBL0H,r1,r3 + asl_s DBL0L,r1,r2 +#endif + asl_s r12,r12,20 + mov.lo DBL0H,DBL0L + sub_s DBL0H,DBL0H,r12 + j_s.d [blink] + mov.ls DBL0L,0 + ENDFUNC(__floatsidf) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/floatsisf.S b/gcc-4.9/libgcc/config/arc/ieee-754/floatsisf.S new file mode 100644 index 000000000..0e35fe06a --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/floatsisf.S @@ -0,0 +1,99 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation.
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __floatsisf + FUNC(__floatsisf) + .balign 4 +__floatsisf: + push_s blink + bl.d __floatsisf_c + push_s r0 + ld_s r1,[sp] + st_s r0,[sp] + bl.d __floatsisf_asm + mov_s r0,r1 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__floatsisf) + .global __floatunsisf + FUNC(__floatunsisf) + .balign 4 +__floatunsisf: + push_s blink + bl.d __floatunsisf_c + push_s r0 + ld_s r1,[sp] + st_s r0,[sp] + bl.d __floatunsisf_asm + mov_s r0,r1 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__floatunsisf) +#define __floatsisf __floatsisf_asm +#define __floatunsisf __floatunsisf_asm +#endif /* DEBUG */ + + .global __floatunsisf + .global __floatsisf + FUNC(__floatsisf) + FUNC(__floatunsisf) + .balign 4 +__floatunsisf: /* Convert the unsigned 32-bit integer in r0 to single precision returned in r0; non-zero inputs join __floatsisf at local label 0. */ + lsr_s r2,r0 + mov_l r12,0x9d ; 0x7f + 31 - 1 + norm r2,r2 + brne_l r0,0,0f + j_s [blink] /* r0 == 0: returned unchanged */ + .balign 4 +__floatsisf: /* Convert the signed 32-bit integer in r0 to single precision returned in r0. */ + abs.f r0,r0 + jeq_s [blink] /* r0 == 0: returned unchanged */ + lsr_s r2,r0 + mov_s r12,0x9d ; 0x7f + 31 - 1 + norm r2,r2 + bset.cs r12,r12,8 +0: rsub.f r3,r2,8 + bmsk r1,r0,r3 + ror r1,r1,r3 + lsr.pl r0,r0,r3 + neg_s r3,r3 + asl.mi r0,r0,r3 + sub_s r12,r12,r2 + asl_s r12,r12,23 + bxor.pl.f r1,r1,31 + add_s r0,r0,r12 + j_s.d [blink] + add.pnz r0,r0,1 + ENDFUNC(__floatunsisf) + ENDFUNC(__floatsisf) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/floatunsidf.S b/gcc-4.9/libgcc/config/arc/ieee-754/floatunsidf.S new file mode 100644 index 000000000..4bdb965bc --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/floatunsidf.S @@ -0,0 +1,75 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __floatunsidf + .balign 4 + FUNC(__floatunsidf) +__floatunsidf: + push_s blink + bl.d __floatunsidf_c + push_s r0 + ld_s r2,[sp] + st_s r1,[sp] + push_s r0 + bl.d __floatunsidf_asm + mov_s r0,r2 + pop_s r2 + pop_s r3 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort + ENDFUNC(__floatunsidf) +#define __floatunsidf __floatunsidf_asm +#endif /* DEBUG */ + + .global __floatunsidf + .balign 4 + FUNC(__floatunsidf) +__floatunsidf: /* Convert the unsigned 32-bit integer in r0 to a double returned in DBL0H:DBL0L. */ + lsr_s r1,r0 + breq_s r0,0,.Lret0 /* zero input: r0 and r1 (= r0 >> 1) are both 0, so the register pair already holds +0.0 */ + norm r2,r1 + mov r12,-0x41d ; -(0x3ff+31-1) + rsub.f r3,r2,11 + add_s r12,r2,r12 + add_s r2,r2,21 +#ifdef __LITTLE_ENDIAN__ + lsr DBL0H,r0,r3 + asl_s DBL0L,r0,r2 +#else + asl DBL0L,r0,r2 + lsr_s DBL0H,r0,r3 +#endif + asl_s r12,r12,20 + mov.lo DBL0H,DBL0L + sub_s DBL0H,DBL0H,r12 +.Lret0: j_s.d [blink] + mov.ls DBL0L,0 + ENDFUNC(__floatunsidf) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/gedf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/gedf2.S new file mode 100644 index 000000000..72b71af5c --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/gedf2.S @@ -0,0 +1,86
@@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 + output: c flags to be used for 'hs' condition + clobber: r12, flags */ +/* For NaNs, bit 19.. bit 30 of the high word must be set. 
*/ +#if 0 /* DEBUG */ + .global __gedf2 + .balign 4 + FUNC(__gedf2) +__gedf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __gedf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __gedf2_asm` ld.ab r10,[sp,4] + pop_s blink + brge.d r11,0,0f + ld.ab r11,[sp,4] + jlo [blink] + bl abort +0: jhs [blink] + bl abort + ENDFUNC(__gedf2) +#define __gedf2 __gedf2_asm +#endif /* DEBUG */ + .global __gedf2 + .balign 4 + HIDDEN_FUNC(__gedf2) +__gedf2: /* Compare DBL0 with DBL1. No register result is produced: the answer is left in the flags for the caller to test with 'hs'. */ + or.f r12,DBL0H,DBL1H + bmi.d .Lneg + bmsk_s r12,r12,20 + add1.f 0,r12,DBL0H ; clear z; set c iff NaN + add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN + bbit1 DBL0H,31,.Lneg + cmp.cc DBL0H,DBL1H + j_s.d [blink] + cmp.eq DBL0L,DBL1L + .balign 4 +.Lneg: breq.d DBL1H,0,.L0 /* reached when a sign bit is set in DBL0H or DBL1H; the compares below take the operands in swapped order */ + add1.f 0,r12,DBL0H + add1.cc.f r12,r12,DBL1H + cmp.cc DBL1H,DBL0H + j_s.d [blink] + cmp.eq DBL1L,DBL0L + .balign 4 +.L0: + bxor.f 0,DBL0H,31 ; check for high word of -0. + beq_s .Lcheck_0 + cmp.cc DBL1H,DBL0H + j_s.d [blink] + cmp.eq DBL1L,DBL0L +.Lcheck_0: + ; high words suggest DBL0 may be -0, DBL1 +0; check low words. + cmp_s DBL1H,DBL0L + j_s.d [blink] + cmp.cc DBL1H,DBL1L + ENDFUNC(__gedf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/gesf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/gesf2.S new file mode 100644 index 000000000..896901acd --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/gesf2.S @@ -0,0 +1,75 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: c flag to be used for 'hs' condition + clobber: r12,flags */ +/* For NaNs, bit 22.. bit 30 must be set. 
*/ +#if 0 /* DEBUG */ + .global __gesf2 + .balign 4 + FUNC(__gesf2) +__gesf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __gesf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __gesf2_asm` ld.ab r10,[sp,4] + pop_s blink + brge.d r11,0,0f + ld.ab r11,[sp,4] + jlo [blink] + bl abort +0: jhs [blink] + bl abort + ENDFUNC(__gesf2) +#define __gesf2 __gesf2_asm +#endif /* DEBUG */ + .global __gesf2 + .balign 4 + HIDDEN_FUNC(__gesf2) +__gesf2: /* Compare the single-precision values in r0 and r1. No register result is produced: the answer is left in the flags for the caller to test with 'hs'. */ + or.f r12,r0,r1 + bmi.d .Lneg + bmsk_s r12,r12,23 + add1.f 0,r12,r0 ; check for NaN + add1.cc.f r12,r12,r1 + j_s.d [blink] + cmp.cc r0,r1 + .balign 4 +.Lneg: breq.d r1,0,.L0 /* reached when the sign bit of r0 or r1 is set (N from or.f); the compare below takes the operands in swapped order */ + add1.f 0,r12,r0 ; check for NaN + add1.cc.f r12,r12,r1 + j_s.d [blink] + cmp.cc r1,r0 + .balign 4 +.L0: bxor.f 0,r0,31 ; check for -0 + j_s.d [blink] + cmp.hi r1,r0 + ENDFUNC(__gesf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/gtdf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/gtdf2.S new file mode 100644 index 000000000..56c2a292e --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/gtdf2.S @@ -0,0 +1,86 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details.
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 + output: c,z flags to be used for 'hi' condition + clobber: r12, flags */ +/* For NaNs, bit 19.. bit 30 of the high word must be set. */ +#if 0 /* DEBUG */ + .global __gtdf2 + .balign 4 + FUNC(__gtdf2) +__gtdf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __gtdf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __gtdf2_asm` ld.ab r10,[sp,4] + pop_s blink + brgt.d r11,0,0f + ld.ab r11,[sp,4] + jls [blink] + bl abort +0: jhi [blink] + bl abort + ENDFUNC(__gtdf2) +#define __gtdf2 __gtdf2_asm +#endif /* DEBUG */ + .global __gtdf2 + .balign 4 + HIDDEN_FUNC(__gtdf2) +__gtdf2: /* Compare DBL0 with DBL1. No register result is produced: the answer is left in the c and z flags for the caller to test with 'hi'. */ + or.f r12,DBL0H,DBL1H + bmi.d .Lneg + bmsk_s r12,r12,20 + add1.f 0,r12,DBL0H ; clear z; set c iff NaN + add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN + ; don't care: z may or may not be cleared if there is no NaN event + cmp.cc DBL0H,DBL1H + j_s.d [blink] + cmp.eq DBL0L,DBL1L + .balign 4 +.Lneg: breq.d DBL0H,0,.L0 /* reached when a sign bit is set in DBL0H or DBL1H (N from or.f); the compares below take the operands in swapped order */ + add1.f 0,r12,DBL1H + add1.cc.f r12,r12,DBL0H + cmp.cc DBL1H,DBL0H + j_s.d [blink] + cmp.eq DBL1L,DBL0L + .balign 4 +.L0: + bxor.f 0,DBL1H,31 + beq_s .Lcheck_0 + cmp.cc DBL1H,DBL0H + j_s.d [blink] + cmp.eq DBL1L,DBL0L + .balign 4 +.Lcheck_0: + ; high words suggest DBL0 may be +0, DBL1 -0; check low words.
+ j_s.d [blink] + or.f 0,DBL0L,DBL1L + ENDFUNC(__gtdf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/gtsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/gtsf2.S new file mode 100644 index 000000000..6253d06ee --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/gtsf2.S @@ -0,0 +1,75 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: c, z flags to be used for 'hi' condition + clobber: r12,flags */ +/* For NaNs, bit 22.. bit 30 must be set.
*/ +#if 0 /* DEBUG */ + .global __gtsf2 + .balign 4 + FUNC(__gtsf2) +__gtsf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __gtsf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __gtsf2_asm` ld.ab r10,[sp,4] + pop_s blink + brgt.d r11,0,0f + ld.ab r11,[sp,4] + jls [blink] + bl abort +0: jhi [blink] + bl abort + ENDFUNC(__gtsf2) +#define __gtsf2 __gtsf2_asm +#endif /* DEBUG */ + .global __gtsf2 + .balign 4 + HIDDEN_FUNC(__gtsf2) +__gtsf2: /* Compare the single-precision values in r0 and r1. No register result is produced: the answer is left in the flags for the caller to test with 'hi'. */ + or.f r12,r0,r1 + bmi.d .Lneg + bmsk_s r12,r12,23 + add1.f 0,r12,r0 ; check for NaN + add1.cc.f r12,r12,r1 + j_s.d [blink] + cmp.cc r0,r1 + .balign 4 +.Lneg: breq.d r0,0,.L0 /* reached when the sign bit of r0 or r1 is set (N from or.f); the compare below takes the operands in swapped order */ + add1.f 0,r12,r0 ; check for NaN + add1.cc.f r12,r12,r1 + j_s.d [blink] + cmp.cc r1,r0 + .balign 4 +.L0: bxor.f 0,r1,31 ; check for -0 + j_s.d [blink] + cmp.hi r1,r0 + ENDFUNC(__gtsf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S new file mode 100644 index 000000000..7826fe75f --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S @@ -0,0 +1,235 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details.
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* XMAC schedule: directly back-to-back multiplies stall; the third + instruction after a multiply stalls unless it is also a multiply. */ +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __muldf3 + .balign 4 +__muldf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __muldf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __muldf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + b abort +#define __muldf3 __muldf3_asm +#endif /* DEBUG */ +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ +/* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx , + we can do: + sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx */ + +__muldf3_support: /* This label makes debugger output saner. */ +/* If one number is denormal, subtract some from the exponent of the other + one (if the other exponent is too small, return 0), and normalize the + denormal. Then re-run the computation.
*/ + .balign 4 + FUNC(__muldf3) +.Ldenorm_dbl0: + mov_s r12,DBL0L + mov_s DBL0L,DBL1L + mov_s DBL1L,r12 + mov_s r12,DBL0H + mov_s DBL0H,DBL1H + mov_s DBL1H,r12 + and r11,DBL0H,r9 +.Ldenorm_dbl1: + brhs r11,r9,.Linf_nan + brhs 0x3ca00001,r11,.Lret0 + sub_s DBL0H,DBL0H,DBL1H + bmsk_s DBL1H,DBL1H,30 + add_s DBL0H,DBL0H,DBL1H + breq_s DBL1H,0,.Ldenorm_2 + norm r12,DBL1H + + sub_s r12,r12,10 + asl r5,r12,20 + asl_s DBL1H,DBL1H,r12 + sub DBL0H,DBL0H,r5 + neg r5,r12 + lsr r6,DBL1L,r5 + asl_s DBL1L,DBL1L,r12 + b.d __muldf3 + add_s DBL1H,DBL1H,r6 + + .balign 4 +.Linf_nan: + bclr r12,DBL1H,31 + xor_s DBL1H,DBL1H,DBL0H + bclr_s DBL0H,DBL0H,31 + max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf + or.f 0,DBL0H,DBL0L + mov_s DBL0L,0 + or.ne.f DBL1L,DBL1L,r12 + not_s DBL0H,DBL0L ; inf * 0 -> NaN + mov.ne DBL0H,r8 + tst_s DBL1H,DBL1H + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +.Lret0: xor_s DBL0H,DBL0H,DBL1H /* the three xor/bclr/xor steps leave only bit 31 of DBL0H ^ DBL1H in DBL0H: a zero result carrying the product's sign */ + bclr DBL1H,DBL0H,31 + xor_s DBL0H,DBL0H,DBL1H + j_s.d [blink] + mov_l DBL0L,0 + + .balign 4 +.Ldenorm_2: + breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output + norm.f r12,DBL1L + + mov.mi r12,21 + add.pl r12,r12,22 + neg r11,r12 + asl_s r12,r12,20 + lsr.f DBL1H,DBL1L,r11 + ror DBL1L,DBL1L,r11 + sub_s DBL0H,DBL0H,r12 + mov.eq DBL1H,DBL1L + sub_s DBL1L,DBL1L,DBL1H + /* Fall through. */ + .global __muldf3 + .balign 4 +__muldf3: /* Multiply DBL0 by DBL1; the product is returned in DBL0. The first load puts the exponent mask stored at .L7ff00000 into r9. */ + ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)] + mpyhu r4,DBL0L,DBL1L + bmsk r6,DBL0H,19 + bset r6,r6,20 + mpyu r7,r6,DBL1L + and r11,DBL0H,r9 + breq r11,0,.Ldenorm_dbl0 + mpyhu r8,r6,DBL1L + bmsk r10,DBL1H,19 + bset r10,r10,20 + mpyhu r5,r10,DBL0L + add.f r4,r4,r7 + and r12,DBL1H,r9 + mpyhu r7,r6,r10 + breq r12,0,.Ldenorm_dbl1 + adc.f r5,r5,r8 + mpyu r8,r10,DBL0L + breq r11,r9,.Linf_nan + breq r12,r9,.Linf_nan + mpyu r6,r6,r10 + add.cs r7,r7,1 + add.f r4,r4,r8 + mpyu r10,DBL1L,DBL0L + bclr r8,r9,30 ; 0x3ff00000 + adc.f r5,r5,r6 + ; XMAC write-back stall / std.
mult stall is one cycle later + bclr r6,r9,20 ; 0x7fe00000 + add.cs r7,r7,1 ; fraction product in r7:r5:r4 + tst r10,r10 + bset.ne r4,r4,0 ; put least significant word into sticky bit + lsr.f r10,r7,9 + add_l r12,r12,r11 ; add exponents + rsub.eq r8,r8,r9 ; 0x40000000 + sub r12,r12,r8 ; subtract bias + implicit 1 + brhs.d r12,r6,.Linf_denorm + rsub r10,r10,12 +.Lshift_frac: + neg r8,r10 + asl r6,r4,r10 + lsr DBL0L,r4,r8 + add.f 0,r6,r6 + btst.eq DBL0L,0 + cmp.eq r4,r4 ; round to nearest / round to even + asl r4,r5,r10 + lsr r5,r5,r8 + adc.f DBL0L,DBL0L,r4 + xor.f 0,DBL0H,DBL1H + asl r7,r7,r10 + add_s r12,r12,r5 + adc DBL0H,r12,r7 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* We have checked for infinity / NaN input before, and transformed + denormalized inputs into normalized inputs. Thus, the worst case + exponent overflows are: + 1 + 1 - 0x400 == 0xc02 : maximum underflow + 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow + N.B. 0x7e and 0x7f are also values for overflow. + + If (r12 <= -54), we have an underflow to zero.
*/ + .balign 4 +.Linf_denorm: + brlo r12,0xc0000000,.Linf + asr r6,r12,20 + mov_s r12,0 + add.f r10,r10,r6 + brgt r10,0,.Lshift_frac + beq_s .Lround_frac + add.f r10,r10,32 +.Lshift32_frac: + tst r4,r4 + mov r4,r5 + bset.ne r4,r4,1 + mov r5,r7 + mov r7,0 + brge r10,1,.Lshift_frac + breq r10,0,.Lround_frac + add.f r10,r10,32 + brgt r10,21,.Lshift32_frac + b_s .Lret0 + +.Lround_frac: + add.f 0,r4,r4 + btst.eq r5,0 + mov_s DBL0L,r5 + mov_s DBL0H,r7 + adc.eq.f DBL0L,DBL0L,0 + j_s.d [blink] + + adc.eq DBL0H,DBL0H,0 + +.Linf: xor.f DBL1H,DBL1H,DBL0H /* overflow: return the infinity pattern held in r9 (0x7ff00000, low word 0); bit 31 is set when the operand signs differ */ + mov_s DBL0L,0 + mov_s DBL0H,r9 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + ENDFUNC(__muldf3) + + .balign 4 +.L7ff00000: + .long 0x7ff00000 diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/mulsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/mulsf3.S new file mode 100644 index 000000000..4bd82f49c --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/mulsf3.S @@ -0,0 +1,180 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>.
*/ + +/* XMAC schedule: directly back-to-back multiplies stall; the third + instruction after a multiply stalls unless it is also a multiply. */ +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __mulsf3 + FUNC(__mulsf3) + .balign 4 +__mulsf3: + push_s blink + push_s r1 + bl.d __mulsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __mulsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__mulsf3) +#define __mulsf3 __mulsf3_asm +#endif /* DEBUG */ + + .balign 4 + .global __mulsf3 + FUNC(__mulsf3) +__mulsf3: /* Multiply the single-precision values in r0 and r1; the product is returned in r0. The first load puts the exponent mask stored at .L7f800000 into r9. */ + ld.as r9,[pcl,79]; [pcl,((.L7f800000-.+2)/4)] + bmsk r4,r1,22 + bset r2,r0,23 + asl_s r2,r2,8 + bset r3,r4,23 + mpyhu r6,r2,r3 + and r11,r0,r9 + breq r11,0,.Ldenorm_dbl0 + mpyu r7,r2,r3 + breq r11,r9,.Linf_nan_dbl0 + and r12,r1,r9 + asl.f 0,r6,8 + breq r12,0,.Ldenorm_dbl1 +.Lpast_denorm: + xor_s r0,r0,r1 +.Lpast_denorm_dbl1: + add.pl r6,r6,r6 + bclr.pl r6,r6,23 + add.pl.f r7,r7,r7 + ld.as r4,[pcl,64]; [pcl,((.L7fffffff-.+2)/4)] + add.cs r6,r6,1 + lsr.f 0,r6,1 + breq r12,r9,.Linf_nan_dbl1 + add_s r12,r12,r11 + adc.f 0,r7,r4 + add_s r12,r12, \ + -0x3f800000 + adc.f r8,r6,r12 + bic r0,r0,r4 + tst.pl r8,r9 + min r3,r8,r9 + jpnz.d [blink] + add.pnz r0,r0,r3 +; infinity or denormal number + add.ne.f r3,r3,r3 + bpnz .Linfinity + asr_s r3,r3,23+1 + bset r6,r6,23 + sub_s r3,r3,1 + neg_s r2,r3 + brhi r2,24,.Lret_r0 ; right shift > 24 -> return +-0 + lsr r2,r6,r2 + asl r9,r6,r3 + lsr.f 0,r2,1 + tst r7,r7 + add_s r0,r0,r2 + bset.ne r9,r9,0 + adc.f 0,r9,r4 + j_s.d [blink] + add.cs r0,r0,1 +.Linfinity: + j_s.d [blink] + add_s r0,r0,r9 + +.Lret_r0: j_s [blink] + + .balign 4 +.Linf_nan_dbl0: + sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf) + bic.f 0,r9,r2 + xor_s r0,r0,r1 + bclr_s r1,r1,31 + xor_s r0,r0,r1 + jne_s [blink] +.Lretnan: /* returns r0 = -1: all bits set, i.e. exponent all ones with a non-zero fraction, a NaN bit pattern */ + j_s.d [blink] + mov r0,-1
+.Ldenorm_dbl0_inf_nan_dbl1: + bmsk.f 0,r0,30 + beq_s .Lretnan + xor_s r0,r0,r1 +.Linf_nan_dbl1: + xor_s r1,r1,r0 + bclr_s r1,r1,31 + j_s.d [blink] + xor_s r0,r0,r1 + + .balign 4 +.Ldenorm_dbl0: + bclr_s r2,r2,31 + norm.f r4,r2 + and r12,r1,r9 + add_s r2,r2,r2 + asl r2,r2,r4 + asl r4,r4,23 + mpyhu r6,r2,r3 + breq r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 + sub.ne.f r12,r12,r4 + mpyu r7,r2,r3 + bhi.d .Lpast_denorm + asl.f 0,r6,8 + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_l r0,r0,r1 + + .balign 4 +.Ldenorm_dbl1: + norm.f r3,r4 + xor_s r0,r0,r1 + sub_s r3,r3,7 + asl r4,r4,r3 + sub_s r3,r3,1 + asl_s r3,r3,23 + mpyhu r6,r2,r4 + sub.ne.f r11,r11,r3 + bmsk r8,r0,30 + mpyu r7,r2,r4 + bhi.d .Lpast_denorm_dbl1 + asl.f 0,r6,8 + j_s.d [blink] + bic r0,r0,r8 + + .balign 4 +.L7f800000: + .long 0x7f800000 +.L7fffffff: + .long 0x7fffffff + ENDFUNC(__mulsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/orddf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/orddf2.S new file mode 100644 index 000000000..5fcdf4cf9 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/orddf2.S @@ -0,0 +1,63 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation.
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 (only the high words DBL0H and DBL1H are read) + output: c flag, set if either operand is a NaN + clobber: r12, flags + For NaNs, bit 19 .. bit 30 must be set. */ +#if 0 /* DEBUG */ /* Disabled self-check wrapper: saves r0 to r11 and blink, calls __unorddf2_c and keeps its result in r11, restores the registers and calls __orddf2_asm. It reaches abort when the carry flag is set while r11 is zero, or clear while r11 is non-zero. */ + .global __orddf2 + .balign 4 + FUNC(__orddf2) +__orddf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __unorddf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __orddf2_asm` ld.ab r10,[sp,4] + pop_s blink + brne.d r11,0,0f + ld.ab r11,[sp,4] + jcc [blink] + bl abort +0: jcs [blink] + bl abort + ENDFUNC(__orddf2) +#define __orddf2 __orddf2_asm +#endif /* DEBUG */ + .global __orddf2 + .balign 4 + HIDDEN_FUNC(__orddf2) +__orddf2: /* r12 = bits 0..20 of DBL0H, then added to DBL0H shifted left by one so that a NaN overflows into the carry flag */ + bmsk r12,DBL0H,20 + add1.f r12,r12,DBL0H /* clear z; set c if NaN. */ + bmsk r12,DBL1H,20 /* same test for the second operand */ + j_s.d [blink] /* return once the delay-slot instruction below has run */ + add1.cc.f r12,r12,DBL1H /* clear z; set c if NaN. */ /* runs only while carry is clear, so a carry from the first test is kept */ + ENDFUNC(__orddf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/ordsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/ordsf2.S new file mode 100644 index 000000000..de764a164 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/ordsf2.S @@ -0,0 +1,63 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: c flag, set if either operand is a NaN + clobber: r12, flags + For NaNs, bit 22 .. bit 30 must be set. */ +#if 0 /* DEBUG */ /* Disabled self-check wrapper: saves r0 to r11 and blink, calls __unordsf2_c and keeps its result in r11, restores the registers and calls __ordsf2_asm. It reaches abort when the carry flag is set while r11 is zero, or clear while r11 is non-zero. */ + .global __ordsf2 + .balign 4 + FUNC(__ordsf2) +__ordsf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __unordsf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __ordsf2_asm` ld.ab r10,[sp,4] + pop_s blink + brne.d r11,0,0f + ld.ab r11,[sp,4] + jcc [blink] + bl abort +0: jcs [blink] + bl abort + ENDFUNC(__ordsf2) +#define __ordsf2 __ordsf2_asm +#endif /* DEBUG */ + .global __ordsf2 + .balign 4 + HIDDEN_FUNC(__ordsf2) +__ordsf2: /* r12 = bits 0..23 of r0, then added to r0 shifted left by one so that a NaN overflows into the carry flag */ + bmsk r12,r0,23 + add1.f r12,r12,r0 /* clear z; set c if NaN. */ + bmsk r12,r1,23 /* same test for the second operand */ + j_s.d [blink] /* return once the delay-slot instruction below has run */ + add1.cc.f r12,r12,r1 /* clear z; set c if NaN. 
Runs only while carry is clear, so a carry from the first test is kept. */ + ENDFUNC(__ordsf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/truncdfsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/truncdfsf2.S new file mode 100644 index 000000000..87f40e486 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/truncdfsf2.S @@ -0,0 +1,134 @@ +/* Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ /* Disabled self-check wrapper: calls __truncdfsf2_c and then __truncdfsf2_asm on the same r0 and r1, and reaches abort unless the two results are equal or the NaN test below accepts them. */ + FUNC(__truncdfsf2) + .global __truncdfsf2 + .balign 4 +__truncdfsf2: + push_s blink + push_s r0 + bl.d __truncdfsf2_c + push_s r1 + mov_s r2,r0 + pop_s r1 + ld r0,[sp] + bl.d __truncdfsf2_asm + st r2,[sp] /* delay slot: park the __truncdfsf2_c result on the stack */ + pop_s r1 /* r1 = saved __truncdfsf2_c result */ + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 /* non-zero unless both results have all exponent bits set */ + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__truncdfsf2) +#define __truncdfsf2 __truncdfsf2_asm +#endif /* DEBUG */ + + .global __truncdfsf2 + .balign 4 + FUNC(__truncdfsf2) +__truncdfsf2: /* Input is read from DBL0H and DBL0L, every return path leaves the result in r0. r2 = DBL0H shifted right by 20, i.e. bit 11 is the sign and bits 0..10 the exponent. DBL0H is then shifted left by 12, which drops those twelve bits. */ + lsr r2,DBL0H,20 + asl_s DBL0H,DBL0H,12 + sub r12,r2,0x380 /* 0x380 = 1023 - 127, the difference between the two exponent biases */ + bclr.f r3,r12,11 /* r3 = rebiased exponent without the sign bit, z set when it is zero */ + brhs r3,0xff,.Lill_exp + beq_l .Ldenorm0 + asl_s r12,r12,23 /* exponent moved to bit 23 and up, the sign bit is shifted out */ + tst DBL0L, \ + 0x2fffffff /* Check if msb guard bit wants rounding up. */ + lsr_s DBL0L,DBL0L,28 + lsr_s DBL0H,DBL0H,8 + add.ne DBL0L,DBL0L,1 + add_s DBL0H,DBL0H,DBL0L + lsr_s DBL0H,DBL0H + btst_s r2,11 /* test the input sign */ + add_s r0,DBL0H,r12 + j_s.d [blink] + bxor.ne r0,r0,31 /* delay slot: flip bit 31 of the result when the input sign bit was set */ + .balign 4 +.Lill_exp: /* Rebiased exponent is not in 1..0xfe. Bit 10 of r2 set goes to .Linf_nan, bhs_s below goes to .Lzero, the remaining inputs are shifted right by r12 and rounded. */ + bbit1 r2,10,.Linf_nan + bmsk_s r12,r12,9 + rsub.f r12,r12,8+0x400-32 ; Go from 9 to 1 guard bit in MSW. */ + bhs_s .Lzero + lsr r3,DBL0L,21 + rrc DBL0H,DBL0H ; insert leading 1 + asl.f 0,DBL0L,8 ; check lower 24 guard bits + add_s r3,DBL0H,r3 + add.pnz r3,r3,1 ; assemble fraction with compressed guard bits. + lsr r0,r3,r12 + neg_s r12,r12 + btst_s r0,1 + asl.eq.f r3,r3,r12 + add.ne r0,r0,1 + btst_s r2,11 + lsr_s r0,r0 + j_s.d [blink] + bxor.ne r0,r0,31 +.Lzero: /* return zero with bit 31 copied from bit 11 of r2 */ + lsr_s r2,r2,11 + j_s.d [blink] + asl r0,r2,31 +.Ldenorm0: /* Rebiased exponent is exactly zero, so r12 is 0 or 0x800 and the shift below leaves only the sign in bit 31. */ + asl_s r12,r12,20 + tst DBL0L, \ + 0x5fffffff /* Check if msb guard bit wants rounding up. 
*/ + lsr_s DBL0L,DBL0L,29 + lsr_s DBL0H,DBL0H,9 + add.ne DBL0L,DBL0L,1 + bset_s DBL0H,DBL0H,23 + add_s DBL0H,DBL0H,DBL0L + lsr_s DBL0H,DBL0H + j_s.d [blink] + add_l r0,DBL0H,r12 + +/* We would generally say that NaNs must have a non-zero high fraction part, + but to allow hardware double precision floating point to interoperate + with single precision software floating point, we make an exception here. + The cost is to replace a tst_s DBL0H with an or.f DBL0L,DBL0L,DBL0H . + As we start out unaligned, and there is an odd number of other short insns, + we have a choice of letting this cost us a misalign penalty or + 4 more bytes (if we align the code). We choose the former here because + infinity / NaN is not expected to be prevalent in time-critical code. */ +.Linf_nan: /* Result is built as the negation of r0, where r0 stays 1 only when the fraction is non-zero and the exponent field is 0x7ff, and otherwise becomes 1 << 23. Bit 31 is then flipped when the input sign bit (bit 11 of r12) is clear. */ + or.f DBL0L,DBL0L,DBL0H + mov_s r0,1 + add.ne r2,r2,1 + tst r2,0x7ff + asl.ne r0,r0,23 + btst_s r12,11 + neg r0,r0 + j_s.d [blink] + bxor.eq r0,r0,31 + ENDFUNC(__truncdfsf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/uneqdf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/uneqdf2.S new file mode 100644 index 000000000..3b9c7c902 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/uneqdf2.S @@ -0,0 +1,73 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 + output: z flag, set when the operands are unordered or compare equal + clobber: r12, flags + For NaNs, bit 19.. bit 30 of the high word must be set. */ +#if 0 /* DEBUG */ /* Disabled self-check wrapper: saves r0 to r11 and blink, calls __eqdf2_c and __unorddf2_c, and sets r11 to the __eqdf2_c result, forced to zero when __unorddf2_c returned non-zero. After restoring the registers it calls __uneqdf2_asm and reaches abort when the z flag is clear while r11 is zero, or set while r11 is non-zero. */ + .global __uneqdf2 + .balign 4 + FUNC(__uneqdf2) +__uneqdf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __eqdf2_c` push_s r0 + push_s r0` ld_s r0, [sp,4]` ld_s r1, [sp,8]` ld_s r2,[sp,12] + bl.d __unorddf2_c` ld_s r3,[sp,16] + ld.ab r11,[sp,4]` tst r0,r0` mov.ne r11,0 + pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __uneqdf2_asm` ld.ab r10,[sp,4] + pop_s blink + breq.d r11,0,0f + ld.ab r11,[sp,4] + jne_s [blink] + bl abort +0: jeq_s [blink] + bl abort + ENDFUNC(__uneqdf2) +#define __uneqdf2 __uneqdf2_asm +#endif /* DEBUG */ + .global __uneqdf2 + .balign 4 + HIDDEN_FUNC(__uneqdf2) +__uneqdf2: + cmp_s DBL0H,DBL1H + cmp.eq DBL0L,DBL1L + jeq_s [blink] /* both words identical: return with z set */ + or r12,DBL0H,DBL1H + or.f 0,DBL0L,DBL1L + bclr.eq.f r12,r12,31 + jeq_s [blink] /* every bit other than the two sign bits is zero: return with z set */ + mov_s r12, \ + 0x7ff80000 /* mask of bits 19..30 */ + bic.f 0,r12,DBL0H /* z set if DBL0H has all of bits 19..30 set */ + j_s.d [blink] + bic.ne.f r12,r12,DBL1H /* delay slot: otherwise the same test on DBL1H decides z */ + ENDFUNC(__uneqdf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/uneqsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/uneqsf2.S new file mode 100644 index 000000000..b89d7494a --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/uneqsf2.S @@ -0,0 +1,69 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, 
Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: z flag, set when the operands are unordered or compare equal + clobber: r12, flags + For NaNs, bit 22 .. bit 30 must be set. 
*/ +#if 0 /* DEBUG */ /* Disabled self-check wrapper: saves r0 to r11 and blink, calls __eqsf2_c and __unordsf2_c, and sets r11 to the __eqsf2_c result, forced to zero when __unordsf2_c returned non-zero. After restoring the registers it calls __uneqsf2_asm and reaches abort when the z flag is clear while r11 is zero, or set while r11 is non-zero. */ + .global __uneqsf2 + .balign 4 + FUNC(__uneqsf2) +__uneqsf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __eqsf2_c` push_s r0 + push_s r0` ld_s r0, [sp,4] + bl.d __unordsf2_c` ld_s r1,[sp,8] + ld.ab r11,[sp,4]` tst r0,r0` mov.ne r11,0 + pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __uneqsf2_asm` ld.ab r10,[sp,4] + pop_s blink + breq.d r11,0,0f + ld.ab r11,[sp,4] + jne_s [blink] + bl abort +0: jeq_s [blink] + bl abort + ENDFUNC(__uneqsf2) +#define __uneqsf2 __uneqsf2_asm +#endif /* DEBUG */ + .global __uneqsf2 + .balign 4 + HIDDEN_FUNC(__uneqsf2) +__uneqsf2: /* Each conditional step below runs only while z is still clear, so the first test that sets z decides the result. */ + mov_s r12, \ + 0x7fc00000 /* mask of bits 22..30 */ + bic.f 0,r12,r0 /* z set if r0 has all of bits 22..30 set */ + bic.ne.f r12,r12,r1 /* else z set if r1 has all of them set */ + or r12,r0,r1 + bmsk.ne.f r12,r12,30 /* else z set if r0 and r1 are both zero apart from bit 31 */ + j_s.d [blink] + cmp.ne r0,r1 /* delay slot: else z set if r0 equals r1 */ + ENDFUNC(__uneqsf2) |