diff options
Diffstat (limited to 'gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S')
-rw-r--r-- | gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S b/gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S new file mode 100644 index 000000000..06b850609 --- /dev/null +++ b/gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S @@ -0,0 +1,124 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../epiphany-asm.h" + +.section _fast_div_text,"a",@progbits; + .balign 8; +_fast_div_table: +.word 0x007fffff// mantissa mask +.word 0x40257ebb// hold constant a = 2.58586 + +.word 0x3f000000// hold constant 126 shifted to bits [30:23] +.word 0xc0ba2e88// hold constant b = -5.81818 + +.word 0x4087c1e8// hold constant c = 4.24242 +.word 0x40000000// to hold constant 2 for Newton-Raphson iterations + + .global SYM(__fast_recipsf2) + FUNC(__fast_recipsf2) +SYM(__fast_recipsf2): + +//################### +//# input operands: +//################### +// Divisor +//R0 +// Function address (used with negative offsets to read _fast_div_table) +//R1 +/* Scratch registers: two single (TMP0/TMP5) and two pairs. */ +#define P0L TMP1 +#define P0H TMP2 +#define P1L TMP3 +#define P1H TMP4 + +//######################################### +//# Constants to be used in the algorithm +//######################################### +ldrd P0L , [ R1 , -3 ] + +ldrd P1L , [ R1 , -2 ] + + + +//############################################################################# +//# The Algorithm +//# +//# Operation: C=A/B +//# stage 1 - find the reciprocal 1/B according to the following scheme: +//# B = (2^E)*m (1<m<2, E=e-127) +//# 1/B = 1/((2^E)*m) = 1/((2^(E+1))*m1) (0.5<m1<1) +//# = (2^-(E+1))*(1/m1) = (2^E1)*(1/m1) +//# +//# Now we can find the new exponent: +//# e1 = E1+127 = -E-1+127 = -e+127-1+127 = 253-e ** +//# 1/m1 alreadt has the exponent 127, so we have to add 126-e. +//# the exponent might underflow, which we can detect as a sign change. +//# Since the architeture uses flush-to-zero for subnormals, we can +//# give the result 0. then. +//# +//# The 1/m1 term with 0.5<m1<1 is approximated with the Chebyshev polynomial +//# 1/m1 = 2.58586*(m1^2) - 5.81818*m1 + 4.24242 +//# +//# Next step is to use two iterations of Newton-Raphson algorithm to complete +//# the reciprocal calculation. +//# +//# Final result is achieved by multiplying A with 1/B +//############################################################################# + + + +// R0 exponent and sign "replacement" into TMP0 +AND TMP0,R0,P0L ; +ORR TMP0,TMP0,P1L +SUB TMP5,R0,TMP0 // R0 sign/exponent extraction into TMP5 +// Calculate new mantissa +FMADD P1H,TMP0,P0H ; + // Calculate new exponent offset 126 - "old exponent" + SUB P1L,P1L,TMP5 + ldrd P0L , [ R1 , -1 ] +FMADD P0L,TMP0,P1H ; + eor P1H,r0,P1L // check for overflow (N-BIT). + blt .Lret_0 +// P0L exponent and sign "replacement" +sub P0L,P0L,TMP5 + +// Newton-Raphson iteration #1 +MOV TMP0,P0H ; +FMSUB P0H,R0,P0L ; +FMUL P0L,P0H,P0L ; +// Newton-Raphson iteration #2 +FMSUB TMP0,R0,P0L ; +FMUL R0,TMP0,P0L ; +jr lr +.Lret_0:ldrd P0L , [ R1 , -3 ] + lsr TMP0,r0,31 ; extract sign + lsl TMP0,TMP0,31 + add P0L,P0L,r0 ; check for NaN input + eor P0L,P0L,r0 + movgte r0,TMP0 + jr lr +// Quotient calculation is expected by the caller: FMUL quotient,divident,R0 + ; + ENDFUNC(__fast_recipsf2) |