aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S')
-rw-r--r--gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S124
1 files changed, 124 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S b/gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S
new file mode 100644
index 000000000..06b850609
--- /dev/null
+++ b/gcc-4.9/libgcc/config/epiphany/ieee-754/fast_div.S
@@ -0,0 +1,124 @@
+/* Copyright (C) 2011-2014 Free Software Foundation, Inc.
+ Contributed by Embecosm on behalf of Adapteva, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "../epiphany-asm.h"
+
+.section _fast_div_text,"a",@progbits;
+ .balign 8;
+_fast_div_table:
+.word 0x007fffff// mantissa mask
+.word 0x40257ebb// hold constant a = 2.58586
+
+.word 0x3f000000// hold constant 126 shifted to bits [30:23]
+.word 0xc0ba2e88// hold constant b = -5.81818
+
+.word 0x4087c1e8// hold constant c = 4.24242
+.word 0x40000000// to hold constant 2 for Newton-Raphson iterations
+
+ .global SYM(__fast_recipsf2)
+ FUNC(__fast_recipsf2)
+SYM(__fast_recipsf2):
+
+//###################
+//# input operands:
+//###################
+// Divisor
+//R0
+// Function address (used with negative offsets to read _fast_div_table)
+//R1
+/* Scratch registers: two single (TMP0/TMP5) and two pairs. */
+#define P0L TMP1
+#define P0H TMP2
+#define P1L TMP3
+#define P1H TMP4
+
+//#########################################
+//# Constants to be used in the algorithm
+//#########################################
+ldrd P0L , [ R1 , -3 ]
+
+ldrd P1L , [ R1 , -2 ]
+
+
+
+//#############################################################################
+//# The Algorithm
+//#
+//# Operation: C=A/B
+//# stage 1 - find the reciprocal 1/B according to the following scheme:
+//# B = (2^E)*m (1<m<2, E=e-127)
+//# 1/B = 1/((2^E)*m) = 1/((2^(E+1))*m1) (0.5<m1<1)
+//# = (2^-(E+1))*(1/m1) = (2^E1)*(1/m1)
+//#
+//# Now we can find the new exponent:
+//# e1 = E1+127 = -E-1+127 = -e+127-1+127 = 253-e **
+//# 1/m1 alreadt has the exponent 127, so we have to add 126-e.
+//# the exponent might underflow, which we can detect as a sign change.
+//# Since the architeture uses flush-to-zero for subnormals, we can
+//# give the result 0. then.
+//#
+//# The 1/m1 term with 0.5<m1<1 is approximated with the Chebyshev polynomial
+//# 1/m1 = 2.58586*(m1^2) - 5.81818*m1 + 4.24242
+//#
+//# Next step is to use two iterations of Newton-Raphson algorithm to complete
+//# the reciprocal calculation.
+//#
+//# Final result is achieved by multiplying A with 1/B
+//#############################################################################
+
+
+
+// R0 exponent and sign "replacement" into TMP0
+AND TMP0,R0,P0L ;
+ORR TMP0,TMP0,P1L
+SUB TMP5,R0,TMP0 // R0 sign/exponent extraction into TMP5
+// Calculate new mantissa
+FMADD P1H,TMP0,P0H ;
+ // Calculate new exponent offset 126 - "old exponent"
+ SUB P1L,P1L,TMP5
+ ldrd P0L , [ R1 , -1 ]
+FMADD P0L,TMP0,P1H ;
+ eor P1H,r0,P1L // check for overflow (N-BIT).
+ blt .Lret_0
+// P0L exponent and sign "replacement"
+sub P0L,P0L,TMP5
+
+// Newton-Raphson iteration #1
+MOV TMP0,P0H ;
+FMSUB P0H,R0,P0L ;
+FMUL P0L,P0H,P0L ;
+// Newton-Raphson iteration #2
+FMSUB TMP0,R0,P0L ;
+FMUL R0,TMP0,P0L ;
+jr lr
+.Lret_0:ldrd P0L , [ R1 , -3 ]
+ lsr TMP0,r0,31 ; extract sign
+ lsl TMP0,TMP0,31
+ add P0L,P0L,r0 ; check for NaN input
+ eor P0L,P0L,r0
+ movgte r0,TMP0
+ jr lr
+// Quotient calculation is expected by the caller: FMUL quotient,divident,R0
+ ;
+ ENDFUNC(__fast_recipsf2)