Diffstat (limited to 'gcc-4.9/libgcc/config/xtensa/ieee754-sf.S')
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/ieee754-sf.S   1757
1 file changed, 1757 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S b/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S
new file mode 100644
index 000000000..e96785c3f
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S
@@ -0,0 +1,1757 @@
+/* IEEE-754 single-precision functions for Xtensa
+ Copyright (C) 2006-2014 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
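+
+/* The xh/xl and yh/yl macros name the two halves of a 64-bit value held
+   in an a2/a3 or a4/a5 register pair: on big-endian targets the high
+   word lives in the lower-numbered register of the pair.  Only xh/xl
+   are referenced in this file (by the 64-bit fixed/float conversion
+   routines below); yh/yl follow the same pattern but are unused here. */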
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+ are quite small, and this code has been carefully laid out to keep
+ branch targets in range. If you change anything, be sure to check that
+ the assembler is not relaxing anything to branch over a jump. */
+
+#ifdef L_negsf2
+
+ .align 4
+ .global __negsf2
+ .type __negsf2, @function
+__negsf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor a2, a2, a4
+ leaf_return
+
+#endif /* L_negsf2 */
+
+#ifdef L_addsubsf3
+
+ /* Addition */
+__addsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, a2, 9
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov a2, a3
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __addsf3
+ .type __addsf3, @function
+__addsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+ /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
+ ball a2, a6, .Ladd_xnan_or_inf
+ ball a3, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, a2, 23, 9
+ extui a8, a3, 23, 9
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Ladd_yexpzero
+
+ /* Replace y sign/exponent with 0x008. */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+ /* Exponent difference >= 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ /* Do the addition. */
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add a2, a2, a3
+1: leaf_return
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli a2, a2, 9
+ srli a2, a2, 9
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone a2, a6, .Ladd_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_returny
+
+ ssr a10
+ sll a9, a2
+ srl a2, a2
+
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
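+ /* a9 holds the bits shifted out of the smaller operand: its msb is
+    the guard (1/2) bit and the rest act as sticky bits, so a negative
+    a9 means the leftover fraction is at least 1/2.  If nothing is left
+    after shifting out the guard bit, the sum was exactly halfway and
+    is rounded to the nearest even value below.  */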
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_returny:
+ mov a2, a3
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x800000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
+ or:
+ shifted mantissa + ((x + 1) << 22)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
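+
+ /* For example, 1.5f + 1.5f: both operands are 0x3fc00000, the aligned
+    addition gives a2 = 0x40800000 (the carry has bumped the exponent
+    field), and a8 holds the original exponent 0x7f.  Shifting a2 right
+    by one gives 0x20400000, and adding (0x7f + 1) << 22 = 0x20000000
+    yields 0x40400000, i.e. 3.0f.  */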
+
+ /* Shift x right by one bit. Save the lsb. */
+ mov a10, a2
+ srli a2, a2, 1
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 22
+ add a2, a2, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball a2, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi a2, a2, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ srli a2, a2, 23
+ slli a2, a2, 23
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or a2, a2, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+
+ /* Subtraction */
+__subsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subsf3
+ .type __subsf3, @function
+__subsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+ /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
+ ball a2, a6, .Lsub_xnan_or_inf
+ ball a3, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, a2, 23, 8
+ extui a8, a3, 23, 8
+ bltu a2, a3, .Lsub_xsmaller
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Lsub_yexpzero
+
+ /* Replace y sign/exponent with 0x008. */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+ /* Exponent difference >= 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ sub a2, a2, a3
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from a2. */
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_yexpzero:
+ /* Return zero if the inputs are equal. (For the non-subnormal
+ case, subtracting the "1.0" will cause a borrow from the exponent
+ and this case can be detected when handling the borrow.) */
+ beq a2, a3, .Lsub_return_zero
+
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+1: leaf_return
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone a2, a6, .Lsub_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_returny
+
+ ssr a10
+ movi a9, 0
+ src a9, a2, a9
+ srl a2, a2
+
+ /* Negate y. */
+ slli a11, a6, 8
+ xor a3, a3, a11
+
+ sub a2, a3, a2
+
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ beq a2, a3, .Lsub_return_zero
+ slli a2, a2, 9
+ srli a2, a2, 9
+ bnone a3, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_return_zero:
+ movi a2, 0
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
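+
+ /* For example, 2.0f - 1.5f leaves a2 = 0x3fa00000, whose exponent
+    field has absorbed a borrow.  do_nsau finds one leading zero in the
+    shifted-out mantissa, so it is shifted left by two (the nsau result
+    plus one), and after the exponent adjustment below the result is
+    0x3f000000, i.e. 0.5f.  */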
+
+ slli a8, a2, 9
+ beqz a8, .Lsub_xzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 9
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_normalize_shift:
+ /* Shift the mantissa (a8/a9) left by a6. */
+ ssl a6
+ src a8, a8, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli a2, a2, 23
+ sub a2, a2, a6
+ slli a2, a2, 23
+ add a2, a2, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Lsub_xzero:
+ /* If there was a borrow from the exponent, and the mantissa and
+ guard digits are all zero, then the inputs were equal and the
+ result should be zero. */
+ beqz a9, .Lsub_return_zero
+
+ /* Only the guard digit is nonzero. Shift by min(24, a10). */
+ addi a11, a10, -24
+ movi a6, 24
+ movltz a6, a10, a11
+ j .Lsub_normalize_shift
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubsf3 */
+
+#ifdef L_mulsf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__mulsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* If y is zero, return zero. */
+ beqz a3, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ slli a8, a3, 1
+ bnez a8, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall a3, a6, .Lmul_returnx
+ slli a8, a3, 9
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov a2, a3
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli a2, a2, 1
+ ssai 1
+ src a2, a7, a2
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ slli a8, a2, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x400000 /* make it a quiet NaN */
+ or a2, a3, a7
+ j .Lmul_done
+
+ .align 4
+ .global __mulsf3
+ .type __mulsf3, @function
+__mulsf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Lmul_xnan_or_inf
+ ball a3, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+
+ mull a6, a2, a3
+ muluh a2, a2, a3
+
+#else
+
+ /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
+ products. These partial products are:
+
+ 0 xl * yl
+
+ 1 xl * yh
+ 2 xh * yl
+
+ 3 xh * yh
+
+ If using the Mul16 or Mul32 multiplier options, these input
+ chunks must be stored in separate registers. For Mac16, the
+ UMUL.AA.* opcodes can specify that the inputs come from either
+ half of the registers, so there is no need to shift them out
+ ahead of time. If there is no multiply hardware, the 16-bit
+ chunks can be extracted when setting up the arguments to the
+ separate multiply function. */
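+
+ /* In other words, writing x = (xh << 16) + xl and y = (yh << 16) + yl
+    for the 16-bit halves, the full 64-bit product is
+
+	(xh * yh << 32) + ((xl * yh + xh * yl) << 16) + (xl * yl)
+
+    and the code below sums these partial products, propagating the
+    carries by hand.  */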
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 4
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into a2. */
+ do_mul(a2, a2, h, a3, h) /* pp 3 */
+ add a2, a2, a9
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a0, sp, 0
+ l32i a8, sp, 4
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Shift left by 9 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 8 bits and increment the
+ exponent. */
+ movi a4, 9
+ srli a5, a2, 24 - 9
+ beqz a5, 1f
+ addi a4, a4, -1
+ addi a8, a8, 1
+1: ssl a4
+ src a2, a2, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x80
+ sub a8, a8, a4
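+
+ /* For example, 2.0f * 2.0f: both exponent fields are 0x80, so a8 is
+    0x100 before the bias adjustment and 0x80 afterwards.  The mantissa
+    product is exactly 1.0, leaving a2 = 0x00800000, and adding
+    0x80 << 23 below gives 0x40800000, i.e. 4.0f.  */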
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..fd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Lmul_overflow
+
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi a2, a2, 1
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a2, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ movi a8, 0xff
+ slli a2, a8, 23
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_flush_to_zero
+
+ /* Shift a2 right. Any bits that are shifted out of a2 are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, a2
+ srl a2, a2
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
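+
+ /* Roughly, in illustrative C terms, the loop below computes
+
+	dst = 0;
+	while (src1 != 0) {
+	  if (src1 & 1) dst += src2;
+	  if (src1 & 2) dst += src2 << 1;
+	  if (src1 & 4) dst += src2 << 2;
+	  if (src1 & 8) dst += src2 << 3;
+	  src1 >>= 4;
+	  src2 <<= 4;
+	}
+
+    handling four bits of src1 per iteration and using conditional
+    moves instead of branches.  */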
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_mulsf3 */
+
+#ifdef L_divsf3
+
+ /* Division */
+__divsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* Check for division by zero. */
+ beqz a3, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli a4, a2, 1
+ srli a4, a4, 1
+ srli a2, a7, 31
+ slli a2, a2, 31
+ or a2, a2, a6
+ bnez a4, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, a3, 31
+ slli a7, a7, 31
+ xor a2, a2, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall a3, a6, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, a3, 9
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov a2, a3
+ leaf_return
+
+ .align 4
+ .global __divsf3
+ .type __divsf3, @function
+__divsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Ldiv_xnan_or_inf
+ ball a3, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu a3, a2, 1f
+ slli a2, a2, 1
+ addi a8, a8, -1
+1:
+ /* Do the first subtraction and shift. */
+ sub a2, a2, a3
+ slli a2, a2, 1
+
+ /* Put the quotient into a10. */
+ movi a10, 1
+
+ /* Divide one bit at a time for 23 bits. */
+ movi a9, 23
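+
+ /* The loop below is a standard restoring-division step; roughly, in
+    illustrative C terms:
+
+	for (i = 0; i < 23; i++) {
+	  quo <<= 1;
+	  if (rem >= div) {
+	    quo |= 1;
+	    rem -= div;
+	  }
+	  rem <<= 1;
+	}
+
+    with the remainder in a2, the divisor in a3 and the quotient
+    accumulating in a10.  */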
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ slli a10, a10, 1
+
+ /* Is this digit a 0 or 1? */
+ bltu a2, a3, 1f
+
+ /* Output a 1 and subtract. */
+ addi a10, a10, 1
+ sub a2, a2, a3
+
+ /* Shift the dividend << 1. */
+1: slli a2, a2, 1
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ addi a8, a8, 0x7e
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..fd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in a2. */
+ bltu a2, a3, .Ldiv_rounded
+ addi a10, a10, 1
+ beq a2, a3, .Ldiv_exactlyhalf
+
+.Ldiv_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+ leaf_return
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0xff */
+ slli a2, a8, 23
+ j .Ldiv_addsign
+
+.Ldiv_exactlyhalf:
+ /* Remainder is exactly half the divisor. Round even. */
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_flush_to_zero
+
+ /* Shift a10 right. Any bits that are shifted out of a10 are
+ saved in a6 for rounding the result. */
+ sll a6, a10
+ srl a10, a10
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero remainder (in a2) into a6. */
+ beqz a2, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a10, a10, 1
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_divsf3 */
+
+#ifdef L_cmpsf2
+
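+ /* These helpers follow the usual libgcc comparison conventions:
+    __eqsf2/__nesf2 return zero only when the operands compare equal
+    (so a NaN is unequal even to itself), __gtsf2 and __ltsf2 return 0
+    when either operand is a NaN, and __gesf2 and __lesf2 return -1 and
+    1 respectively in that case, so every ordered comparison involving
+    a NaN comes out false.  __unordsf2 returns nonzero iff either
+    operand is a NaN.  */
+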
+ /* Equal and Not Equal */
+
+ .align 4
+ .global __eqsf2
+ .global __nesf2
+ .set __nesf2, __eqsf2
+ .type __eqsf2, @function
+__eqsf2:
+ leaf_entry sp, 16
+ bne a2, a3, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, a2, 9
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, a2, a3
+ slli a7, a7, 1
+
+ /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+ of x when exponent(x) = 0x7f8 and x == y. */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtsf2
+ .type __gtsf2, @function
+__gtsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __lesf2
+ .type __lesf2, @function
+__lesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu a3, a2, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bgeu a2, a3, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz a2, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gesf2
+ .type __gesf2, @function
+__gesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltsf2
+ .type __ltsf2, @function
+__ltsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Llt_xneg
+
+ /* Check if x < y. */
+ bgeu a2, a3, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu a3, a2, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez a2, 5b
+
+ /* Check if both x and y are nonzero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unordsf2
+ .type __unordsf2, @function
+__unordsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+1: ball a3, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, a3, 9
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpsf2 */
+
+#ifdef L_fixsfsi
+
+ .align 4
+ .global __fixsfsi
+ .type __fixsfsi, @function
+__fixsfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 32, .Lfixsfsi_maxint
+ blti a4, 1, .Lfixsfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
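+
+ /* For example, converting 5.5f (0x40b00000): the exponent field is
+    0x81, so a4 = 3; or-ing in the exponent mask and shifting left by 8
+    gives a5 = 0xb0000000, and shifting that right by 32 - 3 = 29
+    leaves 5 -- the fraction is simply truncated.  */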
+
+.Lfixsfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixsfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixsfsi */
+
+#ifdef L_fixsfdi
+
+ .align 4
+ .global __fixsfdi
+ .type __fixsfdi, @function
+__fixsfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 64, .Lfixsfdi_maxint
+ blti a4, 1, .Lfixsfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixsfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixsfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixsfdi_smallshift:
+ movi xl, 0
+ sll xl, xh
+ srl xh, xh
+ j .Lfixsfdi_shifted
+
+.Lfixsfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfdi_maxint:
+ slli a7, a6, 8 /* 0x80000000 */
+ bgez a2, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixsfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixsfdi */
+
+#ifdef L_fixunssfsi
+
+ .align 4
+ .global __fixunssfsi
+ .type __fixunssfsi, @function
+__fixunssfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 32, .Lfixunssfsi_maxint
+ bltz a4, .Lfixunssfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunssfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunssfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunssfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixunssfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunssfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a2, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 8
+ leaf_return
+
+#endif /* L_fixunssfsi */
+
+#ifdef L_fixunssfdi
+
+ .align 4
+ .global __fixunssfdi
+ .type __fixunssfdi, @function
+__fixunssfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 64, .Lfixunssfdi_maxint
+ bltz a4, .Lfixunssfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunssfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunssfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunssfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunssfdi_smallshift:
+ movi xl, 0
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunssfdi_shifted
+
+.Lfixunssfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunssfdi_maxint:
+ bgez a2, 1b
+2: slli xh, a6, 8 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ movi xl, 0
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunssfdi */
+
+#ifdef L_floatsisf
+
+ .align 4
+ .global __floatunsisf
+ .type __floatunsisf, @function
+__floatunsisf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsisf_return
+
+ /* Set the sign to zero and jump to the floatsisf code. */
+ movi a7, 0
+ j .Lfloatsisf_normalize
+
+ .align 4
+ .global __floatsisf
+ .type __floatsisf, @function
+__floatsisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsisf_return
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsisf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ srli a2, a5, 8
+ slli a6, a5, (32 - 8)
+
+ /* Set the exponent. */
+ movi a5, 0x9d /* 0x7e + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, a2, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
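+
+ /* For example, converting 5: do_nsau finds 29 leading zeros, so the
+    value is normalized to 0xa0000000; the mantissa becomes 0x00a00000
+    with no leftover rounding bits, the exponent is 0x9d - 29 = 0x80,
+    and the result is 0x40a00000, i.e. 5.0f.  */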
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, .Lfloatsisf_return
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatsisf_exactlyhalf
+
+.Lfloatsisf_return:
+ leaf_return
+
+.Lfloatsisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatsisf */
+
+#ifdef L_floatdisf
+
+ .align 4
+ .global __floatundisf
+ .type __floatundisf, @function
+__floatundisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdisf code. */
+ movi a7, 0
+ j .Lfloatdisf_normalize
+
+ .align 4
+ .global __floatdisf
+ .type __floatdisf, @function
+__floatdisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdisf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdisf_normalize
+ addi xh, xh, -1
+
+.Lfloatdisf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdisf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdisf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 8
+ sll a5, xl
+ src a6, xh, xl
+ srl xh, xh
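+
+ /* The low byte of xl (now in the top byte of a5) did not fit into a6;
+    if any of those bits are nonzero, fold a sticky 1 into a6 so they
+    still influence the rounding below.  */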
+ beqz a5, 1f
+ movi a5, 1
+ or a6, a6, a5
+1:
+ /* Set the exponent. */
+ movi a5, 0xbd /* 0x7e + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdisf_exactlyhalf
+2: leaf_return
+
+.Lfloatdisf_bigshift:
+ /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdisf_shifted
+
+.Lfloatdisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatdisf */