Diffstat (limited to 'gcc-4.9/libgcc/config/xtensa')
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/crti.S               |   51
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/crtn.S               |   46
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/ieee754-df.S         | 2388
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/ieee754-sf.S         | 1757
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/lib1funcs.S          |  844
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/lib2funcs.S          |  186
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/libgcc-glibc.ver     |    3
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/linux-unwind.h       |   97
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/t-elf                |    5
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/t-linux              |    1
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/t-xtensa             |   16
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.c  |  543
-rw-r--r--  gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.h  |   49
13 files changed, 5986 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/xtensa/crti.S b/gcc-4.9/libgcc/config/xtensa/crti.S
new file mode 100644
index 000000000..13e5e7ed9
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/crti.S
@@ -0,0 +1,51 @@
+# Start .init and .fini sections.
+# Copyright (C) 2003-2014 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections. Users may put any desired instructions in those
+# sections.
+
+#include "xtensa-config.h"
+
+ .section .init
+ .globl _init
+ .type _init,@function
+ .align 4
+_init:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry sp, 64
+#else
+ addi sp, sp, -32
+ s32i a0, sp, 0
+#endif
+
+ .section .fini
+ .globl _fini
+ .type _fini,@function
+ .align 4
+_fini:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry sp, 64
+#else
+ addi sp, sp, -32
+ s32i a0, sp, 0
+#endif
diff --git a/gcc-4.9/libgcc/config/xtensa/crtn.S b/gcc-4.9/libgcc/config/xtensa/crtn.S
new file mode 100644
index 000000000..fda453b97
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/crtn.S
@@ -0,0 +1,46 @@
+# End of .init and .fini sections.
+# Copyright (C) 2003-2014 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return. Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+#include "xtensa-config.h"
+
+ .section .init
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ l32i a0, sp, 0
+ addi sp, sp, 32
+ ret
+#endif
+
+ .section .fini
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ l32i a0, sp, 0
+ addi sp, sp, 32
+ ret
+#endif
diff --git a/gcc-4.9/libgcc/config/xtensa/ieee754-df.S b/gcc-4.9/libgcc/config/xtensa/ieee754-df.S
new file mode 100644
index 000000000..3582338ac
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/ieee754-df.S
@@ -0,0 +1,2388 @@
+/* IEEE-754 double-precision functions for Xtensa
+ Copyright (C) 2006-2014 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+ are quite small, and this code has been carefully laid out to keep
+ branch targets in range. If you change anything, be sure to check that
+ the assembler is not relaxing anything to branch over a jump. */
+
+#ifdef L_negdf2
+
+ .align 4
+ .global __negdf2
+ .type __negdf2, @function
+__negdf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor xh, xh, a4
+ leaf_return
+
+#endif /* L_negdf2 */
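For reference, __negdf2 above only flips the sign bit in the high word of the IEEE-754 representation. A minimal C model of the same operation (the name negdf2_model and the memcpy-based type punning are illustrative choices, not part of this patch):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Flip bit 63 (the sign) of the double's bit pattern, as the asm does by
   XORing 0x80000000 into the high word; the low word is untouched.  */
static double negdf2_model (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  bits ^= (uint64_t) 0x80000000u << 32;
  memcpy (&x, &bits, sizeof bits);
  return x;
}

int main (void)
{
  printf ("%g %g\n", negdf2_model (1.5), negdf2_model (-0.0));
  return 0;
}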
+
+#ifdef L_addsubdf3
+
+ /* Addition */
+__adddf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall yh, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 11
+ xor yh, yh, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __adddf3
+ .type __adddf3, @function
+__adddf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, xh, yh
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+ /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
+ ball xh, a6, .Ladd_xnan_or_inf
+ ball yh, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, xh, 20, 12
+ extui a8, yh, 20, 12
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone yh, a6, .Ladd_yexpzero
+
+ /* Replace yh sign/exponent with 0x001. */
+ or yh, yh, a6
+ slli yh, yh, 11
+ srli yh, yh, 11
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. Optimize for difference < 32. */
+ sub a10, a7, a8
+ bgeui a10, 32, .Ladd_bigshifty
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out of yl are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, yl, a9
+ src yl, yh, yl
+ srl yh, yh
+
+.Ladd_addy:
+ /* Do the 64-bit addition. */
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1:
+ /* Check if the add overflowed into the exponent. */
+ extui a10, xh, 20, 12
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli yh, yh, 12
+ srli yh, yh, 12
+ bnone xh, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1: leaf_return
+
+.Ladd_bigshifty:
+ /* Exponent difference > 64 -- just return the bigger value. */
+ bgeui a10, 64, 1b
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out are saved in a9 for rounding the result. */
+ ssr a10
+ sll a11, yl /* lost bits shifted out of yl */
+ src a9, yh, yl
+ srl yl, yh
+ movi yh, 0
+ beqz a11, .Ladd_addy
+ or a9, a9, a10 /* any positive, nonzero value will work */
+ j .Ladd_addy
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli xh, xh, 12
+ srli xh, xh, 12
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone xh, a6, .Ladd_xexpzero
+
+ or xh, xh, a6
+ slli xh, xh, 11
+ srli xh, xh, 11
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_bigshiftx
+
+ ssr a10
+ sll a9, xl
+ src xl, xh, xl
+ srl xh, xh
+
+.Ladd_addx:
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1:
+ /* Check if the add overflowed into the exponent. */
+ extui a10, xh, 20, 12
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi xl, xl, 1
+ beqz xl, .Ladd_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_bigshiftx:
+ /* Mostly the same thing as "bigshifty".... */
+ bgeui a10, 64, .Ladd_returny
+
+ ssr a10
+ sll a11, xl
+ src a9, xh, xl
+ srl xl, xh
+ movi xh, 0
+ beqz a11, .Ladd_addx
+ or a9, a9, a10
+ j .Ladd_addx
+
+.Ladd_returny:
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x100000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
+ or:
+ shifted mantissa + ((x + 1) << 19)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
+
+ /* Shift xh/xl right by one bit. Save the lsb of xl. */
+ mov a10, xl
+ ssai 1
+ src xl, xh, xl
+ srl xh, xh
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 19
+ add xh, xh, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball xh, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi xl, xl, 1
+ beqz xl, .Ladd_roundcarry
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ movi xl, 0
+ srli xh, xh, 20
+ slli xh, xh, 20
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or xh, xh, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Ladd_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+
+ /* Subtraction */
+__subdf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall yh, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 11
+ xor xh, yh, a7
+ mov xl, yl
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 11
+ xor yh, yh, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subdf3
+ .type __subdf3, @function
+__subdf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, xh, yh
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+ /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
+ ball xh, a6, .Lsub_xnan_or_inf
+ ball yh, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, xh, 20, 11
+ extui a8, yh, 20, 11
+ bltu xh, yh, .Lsub_xsmaller
+ beq xh, yh, .Lsub_compare_low
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone yh, a6, .Lsub_yexpzero
+
+ /* Replace yh sign/exponent with 0x001. */
+ or yh, yh, a6
+ slli yh, yh, 11
+ srli yh, yh, 11
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. Optimize for difference < 32. */
+ sub a10, a7, a8
+ bgeui a10, 32, .Lsub_bigshifty
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out of yl are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, yl, a9
+ src yl, yh, yl
+ srl yh, yh
+
+.Lsub_suby:
+ /* Do the 64-bit subtraction. */
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from xh/xl. */
+ neg a9, a9
+ beqz a9, 1f
+ addi a5, xh, -1
+ moveqz xh, a5, xl
+ addi xl, xl, -1
+1:
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, xh, 20, 11
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_compare_low:
+ /* The high words are equal. Compare the low words. */
+ bltu xl, yl, .Lsub_xsmaller
+ bltu yl, xl, .Lsub_ysmaller
+ /* The operands are equal. Return 0.0. */
+ movi xh, 0
+ movi xl, 0
+1: leaf_return
+
+.Lsub_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli yh, yh, 12
+ srli yh, yh, 12
+ bnone xh, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_bigshifty:
+ /* Exponent difference > 64 -- just return the bigger value. */
+ bgeui a10, 64, 1b
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out are saved in a9 for rounding the result. */
+ ssr a10
+ sll a11, yl /* lost bits shifted out of yl */
+ src a9, yh, yl
+ srl yl, yh
+ movi yh, 0
+ beqz a11, .Lsub_suby
+ or a9, a9, a10 /* any positive, nonzero value will work */
+ j .Lsub_suby
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone xh, a6, .Lsub_xexpzero
+
+ or xh, xh, a6
+ slli xh, xh, 11
+ srli xh, xh, 11
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_bigshiftx
+
+ ssr a10
+ movi a9, 0
+ src a9, xl, a9
+ src xl, xh, xl
+ srl xh, xh
+
+ /* Negate y. */
+ slli a11, a6, 11
+ xor yh, yh, a11
+
+.Lsub_subx:
+ sub xl, yl, xl
+ sub xh, yh, xh
+ bgeu yl, xl, 1f
+ addi xh, xh, -1
+1:
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from xh/xl. */
+ neg a9, a9
+ beqz a9, 1f
+ addi a5, xh, -1
+ moveqz xh, a5, xl
+ addi xl, xl, -1
+1:
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, xh, 20, 11
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi xl, xl, 1
+ beqz xl, .Lsub_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ slli xh, xh, 12
+ srli xh, xh, 12
+ bnone yh, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_bigshiftx:
+ /* Mostly the same thing as "bigshifty", but with the sign bit of the
+ shifted value set so that the subsequent subtraction flips the
+ sign of y. */
+ bgeui a10, 64, .Lsub_returny
+
+ ssr a10
+ sll a11, xl
+ src a9, xh, xl
+ srl xl, xh
+ slli xh, a6, 11 /* set sign bit of xh */
+ beqz a11, .Lsub_subx
+ or a9, a9, a10
+ j .Lsub_subx
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 11
+ xor xh, yh, a7
+ mov xl, yl
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
+
+ slli a8, xh, 12
+ beqz a8, .Lsub_xhzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 12
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_shift_lt32:
+ /* Shift the mantissa (a8/xl/a9) left by a6. */
+ ssl a6
+ src a8, a8, xl
+ src xl, xl, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli xh, xh, 20
+ sub xh, xh, a6
+ slli xh, xh, 20
+ add xh, xh, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Lsub_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+.Lsub_xhzero:
+ /* When normalizing the result, all the mantissa bits in the high
+ word are zero. Shift by "20 + (leading zero count of xl) + 1". */
+ do_nsau a6, xl, a7, a11
+ addi a6, a6, 21
+ blt a10, a6, .Lsub_subnormal
+
+.Lsub_normalize_shift:
+ bltui a6, 32, .Lsub_shift_lt32
+
+ ssl a6
+ src a8, xl, a9
+ sll xl, a9
+ movi a9, 0
+
+ srli xh, xh, 20
+ sub xh, xh, a6
+ slli xh, xh, 20
+ add xh, xh, a8
+ j .Lsub_round
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubdf3 */
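The .Ladd_round/.Lsub_round sequences above implement round-to-nearest with ties to even, using a register of left-aligned leftover bits as the guard and sticky information. A small C sketch of that decision, assuming the same layout (guard bit in the msb of rest); the helper name is illustrative and not part of this patch:

#include <stdint.h>

/* Round the 64-bit mantissa hi:lo to nearest, ties to even.  "rest" holds
   the bits shifted out of the low word, left-aligned, so its msb is the
   guard bit and any lower bits act as sticky bits.  */
static void round_nearest_even (uint32_t *hi, uint32_t *lo, uint32_t rest)
{
  if (rest & 0x80000000u)       /* leftover fraction >= 1/2 */
    {
      if (++*lo == 0)           /* carry out of the low word ...          */
        ++*hi;                  /* ... may overflow into the exponent: OK */
      else if ((rest << 1) == 0)
        *lo &= ~1u;             /* exactly 1/2: round down to even */
    }
}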
+
+#ifdef L_muldf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__muldf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+
+ /* If x is zero, return zero. */
+ or a10, xh, xl
+ beqz a10, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ beqz xh, .Lmul_xh_zero
+ do_nsau a10, xh, a11, a12
+ addi a10, a10, -11
+ ssl a10
+ src xh, xh, xl
+ sll xl, xl
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+.Lmul_xh_zero:
+ do_nsau a10, xl, a11, a12
+ addi a10, a10, -11
+ movi a8, -31
+ sub a8, a8, a10
+ ssl a10
+ bltz a10, .Lmul_xl_srl
+ sll xh, xl
+ movi xl, 0
+ j .Lmul_xnormalized
+.Lmul_xl_srl:
+ srl xh, xl
+ sll xl, xl
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli yh, yh, 1
+ srli yh, yh, 1
+
+ /* If y is zero, return zero. */
+ or a10, yh, yl
+ beqz a10, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ beqz yh, .Lmul_yh_zero
+ do_nsau a10, yh, a11, a12
+ addi a10, a10, -11
+ ssl a10
+ src yh, yh, yl
+ sll yl, yl
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+.Lmul_yh_zero:
+ do_nsau a10, yl, a11, a12
+ addi a10, a10, -11
+ movi a9, -31
+ sub a9, a9, a10
+ ssl a10
+ bltz a10, .Lmul_yl_srl
+ sll yh, yl
+ movi yl, 0
+ j .Lmul_ynormalized
+.Lmul_yl_srl:
+ srl yh, yl
+ sll yl, yl
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ bnez yl, 1f
+ slli a8, yh, 1
+ bnez a8, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall yh, a6, .Lmul_returnx
+ slli a8, yh, 12
+ or a8, a8, yl
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov xh, yh
+ mov xl, yl
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli xh, xh, 1
+ ssai 1
+ src xh, a7, xh
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ bnez xl, .Lmul_returny
+ slli a8, xh, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x80000 /* make it a quiet NaN */
+ or xh, yh, a7
+ j .Lmul_done
+
+ .align 4
+ .global __muldf3
+ .type __muldf3, @function
+__muldf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7ff00000
+
+ /* Get the sign of the result. */
+ xor a7, xh, yh
+
+ /* Check for NaN and infinity. */
+ ball xh, a6, .Lmul_xnan_or_inf
+ ball yh, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, xh, 20, 11
+ extui a9, yh, 20, 11
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0x1fffff
+ or xh, xh, a6
+ and xh, xh, a10
+ or yh, yh, a6
+ and yh, yh, a10
+
+ /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
+ The least-significant word of the result is thrown away except
+ that if it is nonzero, the lsb of a6 is set to 1. */
+#if XCHAL_HAVE_MUL32_HIGH
+
+ /* Compute a6 with any carry-outs in a10. */
+ movi a10, 0
+ mull a6, xl, yh
+ mull a11, xh, yl
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ muluh a11, xl, yl
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ /* If the low word of the result is nonzero, set the lsb of a6. */
+ mull a11, xl, yl
+ beqz a11, 1f
+ movi a9, 1
+ or a6, a6, a9
+1:
+ /* Compute xl with any carry-outs in a9. */
+ movi a9, 0
+ mull a11, xh, yh
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ muluh a11, xh, yl
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ muluh xl, xl, yh
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ /* Compute xh. */
+ muluh xh, xh, yh
+ add xh, xh, a9
+
+#else /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
+ products. These partial products are:
+
+ 0 xll * yll
+
+ 1 xll * ylh
+ 2 xlh * yll
+
+ 3 xll * yhl
+ 4 xlh * ylh
+ 5 xhl * yll
+
+ 6 xll * yhh
+ 7 xlh * yhl
+ 8 xhl * ylh
+ 9 xhh * yll
+
+ 10 xlh * yhh
+ 11 xhl * yhl
+ 12 xhh * ylh
+
+ 13 xhl * yhh
+ 14 xhh * yhl
+
+ 15 xhh * yhh
+
+ where the input chunks are (hh, hl, lh, ll). If using the Mul16
+ or Mul32 multiplier options, these input chunks must be stored in
+ separate registers. For Mac16, the UMUL.AA.* opcodes can specify
+ that the inputs come from either half of the registers, so there
+ is no need to shift them out ahead of time. If there is no
+ multiply hardware, the 16-bit chunks can be extracted when setting
+ up the arguments to the separate multiply function. */
+
+ /* Save a7 since it is needed to hold a temporary value. */
+ s32i a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 8
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define xlh a12
+#define ylh a13
+#define xhh a14
+#define yhh a15
+
+ /* Get the high halves of the inputs into registers. */
+ srli xlh, xl, 16
+ srli ylh, yl, 16
+ srli xhh, xh, 16
+ srli yhh, yh, 16
+
+#define xll xl
+#define yll yl
+#define xhl xh
+#define yhl yh
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui xl, xl, 0, 16
+ extui xh, xh, 0, 16
+ extui yl, yl, 0, 16
+ extui yh, yh, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a10 with carry-out in a9. */
+ do_mul(a10, xl, l, yl, h) /* pp 1 */
+ do_mul(a11, xl, h, yl, l) /* pp 2 */
+ movi a9, 0
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Initialize a6 with a9/a10 shifted into position. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a6, a9, a10
+
+ /* Compute the low word into a10. */
+ do_mul(a11, xl, l, yl, l) /* pp 0 */
+ sll a10, a10
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a6, a6, 1
+1:
+ /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
+ This is good enough to determine the low half of a6, so that any
+ nonzero bits from the low word of the result can be collapsed
+ into a6, freeing up a register. */
+ movi a9, 0
+ do_mul(a11, xl, l, yh, l) /* pp 3 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a11, xl, h, yl, h) /* pp 4 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a11, xh, l, yl, l) /* pp 5 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Collapse any nonzero bits from the low word into a6. */
+ beqz a10, 1f
+ movi a11, 1
+ or a6, a6, a11
+1:
+ /* Add pp6-9 into a11 with carry-outs in a10. */
+ do_mul(a7, xl, l, yh, h) /* pp 6 */
+ do_mul(a11, xh, h, yl, l) /* pp 9 */
+ movi a10, 0
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ do_mul(a7, xl, h, yh, l) /* pp 7 */
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ do_mul(a7, xh, l, yl, h) /* pp 8 */
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ /* Shift a10/a11 into position, and add low half of a11 to a6. */
+ src a10, a10, a11
+ add a10, a10, a9
+ sll a11, a11
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ /* Add pp10-12 into xl with carry-outs in a9. */
+ movi a9, 0
+ do_mul(xl, xl, h, yh, h) /* pp 10 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a10, xh, l, yh, l) /* pp 11 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a10, xh, h, yl, h) /* pp 12 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ /* Add pp13-14 into a11 with carry-outs in a10. */
+ do_mul(a11, xh, l, yh, h) /* pp 13 */
+ do_mul(a7, xh, h, yh, l) /* pp 14 */
+ movi a10, 0
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ /* Shift a10/a11 into position, and add low half of a11 to a6. */
+ src a10, a10, a11
+ add a10, a10, a9
+ sll a11, a11
+ add xl, xl, a11
+ bgeu xl, a11, 1f
+ addi a10, a10, 1
+1:
+ /* Compute xh. */
+ do_mul(xh, xh, h, yh, h) /* pp 15 */
+ add xh, xh, a10
+
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ l32i a0, sp, 0
+ l32i a8, sp, 8
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Shift left by 12 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 11 bits and increment the
+ exponent. Note: It is convenient to use the constant 0x3ff
+ instead of 0x400 when removing the extra exponent bias (so that
+ it is easy to construct 0x7fe for the overflow check). Reverse
+ the logic here to decrement the exponent sum by one unless there
+ was a carry-out. */
+ movi a4, 11
+ srli a5, xh, 21 - 12
+ bnez a5, 1f
+ addi a4, a4, 1
+ addi a8, a8, -1
+1: ssl a4
+ src xh, xh, xl
+ src xl, xl, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x3ff
+ sub a8, a8, a4
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..7fd are OK here. */
+ slli a4, a4, 1 /* 0x7fe */
+ bgeu a8, a4, .Lmul_overflow
+
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi xl, xl, 1
+ beqz xl, .Lmul_roundcarry
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 20
+ add xh, xh, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or xh, xh, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ j .Lmul_rounded
+
+.Lmul_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow is OK -- it will be added to the exponent. */
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0x7ff */
+ slli xh, a8, 20
+ movi xl, 0
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_bigshift
+
+ /* Shift xh/xl right. Any bits that are shifted out of xl are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, xl
+ src xl, xh, xl
+ srl xh, xh
+ j 1f
+
+.Lmul_bigshift:
+ bgeui a8, 64, .Lmul_flush_to_zero
+ sll a10, xl /* lost bits shifted out of xl */
+ src a6, xh, xl
+ srl xl, xh
+ movi xh, 0
+ or a9, a9, a10
+
+ /* Set the exponent to zero. */
+1: movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_muldf3 */
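The mantissa multiply above produces a 64x64 to 128-bit product and keeps only the top 96 bits in xh/xl/a6, folding any nonzero bits of the dropped low word into the lsb of a6. A C sketch of that computation using 32-bit halves and 64-bit intermediates (the assembly uses 16-bit chunks when only MUL16/MAC16 are available, but the column structure is the same); the function and variable names are illustrative:

#include <stdint.h>

static void mul64x64_high96 (uint64_t x, uint64_t y,
                             uint32_t *hi, uint32_t *mid, uint32_t *lo)
{
  uint64_t x_lo = (uint32_t) x, x_hi = x >> 32;
  uint64_t y_lo = (uint32_t) y, y_hi = y >> 32;

  uint64_t ll = x_lo * y_lo;          /* the four 32x32 partial products */
  uint64_t lh = x_lo * y_hi;
  uint64_t hl = x_hi * y_lo;
  uint64_t hh = x_hi * y_hi;

  /* Sum the middle column; its carry feeds the high column.  */
  uint64_t mid_sum = (ll >> 32) + (uint32_t) lh + (uint32_t) hl;
  uint64_t high    = hh + (lh >> 32) + (hl >> 32) + (mid_sum >> 32);

  *hi  = (uint32_t) (high >> 32);     /* product bits 127..96 */
  *mid = (uint32_t) high;             /* product bits  95..64 */
  *lo  = (uint32_t) mid_sum           /* product bits  63..32 ...        */
         | ((uint32_t) ll != 0);      /* ... with a sticky bit for 31..0 */
}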
+
+#ifdef L_divdf3
+
+ /* Division */
+__divdf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli yh, yh, 1
+ srli yh, yh, 1
+
+ /* Check for division by zero. */
+ or a10, yh, yl
+ beqz a10, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ beqz yh, .Ldiv_yh_zero
+ do_nsau a10, yh, a11, a9
+ addi a10, a10, -11
+ ssl a10
+ src yh, yh, yl
+ sll yl, yl
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+.Ldiv_yh_zero:
+ do_nsau a10, yl, a11, a9
+ addi a10, a10, -11
+ movi a9, -31
+ sub a9, a9, a10
+ ssl a10
+ bltz a10, .Ldiv_yl_srl
+ sll yh, yl
+ movi yl, 0
+ j .Ldiv_ynormalized
+.Ldiv_yl_srl:
+ srl yh, yl
+ sll yl, yl
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+ or xl, xl, xh
+ srli xh, a7, 31
+ slli xh, xh, 31
+ or xh, xh, a6
+ bnez xl, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: movi xl, 0
+ leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+
+ /* If x is zero, return zero. */
+ or a10, xh, xl
+ beqz a10, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ beqz xh, .Ldiv_xh_zero
+ do_nsau a10, xh, a11, a8
+ addi a10, a10, -11
+ ssl a10
+ src xh, xh, xl
+ sll xl, xl
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+.Ldiv_xh_zero:
+ do_nsau a10, xl, a11, a8
+ addi a10, a10, -11
+ movi a8, -31
+ sub a8, a8, a10
+ ssl a10
+ bltz a10, .Ldiv_xl_srl
+ sll xh, xl
+ movi xl, 0
+ j .Ldiv_xnormalized
+.Ldiv_xl_srl:
+ srl xh, xl
+ sll xl, xl
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, yh, 31
+ slli a7, a7, 31
+ xor xh, xh, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall yh, a6, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, yh, 12
+ or a8, a8, yl
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ldiv_highequal1:
+ bltu xl, yl, 2f
+ j 3f
+
+ .align 4
+ .global __divdf3
+ .type __divdf3, @function
+__divdf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Get the sign of the result. */
+ xor a7, xh, yh
+
+ /* Check for NaN and infinity. */
+ ball xh, a6, .Ldiv_xnan_or_inf
+ ball yh, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, xh, 20, 11
+ extui a9, yh, 20, 11
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0x1fffff
+ or xh, xh, a6
+ and xh, xh, a10
+ or yh, yh, a6
+ and yh, yh, a10
+
+ /* Set SAR for left shift by one. */
+ ssai (32 - 1)
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu yh, xh, 3f
+ beq yh, xh, .Ldiv_highequal1
+2: src xh, xh, xl
+ sll xl, xl
+ addi a8, a8, -1
+3:
+ /* Do the first subtraction and shift. */
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+ src xh, xh, xl
+ sll xl, xl
+
+ /* Put the quotient into a10/a11. */
+ movi a10, 0
+ movi a11, 1
+
+ /* Divide one bit at a time for 52 bits. */
+ movi a9, 52
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ src a10, a10, a11
+ sll a11, a11
+
+ /* Is this digit a 0 or 1? */
+ bltu xh, yh, 3f
+ beq xh, yh, .Ldiv_highequal2
+
+ /* Output a 1 and subtract. */
+2: addi a11, a11, 1
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+
+ /* Shift the dividend << 1. */
+3: src xh, xh, xl
+ sll xl, xl
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ movi a9, 0x3fe
+ add a8, a8, a9
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..7fd are OK here. */
+ addmi a9, a9, 0x400 /* 0x7fe */
+ bgeu a8, a9, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in xh/xl. */
+ bltu xh, yh, .Ldiv_rounded
+ beq xh, yh, .Ldiv_highequal3
+.Ldiv_roundup:
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+
+.Ldiv_rounded:
+ mov xl, a11
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 20
+ add xh, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or xh, xh, a7
+ leaf_return
+
+.Ldiv_highequal2:
+ bgeu xl, yl, 2b
+ j 3b
+
+.Ldiv_highequal3:
+ bltu xl, yl, .Ldiv_rounded
+ bne xl, yl, .Ldiv_roundup
+
+ /* Remainder is exactly half the divisor. Round even. */
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+ srli a11, a11, 1
+ slli a11, a11, 1
+ j .Ldiv_rounded
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a9, 1 /* 0x7ff */
+ slli xh, a8, 20
+ movi xl, 0
+ j .Ldiv_addsign
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_bigshift
+
+ /* Shift a10/a11 right. Any bits that are shifted out of a11 are
+ saved in a6 for rounding the result. */
+ sll a6, a11
+ src a11, a10, a11
+ srl a10, a10
+ j 1f
+
+.Ldiv_bigshift:
+ bgeui a8, 64, .Ldiv_flush_to_zero
+ sll a9, a11 /* lost bits shifted out of a11 */
+ src a6, a10, a11
+ srl a11, a10
+ movi a10, 0
+ or xl, xl, a9
+
+ /* Set the exponent to zero. */
+1: movi a8, 0
+
+ /* Pack any nonzero remainder (in xh/xl) into a6. */
+ or xh, xh, xl
+ beqz xh, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10/a11 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a11, a11, 1
+ slli a11, a11, 1
+ j .Ldiv_rounded
+
+.Ldiv_roundcarry:
+ /* a11 is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi a10, a10, 1
+ /* Overflow to the exponent field is OK. */
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ leaf_return
+
+#endif /* L_divdf3 */
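The quotient in __divdf3 is developed one bit per iteration by the shift-and-subtract loop above. A C sketch of that loop with the two normalized 53-bit mantissas held in 64-bit integers, assuming x has already been adjusted so the first quotient digit is 1 (as the code before the loop guarantees); the function name is illustrative:

#include <stdint.h>

/* Produce a 53-bit quotient of two normalized mantissas and leave the
   doubled remainder for the rounding step, mirroring .Ldiv_loop.  */
static uint64_t div_mantissa (uint64_t x, uint64_t y, uint64_t *rem2)
{
  uint64_t q = 1;              /* the first digit is known to be 1 */
  x = (x - y) << 1;            /* first subtraction and shift */

  for (int i = 0; i < 52; i++) /* one quotient bit per iteration */
    {
      q <<= 1;
      if (x >= y)
        {
          q |= 1;              /* output a 1 and subtract */
          x -= y;
        }
      x <<= 1;                 /* shift the dividend << 1 */
    }
  *rem2 = x;                   /* remainder << 1, used for rounding */
  return q;
}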
+
+#ifdef L_cmpdf2
+
+ /* Equal and Not Equal */
+
+ .align 4
+ .global __eqdf2
+ .global __nedf2
+ .set __nedf2, __eqdf2
+ .type __eqdf2, @function
+__eqdf2:
+ leaf_entry sp, 16
+ bne xl, yl, 2f
+ bne xh, yh, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7ff00000
+ ball xh, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, xh, 12
+ or a7, a7, xl
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl /* xl == yl here */
+
+ /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+ of x when exponent(x) = 0x7ff and x == y. */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtdf2
+ .type __gtdf2, @function
+__gtdf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __ledf2
+ .type __ledf2, @function
+__ledf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, xh, yh
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz xh, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu xh, yh, 4f
+ bne xh, yh, 5f
+ bltu yl, xl, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bltu yh, xh, 4b
+ bne yh, xh, 5f
+ bgeu xl, yl, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz xh, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl
+ or a7, a7, yl
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gedf2
+ .type __gedf2, @function
+__gedf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltdf2
+ .type __ltdf2, @function
+__ltdf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, xh, yh
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz xh, .Llt_xneg
+
+ /* Check if x < y. */
+ bltu xh, yh, 4f
+ bne xh, yh, 5f
+ bgeu xl, yl, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu yh, xh, 4b
+ bne yh, xh, 5f
+ bltu yl, xl, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez xh, 5b
+
+ /* Check if both x and y are nonzero. */
+ or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl
+ or a7, a7, yl
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unorddf2
+ .type __unorddf2, @function
+__unorddf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 3f
+1: ball yh, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpdf2 */
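All of the comparisons above special-case NaN by testing for an all-ones exponent with a nonzero mantissa. A C model of __unorddf2's result (nonzero iff either operand is a NaN); the name unorddf2_model is an illustrative choice, not part of this patch:

#include <stdint.h>
#include <string.h>

static int unorddf2_model (double x, double y)
{
  uint64_t bx, by;
  memcpy (&bx, &x, sizeof bx);
  memcpy (&by, &y, sizeof by);

  uint64_t exp_mask  = (uint64_t) 0x7ff00000u << 32;   /* exponent bits */
  uint64_t frac_mask = ~(exp_mask | ((uint64_t) 1 << 63));

  int x_nan = ((bx & exp_mask) == exp_mask) && (bx & frac_mask) != 0;
  int y_nan = ((by & exp_mask) == exp_mask) && (by & frac_mask) != 0;
  return x_nan || y_nan;
}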
+
+#ifdef L_fixdfsi
+
+ .align 4
+ .global __fixdfsi
+ .type __fixdfsi, @function
+__fixdfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixdfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 19, 10 /* 0x3fe */
+ sub a4, a4, a5
+ bgei a4, 32, .Lfixdfsi_maxint
+ blti a4, 1, .Lfixdfsi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src a5, a7, xl
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixdfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixdfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi xh, 0
+
+.Lfixdfsi_maxint:
+ slli a4, a6, 11 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, xh
+ mov a2, a4
+ leaf_return
+
+.Lfixdfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixdfsi */
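A C sketch of __fixdfsi's logic for reference: the exponent is rebased against 0x3fe, out-of-range values saturate (with NaN mapped to +maxint, as noted above), and otherwise the mantissa with its implicit 1 is shifted into place. The helper name and the 64-bit load are illustrative; the assembly works on the xh/xl register pair directly:

#include <stdint.h>
#include <string.h>

static int32_t fixdfsi_model (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);

  uint32_t hi = bits >> 32, lo = (uint32_t) bits;
  int exp = (int) ((hi >> 20) & 0x7ff) - 0x3fe;   /* same bias the asm uses */

  if (exp >= 32)                                  /* too big, Inf, or NaN */
    {
      if ((hi & 0x7ff00000) == 0x7ff00000 && (((hi << 12) | lo) != 0))
        return INT32_MAX;                         /* NaN -> +maxint */
      return (int32_t) hi < 0 ? INT32_MIN : INT32_MAX;
    }
  if (exp < 1)
    return 0;                                     /* |x| < 1 */

  /* Implicit 1.0 at the top, then shift right based on the exponent.  */
  uint32_t mant = ((hi | 0x7ff00000) << 11) | (lo >> 21);
  uint32_t val = mant >> (32 - exp);
  return (int32_t) hi < 0 ? -(int32_t) val : (int32_t) val;
}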
+
+#ifdef L_fixdfdi
+
+ .align 4
+ .global __fixdfdi
+ .type __fixdfdi, @function
+__fixdfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixdfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 19, 10 /* 0x3fe */
+ sub a4, a4, a5
+ bgei a4, 64, .Lfixdfdi_maxint
+ blti a4, 1, .Lfixdfdi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src xh, a7, xl
+ sll xl, xl
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixdfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixdfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixdfdi_smallshift:
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixdfdi_shifted
+
+.Lfixdfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixdfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi xh, 0
+
+.Lfixdfdi_maxint:
+ slli a7, a6, 11 /* 0x80000000 */
+ bgez xh, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixdfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixdfdi */
+
+#ifdef L_fixunsdfsi
+
+ .align 4
+ .global __fixunsdfsi
+ .type __fixunsdfsi, @function
+__fixunsdfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixunsdfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 20, 10 /* 0x3ff */
+ sub a4, a4, a5
+ bgei a4, 32, .Lfixunsdfsi_maxint
+ bltz a4, .Lfixunsdfsi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src a5, a7, xl
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunsdfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunsdfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixunsdfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunsdfsi_maxint:
+ slli a4, a6, 11 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, xh
+ mov a2, a4
+ leaf_return
+
+.Lfixunsdfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunsdfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz xh, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 11
+ leaf_return
+
+#endif /* L_fixunsdfsi */
+
+#ifdef L_fixunsdfdi
+
+ .align 4
+ .global __fixunsdfdi
+ .type __fixunsdfdi, @function
+__fixunsdfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixunsdfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 20, 10 /* 0x3ff */
+ sub a4, a4, a5
+ bgei a4, 64, .Lfixunsdfdi_maxint
+ bltz a4, .Lfixunsdfdi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src xh, a7, xl
+ sll xl, xl
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunsdfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunsdfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunsdfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunsdfdi_smallshift:
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunsdfdi_shifted
+
+.Lfixunsdfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixunsdfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunsdfdi_maxint:
+ bgez xh, 1b
+2: slli xh, a6, 11 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunsdfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunsdfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunsdfdi */
+
+#ifdef L_floatsidf
+
+ .align 4
+ .global __floatunsidf
+ .type __floatunsidf, @function
+__floatunsidf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsidf_return_zero
+
+ /* Set the sign to zero and jump to the floatsidf code. */
+ movi a7, 0
+ j .Lfloatsidf_normalize
+
+ .align 4
+ .global __floatsidf
+ .type __floatsidf, @function
+__floatsidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsidf_return_zero
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsidf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position. */
+ srli xh, a5, 11
+ slli xl, a5, (32 - 11)
+
+ /* Set the exponent. */
+ movi a5, 0x41d /* 0x3fe + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 20
+ add xh, xh, a5
+
+ /* Add the sign and return. */
+ slli a7, a7, 31
+ or xh, xh, a7
+ leaf_return
+
+.Lfloatsidf_return_zero:
+ movi a3, 0
+ leaf_return
+
+#endif /* L_floatsidf */
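For reference, a C sketch of __floatsidf's normalization: count the leading zeros of the magnitude (the asm uses NSAU via do_nsau, modelled here with __builtin_clz), shift the leading 1 to the msb, and form the exponent as 0x3fe + 31 - clz so that the leading 1 landing in the exponent field supplies the final +1. Names are illustrative:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double floatsidf_model (int32_t i)
{
  if (i == 0)
    return 0.0;

  uint32_t sign = (uint32_t) i >> 31;
  uint32_t mag = sign ? -(uint32_t) i : (uint32_t) i;
  int clz = __builtin_clz (mag);                /* stand-in for NSAU */
  uint32_t norm = mag << clz;                   /* leading 1 in bit 31 */

  uint32_t hi = ((norm >> 11)                   /* mantissa + leading 1 */
                 + ((uint32_t) (0x3fe + 31 - clz) << 20))
                | (sign << 31);
  uint32_t lo = norm << 21;                     /* remaining mantissa bits */

  uint64_t bits = ((uint64_t) hi << 32) | lo;
  double d;
  memcpy (&d, &bits, sizeof d);
  return d;
}

int main (void)
{
  printf ("%g %g\n", floatsidf_model (12345), floatsidf_model (-7));
  return 0;
}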
+
+#ifdef L_floatdidf
+
+ .align 4
+ .global __floatundidf
+ .type __floatundidf, @function
+__floatundidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdidf code. */
+ movi a7, 0
+ j .Lfloatdidf_normalize
+
+ .align 4
+ .global __floatdidf
+ .type __floatdidf, @function
+__floatdidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdidf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdidf_normalize
+ addi xh, xh, -1
+
+.Lfloatdidf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdidf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdidf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 11
+ sll a6, xl
+ src xl, xh, xl
+ srl xh, xh
+
+ /* Set the exponent. */
+ movi a5, 0x43d /* 0x3fe + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 20
+ add xh, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or xh, xh, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi xl, xl, 1
+ beqz xl, .Lfloatdidf_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdidf_exactlyhalf
+2: leaf_return
+
+.Lfloatdidf_bigshift:
+ /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdidf_shifted
+
+.Lfloatdidf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Lfloatdidf_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+#endif /* L_floatdidf */
+
+#ifdef L_truncdfsf2
+
+ .align 4
+ .global __truncdfsf2
+ .type __truncdfsf2, @function
+__truncdfsf2:
+ leaf_entry sp, 16
+
+ /* Adjust the exponent bias. */
+ movi a4, (0x3ff - 0x7f) << 20
+ sub a5, xh, a4
+
+ /* Check for underflow. */
+ xor a6, xh, a5
+ bltz a6, .Ltrunc_underflow
+ extui a6, a5, 20, 11
+ beqz a6, .Ltrunc_underflow
+
+ /* Check for overflow. */
+ movi a4, 255
+ bge a6, a4, .Ltrunc_overflow
+
+ /* Shift a5/xl << 3 into a5/a4. */
+ ssai (32 - 3)
+ src a5, a5, xl
+ sll a4, xl
+
+.Ltrunc_addsign:
+ /* Add the sign bit. */
+ extui a6, xh, 31, 1
+ slli a6, a6, 31
+ or a2, a6, a5
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a4, 1f
+ addi a2, a2, 1
+ /* Overflow to the exponent is OK. The answer will be correct. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a4, a4, 1
+ beqz a4, .Ltrunc_exactlyhalf
+1: leaf_return
+
+.Ltrunc_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Ltrunc_overflow:
+ /* Check if exponent == 0x7ff. */
+ movi a4, 0x7ff00000
+ bnall xh, a4, 1f
+
+ /* Check if mantissa is nonzero. */
+ slli a5, xh, 12
+ or a5, a5, xl
+ beqz a5, 1f
+
+ /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
+ srli a4, a4, 1
+
+1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
+ /* Add the sign bit. */
+ extui a6, xh, 31, 1
+ ssai 1
+ src a2, a6, a4
+ leaf_return
+
+.Ltrunc_underflow:
+ /* Find shift count for a subnormal. Flush to zero if >= 32. */
+ extui a6, xh, 20, 11
+ movi a5, 0x3ff - 0x7f
+ sub a6, a5, a6
+ addi a6, a6, 1
+ bgeui a6, 32, 1f
+
+ /* Replace the exponent with an explicit "1.0". */
+ slli a5, a5, 13 /* 0x700000 */
+ or a5, a5, xh
+ slli a5, a5, 11
+ srli a5, a5, 11
+
+ /* Shift the mantissa left by 3 bits (into a5/a4). */
+ ssai (32 - 3)
+ src a5, a5, xl
+ sll a4, xl
+
+ /* Shift right by a6. */
+ ssr a6
+ sll a7, a4
+ src a4, a5, a4
+ srl a5, a5
+ beqz a7, .Ltrunc_addsign
+ or a4, a4, a6 /* any positive, nonzero value will work */
+ j .Ltrunc_addsign
+
+ /* Return +/- zero. */
+1: extui a2, xh, 31, 1
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_truncdfsf2 */
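A C sketch of the main path of __truncdfsf2 for values whose result is a normal single: rebias the exponent by 0x3ff - 0x7f, keep the top 23 mantissa bits, and round to nearest/even on the 29 dropped bits. The overflow, underflow/subnormal and NaN cases handled by the assembly are deliberately omitted here, and the helper name is illustrative:

#include <stdint.h>
#include <string.h>

static float trunc_normal (double d)
{
  uint64_t bits;
  memcpy (&bits, &d, sizeof bits);

  uint32_t sign = (uint32_t) (bits >> 63);
  uint32_t exp  = (uint32_t) (bits >> 52) & 0x7ff;
  uint64_t frac = bits & 0xfffffffffffffULL;       /* 52 mantissa bits */

  uint32_t out = (sign << 31)
                 | ((exp - (0x3ff - 0x7f)) << 23)  /* adjust the bias */
                 | (uint32_t) (frac >> 29);        /* top 23 mantissa bits */
  uint32_t rest = (uint32_t) (frac << 3);          /* dropped bits, left-aligned */

  if (rest & 0x80000000u)      /* dropped fraction >= 1/2: round up */
    {
      out += 1;                /* carry into the exponent is fine */
      if ((rest << 1) == 0)    /* exactly 1/2: round to even */
        out &= ~1u;
    }
  float f;
  memcpy (&f, &out, sizeof f);
  return f;
}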
+
+#ifdef L_extendsfdf2
+
+ .align 4
+ .global __extendsfdf2
+ .type __extendsfdf2, @function
+__extendsfdf2:
+ leaf_entry sp, 16
+
+ /* Save the sign bit and then shift it off. */
+ extui a5, a2, 31, 1
+ slli a5, a5, 31
+ slli a4, a2, 1
+
+ /* Extract and check the exponent. */
+ extui a6, a2, 23, 8
+ beqz a6, .Lextend_expzero
+ addi a6, a6, 1
+ beqi a6, 256, .Lextend_nan_or_inf
+
+ /* Shift >> 3 into a4/xl. */
+ srli a4, a4, 4
+ slli xl, a2, (32 - 3)
+
+ /* Adjust the exponent bias. */
+ movi a6, (0x3ff - 0x7f) << 20
+ add a4, a4, a6
+
+ /* Add the sign bit. */
+ or xh, a4, a5
+ leaf_return
+
+.Lextend_nan_or_inf:
+ movi a4, 0x7ff00000
+
+ /* Check for NaN. */
+ slli a7, a2, 9
+ beqz a7, 1f
+
+ slli a6, a6, 11 /* 0x80000 */
+ or a4, a4, a6
+
+ /* Add the sign and return. */
+1: or xh, a4, a5
+ movi xl, 0
+ leaf_return
+
+.Lextend_expzero:
+ beqz a4, 1b
+
+ /* Normalize it to have 8 zero bits before the first 1 bit. */
+ do_nsau a7, a4, a2, a3
+ addi a7, a7, -8
+ ssl a7
+ sll a4, a4
+
+ /* Shift >> 3 into a4/xl. */
+ slli xl, a4, (32 - 3)
+ srli a4, a4, 3
+
+ /* Set the exponent. */
+ movi a6, 0x3fe - 0x7f
+ sub a6, a6, a7
+ slli a6, a6, 20
+ add a4, a4, a6
+
+ /* Add the sign and return. */
+ or xh, a4, a5
+ leaf_return
+
+#endif /* L_extendsfdf2 */
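[Editorial note] The widening conversion above can be modeled in C roughly as follows (an illustrative sketch, not the libgcc source; as in the assembly, a NaN comes back as the default quiet NaN and its payload is not preserved):

  #include <stdint.h>

  static uint64_t
  extend_sf_df (uint32_t f)       /* returns the IEEE bit pattern of the double */
  {
    uint64_t sign = (uint64_t) (f >> 31) << 63;
    int exp = (f >> 23) & 0xff;
    uint64_t frac = f & 0x7fffff;

    if (exp == 0xff)              /* Infinity or NaN */
      return sign | (frac ? 0x7ff8000000000000ull : 0x7ff0000000000000ull);
    if (exp == 0)
      {
        if (frac == 0)            /* +/- zero */
          return sign;
        while (!(frac & 0x800000))        /* normalize the subnormal input */
          {
            frac <<= 1;
            exp--;
          }
        frac &= 0x7fffff;
        exp++;                    /* a subnormal's effective exponent is 1 */
      }
    return sign | ((uint64_t) (exp + (0x3ff - 0x7f)) << 52) | (frac << 29);
  }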
+
+
diff --git a/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S b/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S
new file mode 100644
index 000000000..e96785c3f
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S
@@ -0,0 +1,1757 @@
+/* IEEE-754 single-precision functions for Xtensa
+ Copyright (C) 2006-2014 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+ are quite small, and this code has been carefully laid out to keep
+ branch targets in range. If you change anything, be sure to check that
+ the assembler is not relaxing anything to branch over a jump. */
+
+#ifdef L_negsf2
+
+ .align 4
+ .global __negsf2
+ .type __negsf2, @function
+__negsf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor a2, a2, a4
+ leaf_return
+
+#endif /* L_negsf2 */
+
+#ifdef L_addsubsf3
+
+ /* Addition */
+__addsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, a2, 9
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov a2, a3
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __addsf3
+ .type __addsf3, @function
+__addsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+ /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
+ ball a2, a6, .Ladd_xnan_or_inf
+ ball a3, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, a2, 23, 9
+ extui a8, a3, 23, 9
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Ladd_yexpzero
+
+ /* Replace y sign/exponent with 0x008. */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+	/* Exponent difference >= 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ /* Do the addition. */
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add a2, a2, a3
+1: leaf_return
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli a2, a2, 9
+ srli a2, a2, 9
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone a2, a6, .Ladd_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_returny
+
+ ssr a10
+ sll a9, a2
+ srl a2, a2
+
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_returny:
+ mov a2, a3
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x800000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
+ or:
+ shifted mantissa + ((x + 1) << 22)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
+
+ /* Shift x right by one bit. Save the lsb. */
+ mov a10, a2
+ srli a2, a2, 1
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 22
+ add a2, a2, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball a2, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi a2, a2, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ srli a2, a2, 23
+ slli a2, a2, 23
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or a2, a2, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+
+ /* Subtraction */
+__subsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subsf3
+ .type __subsf3, @function
+__subsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+ /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
+ ball a2, a6, .Lsub_xnan_or_inf
+ ball a3, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, a2, 23, 8
+ extui a8, a3, 23, 8
+ bltu a2, a3, .Lsub_xsmaller
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Lsub_yexpzero
+
+ /* Replace y sign/exponent with 0x008. */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+	/* Exponent difference >= 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ sub a2, a2, a3
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from a2. */
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_yexpzero:
+ /* Return zero if the inputs are equal. (For the non-subnormal
+ case, subtracting the "1.0" will cause a borrow from the exponent
+ and this case can be detected when handling the borrow.) */
+ beq a2, a3, .Lsub_return_zero
+
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+1: leaf_return
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone a2, a6, .Lsub_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_returny
+
+ ssr a10
+ movi a9, 0
+ src a9, a2, a9
+ srl a2, a2
+
+ /* Negate y. */
+ slli a11, a6, 8
+ xor a3, a3, a11
+
+ sub a2, a3, a2
+
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ beq a2, a3, .Lsub_return_zero
+ slli a2, a2, 9
+ srli a2, a2, 9
+ bnone a3, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_return_zero:
+ movi a2, 0
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
+
+ slli a8, a2, 9
+ beqz a8, .Lsub_xzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 9
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_normalize_shift:
+ /* Shift the mantissa (a8/a9) left by a6. */
+ ssl a6
+ src a8, a8, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli a2, a2, 23
+ sub a2, a2, a6
+ slli a2, a2, 23
+ add a2, a2, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Lsub_xzero:
+ /* If there was a borrow from the exponent, and the mantissa and
+ guard digits are all zero, then the inputs were equal and the
+ result should be zero. */
+ beqz a9, .Lsub_return_zero
+
+ /* Only the guard digit is nonzero. Shift by min(24, a10). */
+ addi a11, a10, -24
+ movi a6, 24
+ movltz a6, a10, a11
+ j .Lsub_normalize_shift
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubsf3 */
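[Editorial note] The key data-flow idea in __addsf3/__subsf3 above is that the smaller operand's mantissa is shifted right by the exponent difference while the shifted-out bits are kept (register a9) to drive the final rounding. A small C sketch of that alignment step (names are illustrative; exponent differences of 32 or more are handled by simply returning the larger operand, as the assembly does):

  #include <stdint.h>

  /* Shift *mant right by expdiff (0 <= expdiff < 32) and return the bits
     shifted out, left-aligned, for use as round/sticky information.  */
  static uint32_t
  align_smaller (uint32_t *mant, unsigned expdiff)
  {
    uint32_t guard = expdiff ? *mant << (32 - expdiff) : 0;
    *mant >>= expdiff;
    return guard;
  }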
+
+#ifdef L_mulsf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__mulsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* If y is zero, return zero. */
+ beqz a3, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ slli a8, a3, 1
+ bnez a8, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall a3, a6, .Lmul_returnx
+ slli a8, a3, 9
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov a2, a3
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli a2, a2, 1
+ ssai 1
+ src a2, a7, a2
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ slli a8, a2, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x400000 /* make it a quiet NaN */
+ or a2, a3, a7
+ j .Lmul_done
+
+ .align 4
+ .global __mulsf3
+ .type __mulsf3, @function
+__mulsf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Lmul_xnan_or_inf
+ ball a3, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+
+ mull a6, a2, a3
+ muluh a2, a2, a3
+
+#else
+
+ /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
+ products. These partial products are:
+
+ 0 xl * yl
+
+ 1 xl * yh
+ 2 xh * yl
+
+ 3 xh * yh
+
+ If using the Mul16 or Mul32 multiplier options, these input
+ chunks must be stored in separate registers. For Mac16, the
+ UMUL.AA.* opcodes can specify that the inputs come from either
+ half of the registers, so there is no need to shift them out
+ ahead of time. If there is no multiply hardware, the 16-bit
+ chunks can be extracted when setting up the arguments to the
+ separate multiply function. */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 4
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into a2. */
+ do_mul(a2, a2, h, a3, h) /* pp 3 */
+ add a2, a2, a9
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a0, sp, 0
+ l32i a8, sp, 4
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Shift left by 9 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 8 bits and increment the
+ exponent. */
+ movi a4, 9
+ srli a5, a2, 24 - 9
+ beqz a5, 1f
+ addi a4, a4, -1
+ addi a8, a8, 1
+1: ssl a4
+ src a2, a2, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x80
+ sub a8, a8, a4
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..fd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Lmul_overflow
+
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi a2, a2, 1
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a2, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ movi a8, 0xff
+ slli a2, a8, 23
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_flush_to_zero
+
+ /* Shift a2 right. Any bits that are shifted out of a2 are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, a2
+ srl a2, a2
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_mulsf3 */
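[Editorial note] The partial-product scheme described in the comments above (pp0..pp3 built from 16-bit halves) computes a full 32x32->64 multiplication. The same decomposition written out in C (the assembly interleaves the carry handling differently but produces the same 64-bit product; the function name is illustrative):

  #include <stdint.h>

  static void
  umul32x32 (uint32_t x, uint32_t y, uint32_t *hi, uint32_t *lo)
  {
    uint32_t xl = x & 0xffff, xh = x >> 16;
    uint32_t yl = y & 0xffff, yh = y >> 16;

    uint32_t pp0 = xl * yl;
    uint32_t pp1 = xl * yh;
    uint32_t pp2 = xh * yl;
    uint32_t pp3 = xh * yh;

    uint32_t mid = pp1 + pp2;                      /* may carry out... */
    uint32_t carry = (mid < pp1) ? 1u << 16 : 0;   /* ...into bit 48 */

    *lo = pp0 + (mid << 16);
    *hi = pp3 + (mid >> 16) + carry + (*lo < pp0);
  }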
+
+#ifdef L_divsf3
+
+ /* Division */
+__divsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* Check for division by zero. */
+ beqz a3, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli a4, a2, 1
+ srli a4, a4, 1
+ srli a2, a7, 31
+ slli a2, a2, 31
+ or a2, a2, a6
+ bnez a4, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, a3, 31
+ slli a7, a7, 31
+ xor a2, a2, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall a3, a6, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, a3, 9
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov a2, a3
+ leaf_return
+
+ .align 4
+ .global __divsf3
+ .type __divsf3, @function
+__divsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Ldiv_xnan_or_inf
+ ball a3, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu a3, a2, 1f
+ slli a2, a2, 1
+ addi a8, a8, -1
+1:
+ /* Do the first subtraction and shift. */
+ sub a2, a2, a3
+ slli a2, a2, 1
+
+ /* Put the quotient into a10. */
+ movi a10, 1
+
+ /* Divide one bit at a time for 23 bits. */
+ movi a9, 23
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ slli a10, a10, 1
+
+ /* Is this digit a 0 or 1? */
+ bltu a2, a3, 1f
+
+ /* Output a 1 and subtract. */
+ addi a10, a10, 1
+ sub a2, a2, a3
+
+ /* Shift the dividend << 1. */
+1: slli a2, a2, 1
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ addi a8, a8, 0x7e
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..fd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in a2. */
+ bltu a2, a3, .Ldiv_rounded
+ addi a10, a10, 1
+ beq a2, a3, .Ldiv_exactlyhalf
+
+.Ldiv_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+ leaf_return
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0xff */
+ slli a2, a8, 23
+ j .Ldiv_addsign
+
+.Ldiv_exactlyhalf:
+ /* Remainder is exactly half the divisor. Round even. */
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_flush_to_zero
+
+ /* Shift a10 right. Any bits that are shifted out of a10 are
+ saved in a6 for rounding the result. */
+ sll a6, a10
+ srl a10, a10
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero remainder (in a2) into a6. */
+ beqz a2, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a10, a10, 1
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_divsf3 */
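[Editorial note] The divide loop above produces one quotient bit per iteration by compare-subtract-shift on the 24-bit mantissas (both carrying their explicit leading 1, with x pre-adjusted so that x >= y). In C it looks roughly like this (illustrative names; *rem receives twice the final remainder, which the caller compares against y to round, exactly as .Ldiv_round does):

  #include <stdint.h>

  static uint32_t
  mant_divide (uint32_t x, uint32_t y, uint32_t *rem)
  {
    uint32_t q = 1;
    x = (x - y) << 1;             /* the first quotient bit is always 1 */
    for (int i = 0; i < 23; i++)
      {
        q <<= 1;
        if (x >= y)
          {
            q |= 1;
            x -= y;
          }
        x <<= 1;
      }
    *rem = x;                     /* remainder << 1, used for rounding */
    return q;                     /* 24-bit quotient with its leading 1 */
  }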
+
+#ifdef L_cmpsf2
+
+ /* Equal and Not Equal */
+
+ .align 4
+ .global __eqsf2
+ .global __nesf2
+ .set __nesf2, __eqsf2
+ .type __eqsf2, @function
+__eqsf2:
+ leaf_entry sp, 16
+ bne a2, a3, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, a2, 9
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, a2, a3
+ slli a7, a7, 1
+
+	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+	   of x when exponent(x) = 0x7f8 and x == y. */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtsf2
+ .type __gtsf2, @function
+__gtsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __lesf2
+ .type __lesf2, @function
+__lesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu a3, a2, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bgeu a2, a3, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz a2, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gesf2
+ .type __gesf2, @function
+__gesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltsf2
+ .type __ltsf2, @function
+__ltsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Llt_xneg
+
+ /* Check if x < y. */
+ bgeu a2, a3, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu a3, a2, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez a2, 5b
+
+	/* Check if both x and y are zero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unordsf2
+ .type __unordsf2, @function
+__unordsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+1: ball a3, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, a3, 9
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpsf2 */
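[Editorial note] Once NaNs have been filtered out (the slli-by-9 checks above), the orderings reduce to sign-magnitude comparisons on the raw bit patterns, with +0 and -0 treated as equal. A C sketch of the "less than or equal" test that .Lle_cmp implements (the real routines then map the answer onto the libgcc return-value conventions):

  #include <stdint.h>

  static int                      /* nonzero iff x <= y; neither may be NaN */
  le_bits (uint32_t x, uint32_t y)
  {
    if ((x ^ y) & 0x80000000u)                      /* different signs */
      return (x >> 31) || ((x | y) << 1) == 0;      /* x < 0, or both zero */
    if (x >> 31)                                    /* both negative */
      return y <= x;              /* larger magnitude is the smaller value */
    return x <= y;                                  /* both non-negative */
  }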
+
+#ifdef L_fixsfsi
+
+ .align 4
+ .global __fixsfsi
+ .type __fixsfsi, @function
+__fixsfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 32, .Lfixsfsi_maxint
+ blti a4, 1, .Lfixsfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixsfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixsfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixsfsi */
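[Editorial note] The conversion above extracts the exponent, restores the implicit 1.0, and shifts, saturating out-of-range values. A C approximation (illustrative name; as in the code, NaN is translated to the maximum positive integer and Infinity saturates according to its sign):

  #include <stdint.h>

  static int32_t
  fix_sf_si (uint32_t f)          /* f is the float's bit pattern */
  {
    int exp = (f >> 23) & 0xff;

    if (exp == 0xff && (f << 9) != 0)       /* NaN -> +maxint */
      return INT32_MAX;
    if (exp < 0x7f)                         /* |value| < 1 */
      return 0;
    if (exp >= 0x7f + 31)                   /* too big (or Inf): saturate */
      return (f >> 31) ? INT32_MIN : INT32_MAX;

    uint32_t mant = (f & 0x7fffff) | 0x800000;      /* implicit 1.0 */
    uint32_t val = (mant << 8) >> (0x7f + 31 - exp);
    return (f >> 31) ? -(int32_t) val : (int32_t) val;
  }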
+
+#ifdef L_fixsfdi
+
+ .align 4
+ .global __fixsfdi
+ .type __fixsfdi, @function
+__fixsfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 64, .Lfixsfdi_maxint
+ blti a4, 1, .Lfixsfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixsfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixsfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixsfdi_smallshift:
+ movi xl, 0
+ sll xl, xh
+ srl xh, xh
+ j .Lfixsfdi_shifted
+
+.Lfixsfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfdi_maxint:
+ slli a7, a6, 8 /* 0x80000000 */
+ bgez a2, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixsfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixsfdi */
+
+#ifdef L_fixunssfsi
+
+ .align 4
+ .global __fixunssfsi
+ .type __fixunssfsi, @function
+__fixunssfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 32, .Lfixunssfsi_maxint
+ bltz a4, .Lfixunssfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunssfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunssfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunssfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixunssfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunssfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a2, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 8
+ leaf_return
+
+#endif /* L_fixunssfsi */
+
+#ifdef L_fixunssfdi
+
+ .align 4
+ .global __fixunssfdi
+ .type __fixunssfdi, @function
+__fixunssfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 64, .Lfixunssfdi_maxint
+ bltz a4, .Lfixunssfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunssfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunssfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunssfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunssfdi_smallshift:
+ movi xl, 0
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunssfdi_shifted
+
+.Lfixunssfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunssfdi_maxint:
+ bgez a2, 1b
+2: slli xh, a6, 8 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ movi xl, 0
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunssfdi */
+
+#ifdef L_floatsisf
+
+ .align 4
+ .global __floatunsisf
+ .type __floatunsisf, @function
+__floatunsisf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsisf_return
+
+ /* Set the sign to zero and jump to the floatsisf code. */
+ movi a7, 0
+ j .Lfloatsisf_normalize
+
+ .align 4
+ .global __floatsisf
+ .type __floatsisf, @function
+__floatsisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsisf_return
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsisf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ srli a2, a5, 8
+ slli a6, a5, (32 - 8)
+
+ /* Set the exponent. */
+ movi a5, 0x9d /* 0x7e + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, a2, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, .Lfloatsisf_return
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatsisf_exactlyhalf
+
+.Lfloatsisf_return:
+ leaf_return
+
+.Lfloatsisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatsisf */
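[Editorial note] The int-to-float conversion above is: take the absolute value, normalize with a leading-zero count (do_nsau), derive the exponent from the shift amount, and round to nearest-even on the 8 bits that fall off the mantissa. A C sketch of the same steps (__builtin_clz stands in for do_nsau; the function name is illustrative):

  #include <stdint.h>

  static uint32_t
  float_si_sf (int32_t i)         /* returns the float's bit pattern */
  {
    if (i == 0)
      return 0;

    uint32_t sign = (i < 0) ? 0x80000000u : 0;
    uint32_t mag = (i < 0) ? 0u - (uint32_t) i : (uint32_t) i;

    int shift = __builtin_clz (mag);        /* the do_nsau step */
    mag <<= shift;                          /* leading 1 now at bit 31 */

    uint32_t rest = mag << 24;              /* the 8 bits about to be dropped */
    uint32_t bits = sign + ((uint32_t) (0x7e + 31 - shift) << 23) + (mag >> 8);

    if (rest & 0x80000000u)                 /* round to nearest... */
      {
        bits += 1;
        if ((rest << 1) == 0)               /* ...ties to even */
          bits &= ~1u;
      }
    return bits;
  }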
+
+#ifdef L_floatdisf
+
+ .align 4
+ .global __floatundisf
+ .type __floatundisf, @function
+__floatundisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdisf code. */
+ movi a7, 0
+ j .Lfloatdisf_normalize
+
+ .align 4
+ .global __floatdisf
+ .type __floatdisf, @function
+__floatdisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdisf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdisf_normalize
+ addi xh, xh, -1
+
+.Lfloatdisf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdisf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdisf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 8
+ sll a5, xl
+ src a6, xh, xl
+ srl xh, xh
+ beqz a5, 1f
+ movi a5, 1
+ or a6, a6, a5
+1:
+ /* Set the exponent. */
+ movi a5, 0xbd /* 0x7e + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdisf_exactlyhalf
+2: leaf_return
+
+.Lfloatdisf_bigshift:
+ /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdisf_shifted
+
+.Lfloatdisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatdisf */
diff --git a/gcc-4.9/libgcc/config/xtensa/lib1funcs.S b/gcc-4.9/libgcc/config/xtensa/lib1funcs.S
new file mode 100644
index 000000000..7c16cc850
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/lib1funcs.S
@@ -0,0 +1,844 @@
+/* Assembly functions for the Xtensa version of libgcc1.
+ Copyright (C) 2001-2014 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "xtensa-config.h"
+
+/* Define macros for the ABS and ADDX* instructions to handle cases
+ where they are not included in the Xtensa processor configuration. */
+
+ .macro do_abs dst, src, tmp
+#if XCHAL_HAVE_ABS
+ abs \dst, \src
+#else
+ neg \tmp, \src
+ movgez \tmp, \src, \src
+ mov \dst, \tmp
+#endif
+ .endm
+
+ .macro do_addx2 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx2 \dst, \as, \at
+#else
+ slli \tmp, \as, 1
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+ .macro do_addx4 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx4 \dst, \as, \at
+#else
+ slli \tmp, \as, 2
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+ .macro do_addx8 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx8 \dst, \as, \at
+#else
+ slli \tmp, \as, 3
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+/* Define macros for leaf function entry and return, supporting either the
+ standard register windowed ABI or the non-windowed call0 ABI. These
+ macros do not allocate any extra stack space, so they only work for
+ leaf functions that do not need to spill anything to the stack. */
+
+ .macro leaf_entry reg, size
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry \reg, \size
+#else
+ /* do nothing */
+#endif
+ .endm
+
+ .macro leaf_return
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ ret
+#endif
+ .endm
+
+
+#ifdef L_mulsi3
+ .align 4
+ .global __mulsi3
+ .type __mulsi3, @function
+__mulsi3:
+ leaf_entry sp, 16
+
+#if XCHAL_HAVE_MUL32
+ mull a2, a2, a3
+
+#elif XCHAL_HAVE_MUL16
+ or a4, a2, a3
+ srai a4, a4, 16
+ bnez a4, .LMUL16
+ mul16u a2, a2, a3
+ leaf_return
+.LMUL16:
+ srai a4, a2, 16
+ srai a5, a3, 16
+ mul16u a7, a4, a3
+ mul16u a6, a5, a2
+ mul16u a4, a2, a3
+ add a7, a7, a6
+ slli a7, a7, 16
+ add a2, a7, a4
+
+#elif XCHAL_HAVE_MAC16
+ mul.aa.hl a2, a3
+ mula.aa.lh a2, a3
+ rsr a5, ACCLO
+ umul.aa.ll a2, a3
+ rsr a4, ACCLO
+ slli a5, a5, 16
+ add a2, a4, a5
+
+#else /* !MUL32 && !MUL16 && !MAC16 */
+
+ /* Multiply one bit at a time, but unroll the loop 4x to better
+ exploit the addx instructions and avoid overhead.
+ Peel the first iteration to save a cycle on init. */
+
+ /* Avoid negative numbers. */
+ xor a5, a2, a3 /* Top bit is 1 if one input is negative. */
+ do_abs a3, a3, a6
+ do_abs a2, a2, a6
+
+ /* Swap so the second argument is smaller. */
+ sub a7, a2, a3
+ mov a4, a3
+ movgez a4, a2, a7 /* a4 = max (a2, a3) */
+ movltz a3, a2, a7 /* a3 = min (a2, a3) */
+
+ movi a2, 0
+ extui a6, a3, 0, 1
+ movnez a2, a4, a6
+
+ do_addx2 a7, a4, a2, a7
+ extui a6, a3, 1, 1
+ movnez a2, a7, a6
+
+ do_addx4 a7, a4, a2, a7
+ extui a6, a3, 2, 1
+ movnez a2, a7, a6
+
+ do_addx8 a7, a4, a2, a7
+ extui a6, a3, 3, 1
+ movnez a2, a7, a6
+
+ bgeui a3, 16, .Lmult_main_loop
+ neg a3, a2
+ movltz a2, a3, a5
+ leaf_return
+
+ .align 4
+.Lmult_main_loop:
+ srli a3, a3, 4
+ slli a4, a4, 4
+
+ add a7, a4, a2
+ extui a6, a3, 0, 1
+ movnez a2, a7, a6
+
+ do_addx2 a7, a4, a2, a7
+ extui a6, a3, 1, 1
+ movnez a2, a7, a6
+
+ do_addx4 a7, a4, a2, a7
+ extui a6, a3, 2, 1
+ movnez a2, a7, a6
+
+ do_addx8 a7, a4, a2, a7
+ extui a6, a3, 3, 1
+ movnez a2, a7, a6
+
+ bgeui a3, 16, .Lmult_main_loop
+
+ neg a3, a2
+ movltz a2, a3, a5
+
+#endif /* !MUL32 && !MUL16 && !MAC16 */
+
+ leaf_return
+ .size __mulsi3, . - __mulsi3
+
+#endif /* L_mulsi3 */
+
+
+#ifdef L_umulsidi3
+
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+ .align 4
+ .global __umulsidi3
+ .type __umulsidi3, @function
+__umulsidi3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 48
+#else
+ leaf_entry sp, 16
+#endif
+
+#ifdef __XTENSA_EB__
+#define wh a2
+#define wl a3
+#else
+#define wh a3
+#define wl a2
+#endif /* __XTENSA_EB__ */
+
+ /* This code is taken from the mulsf3 routine in ieee754-sf.S.
+ See more comments there. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+ mull a6, a2, a3
+ muluh wh, a2, a3
+ mov wl, a6
+
+#else /* ! MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* a0 and a8 will be clobbered by calling the multiply function
+ but a8 is not used here and need not be saved. */
+ s32i a0, sp, 0
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into wh. */
+ do_mul(wh, a2, h, a3, h) /* pp 3 */
+ add wh, wh, a9
+ mov wl, a6
+
+#endif /* !MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore the original return address. */
+ l32i a0, sp, 0
+#endif
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+
+ .size __umulsidi3, . - __umulsidi3
+
+#endif /* L_umulsidi3 */
+
+
+/* Define a macro for the NSAU (unsigned normalize shift amount)
+ instruction, which computes the number of leading zero bits,
+ to handle cases where it is not included in the Xtensa processor
+ configuration. */
+
+ .macro do_nsau cnt, val, tmp, a
+#if XCHAL_HAVE_NSA
+ nsau \cnt, \val
+#else
+ mov \a, \val
+ movi \cnt, 0
+ extui \tmp, \a, 16, 16
+ bnez \tmp, 0f
+ movi \cnt, 16
+ slli \a, \a, 16
+0:
+ extui \tmp, \a, 24, 8
+ bnez \tmp, 1f
+ addi \cnt, \cnt, 8
+ slli \a, \a, 8
+1:
+ movi \tmp, __nsau_data
+ extui \a, \a, 24, 8
+ add \tmp, \tmp, \a
+ l8ui \tmp, \tmp, 0
+ add \cnt, \cnt, \tmp
+#endif /* !XCHAL_HAVE_NSA */
+ .endm
+
+#ifdef L_clz
+ .section .rodata
+ .align 4
+ .global __nsau_data
+ .type __nsau_data, @object
+__nsau_data:
+#if !XCHAL_HAVE_NSA
+ .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
+ .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+ .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif /* !XCHAL_HAVE_NSA */
+ .size __nsau_data, . - __nsau_data
+ .hidden __nsau_data
+#endif /* L_clz */
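[Editorial note] The do_nsau fallback above narrows the position of the leading one by 16 and then 8 bits before finishing with the 256-entry __nsau_data table. A C rendering of the same control flow (byte_clz here plays the role of the table lookup; like NSAU, the result for a zero input is 32):

  #include <stdint.h>

  static int
  byte_clz (uint8_t b)            /* what __nsau_data tabulates: 0..8 */
  {
    int n = 8;
    while (b)
      {
        b >>= 1;
        n--;
      }
    return n;
  }

  static int
  nsau (uint32_t v)
  {
    int cnt = 0;
    if ((v >> 16) == 0)           /* leading one is not in the top half */
      {
        cnt = 16;
        v <<= 16;
      }
    if ((v >> 24) == 0)           /* ...nor in the top byte */
      {
        cnt += 8;
        v <<= 8;
      }
    return cnt + byte_clz (v >> 24);
  }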
+
+
+#ifdef L_clzsi2
+ .align 4
+ .global __clzsi2
+ .type __clzsi2, @function
+__clzsi2:
+ leaf_entry sp, 16
+ do_nsau a2, a2, a3, a4
+ leaf_return
+ .size __clzsi2, . - __clzsi2
+
+#endif /* L_clzsi2 */
+
+
+#ifdef L_ctzsi2
+ .align 4
+ .global __ctzsi2
+ .type __ctzsi2, @function
+__ctzsi2:
+ leaf_entry sp, 16
+ neg a3, a2
+ and a3, a3, a2
+ do_nsau a2, a3, a4, a5
+ neg a2, a2
+ addi a2, a2, 31
+ leaf_return
+ .size __ctzsi2, . - __ctzsi2
+
+#endif /* L_ctzsi2 */
+
+
+#ifdef L_ffssi2
+ .align 4
+ .global __ffssi2
+ .type __ffssi2, @function
+__ffssi2:
+ leaf_entry sp, 16
+ neg a3, a2
+ and a3, a3, a2
+ do_nsau a2, a3, a4, a5
+ neg a2, a2
+ addi a2, a2, 32
+ leaf_return
+ .size __ffssi2, . - __ffssi2
+
+#endif /* L_ffssi2 */
+
+
+#ifdef L_udivsi3
+ .align 4
+ .global __udivsi3
+ .type __udivsi3, @function
+__udivsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ quou a2, a2, a3
+#else
+ bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
+
+ mov a6, a2 /* keep dividend in a6 */
+ do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
+ do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
+ ssl a4
+ sll a3, a3 /* divisor <<= count */
+ movi a2, 0 /* quotient = 0 */
+
+ /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a6, a3, .Lzerobit
+ sub a6, a6, a3
+ addi a2, a2, 1
+.Lzerobit:
+ slli a2, a2, 1
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+ bltu a6, a3, .Lreturn
+ addi a2, a2, 1 /* increment quotient if dividend >= divisor */
+.Lreturn:
+ leaf_return
+
+.Lle_one:
+	beqz	a3, .Lerror	/* divisor == 0: force the divide-by-zero trap */
+	leaf_return		/* divisor == 1: return the dividend */
+
+.Lspecial:
+ /* return dividend >= divisor */
+ bltu a6, a3, .Lreturn0
+ movi a2, 1
+ leaf_return
+
+.Lerror:
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __udivsi3, . - __udivsi3
+
+#endif /* L_udivsi3 */
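[Editorial note] The non-DIV32 path above first lines up the divisor under the dividend's leading one (using the nsau counts) and then runs a compare-subtract-shift loop, one quotient bit per iteration. The same algorithm in C (illustrative; __builtin_clz replaces do_nsau, and a nonzero divisor is a precondition here, where the assembly instead executes an illegal instruction on zero):

  #include <stdint.h>

  static uint32_t
  udiv (uint32_t dividend, uint32_t divisor)    /* divisor must be nonzero */
  {
    if (dividend == 0)
      return 0;
    if (divisor == 1)
      return dividend;

    int dividend_shift = __builtin_clz (dividend);
    int divisor_shift = __builtin_clz (divisor);
    if (dividend_shift >= divisor_shift)        /* quotient is 0 or 1 */
      return dividend >= divisor;

    int count = divisor_shift - dividend_shift;
    divisor <<= count;                          /* align the leading ones */
    uint32_t quotient = 0;
    while (count-- > 0)
      {
        if (dividend >= divisor)
          {
            dividend -= divisor;
            quotient += 1;
          }
        quotient <<= 1;
        divisor >>= 1;
      }
    if (dividend >= divisor)                    /* final quotient bit */
      quotient += 1;
    return quotient;
  }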
+
+
+#ifdef L_divsi3
+ .align 4
+ .global __divsi3
+ .type __divsi3, @function
+__divsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ quos a2, a2, a3
+#else
+ xor a7, a2, a3 /* sign = dividend ^ divisor */
+ do_abs a6, a2, a4 /* udividend = abs (dividend) */
+ do_abs a3, a3, a4 /* udivisor = abs (divisor) */
+ bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
+ do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
+ do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
+ ssl a4
+ sll a3, a3 /* udivisor <<= count */
+ movi a2, 0 /* quotient = 0 */
+
+ /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a6, a3, .Lzerobit
+ sub a6, a6, a3
+ addi a2, a2, 1
+.Lzerobit:
+ slli a2, a2, 1
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+ bltu a6, a3, .Lreturn
+ addi a2, a2, 1 /* increment if udividend >= udivisor */
+.Lreturn:
+ neg a5, a2
+ movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
+ leaf_return
+
+.Lle_one:
+ beqz a3, .Lerror
+ neg a2, a6 /* if udivisor == 1, then return... */
+ movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
+ leaf_return
+
+.Lspecial:
+ bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
+ movi a2, 1
+ movi a4, -1
+ movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
+ leaf_return
+
+.Lerror:
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __divsi3, . - __divsi3
+
+#endif /* L_divsi3 */
+
+
+#ifdef L_umodsi3
+ .align 4
+ .global __umodsi3
+ .type __umodsi3, @function
+__umodsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ remu a2, a2, a3
+#else
+ bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
+
+ do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
+ do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
+ ssl a4
+ sll a3, a3 /* divisor <<= count */
+
+ /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a2, a3, .Lzerobit
+ sub a2, a2, a3
+.Lzerobit:
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+ bltu a2, a3, .Lreturn
+ sub a2, a2, a3 /* subtract once more if dividend >= divisor */
+.Lreturn:
+ leaf_return
+
+.Lle_one:
+ bnez a3, .Lreturn0
+
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __umodsi3, . - __umodsi3
+
+#endif /* L_umodsi3 */
+
+
+#ifdef L_modsi3
+ .align 4
+ .global __modsi3
+ .type __modsi3, @function
+__modsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ rems a2, a2, a3
+#else
+ mov a7, a2 /* save original (signed) dividend */
+ do_abs a2, a2, a4 /* udividend = abs (dividend) */
+ do_abs a3, a3, a4 /* udivisor = abs (divisor) */
+ bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
+ do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
+ do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
+ ssl a4
+ sll a3, a3 /* udivisor <<= count */
+
+ /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a2, a3, .Lzerobit
+ sub a2, a2, a3
+.Lzerobit:
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+ bltu a2, a3, .Lreturn
+ sub a2, a2, a3 /* subtract again if udividend >= udivisor */
+.Lreturn:
+ bgez a7, .Lpositive
+ neg a2, a2 /* if (dividend < 0), negate the remainder */
+.Lpositive:
+ leaf_return
+
+.Lle_one:
+ bnez a3, .Lreturn0
+
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __modsi3, . - __modsi3
+
+#endif /* L_modsi3 */
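
The sign handling wrapped around the unsigned loops in __divsi3 and __modsi3 follows the usual C rules: the quotient is negative when the operand signs differ, and the remainder takes the dividend's sign (which is why __modsi3 saves the original dividend in a7). A C model of just that fix-up, illustrative only, with the unsigned loop replaced by plain / and %:

    #include <stdint.h>

    /* Absolute value that stays well defined even for INT32_MIN.  */
    static uint32_t
    uabs (int32_t v)
    {
      uint32_t u = (uint32_t) v;
      return v < 0 ? 0u - u : u;
    }

    static int32_t
    divsi3_model (int32_t a, int32_t b)
    {
      uint32_t q = uabs (a) / uabs (b);  /* stands in for the unsigned loop */
      if ((a ^ b) < 0)                   /* signs differ => negative quotient */
        q = 0u - q;
      return (int32_t) q;                /* wraps like the machine result */
    }

    static int32_t
    modsi3_model (int32_t a, int32_t b)
    {
      uint32_t r = uabs (a) % uabs (b);  /* stands in for the unsigned loop */
      if (a < 0)                         /* remainder follows the dividend */
        r = 0u - r;
      return (int32_t) r;
    }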
+
+
+#ifdef __XTENSA_EB__
+#define uh a2
+#define ul a3
+#else
+#define uh a3
+#define ul a2
+#endif /* __XTENSA_EB__ */
+
+
+#ifdef L_ashldi3
+ .align 4
+ .global __ashldi3
+ .type __ashldi3, @function
+__ashldi3:
+ leaf_entry sp, 16
+ ssl a4
+ bgei a4, 32, .Llow_only
+ src uh, uh, ul
+ sll ul, ul
+ leaf_return
+
+.Llow_only:
+ sll uh, ul
+ movi ul, 0
+ leaf_return
+ .size __ashldi3, . - __ashldi3
+
+#endif /* L_ashldi3 */
+
+
+#ifdef L_ashrdi3
+ .align 4
+ .global __ashrdi3
+ .type __ashrdi3, @function
+__ashrdi3:
+ leaf_entry sp, 16
+ ssr a4
+ bgei a4, 32, .Lhigh_only
+ src ul, uh, ul
+ sra uh, uh
+ leaf_return
+
+.Lhigh_only:
+ sra ul, uh
+ srai uh, uh, 31
+ leaf_return
+ .size __ashrdi3, . - __ashrdi3
+
+#endif /* L_ashrdi3 */
+
+
+#ifdef L_lshrdi3
+ .align 4
+ .global __lshrdi3
+ .type __lshrdi3, @function
+__lshrdi3:
+ leaf_entry sp, 16
+ ssr a4
+ bgei a4, 32, .Lhigh_only1
+ src ul, uh, ul
+ srl uh, uh
+ leaf_return
+
+.Lhigh_only1:
+ srl ul, uh
+ movi uh, 0
+ leaf_return
+ .size __lshrdi3, . - __lshrdi3
+
+#endif /* L_lshrdi3 */
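
All three shift helpers treat the 64-bit value as an endian-dependent high/low pair (the uh/ul #defines above). In C, __ashldi3 decomposes as in the sketch below; this is illustrative only, since the assembly gets the (uh << n) | (ul >> (32 - n)) combination for free from the SSL/SRC funnel shift and needs no n == 0 special case:

    #include <stdint.h>

    static uint64_t
    ashldi3_model (uint64_t u, unsigned n)
    {
      uint32_t ul = (uint32_t) u;          /* low half  */
      uint32_t uh = (uint32_t) (u >> 32);  /* high half */

      if (n >= 32)
        {                                  /* .Llow_only: only ul contributes */
          uh = ul << (n - 32);
          ul = 0;
        }
      else if (n != 0)
        {                                  /* general case: funnel uh:ul left */
          uh = (uh << n) | (ul >> (32 - n));
          ul <<= n;
        }
      return ((uint64_t) uh << 32) | ul;
    }

__ashrdi3 and __lshrdi3 are the mirror images, differing only in whether the vacated high half is sign-extended (srai uh, uh, 31) or zero-filled (movi uh, 0).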
+
+
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
diff --git a/gcc-4.9/libgcc/config/xtensa/lib2funcs.S b/gcc-4.9/libgcc/config/xtensa/lib2funcs.S
new file mode 100644
index 000000000..3ac8c1da2
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/lib2funcs.S
@@ -0,0 +1,186 @@
+/* Assembly functions for libgcc2.
+ Copyright (C) 2001-2014 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "xtensa-config.h"
+
+/* __xtensa_libgcc_window_spill: This function flushes out all but the
+ current register window. This is used to set up the stack so that
+ arbitrary frames can be accessed. */
+
+ .align 4
+ .global __xtensa_libgcc_window_spill
+ .type __xtensa_libgcc_window_spill,@function
+__xtensa_libgcc_window_spill:
+ entry sp, 32
+ movi a2, 0
+ syscall
+ retw
+ .size __xtensa_libgcc_window_spill, .-__xtensa_libgcc_window_spill
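
A caller-side sketch of how the spill routine gets used (illustrative; the extern declaration and the helper name are assumptions, since the function is normally referenced only from compiler-generated or libgcc code):

    /* After the spill, every parent frame's a0-a3 live in the 16-byte save
       area just below that frame's stack pointer, so they can be read from C.  */
    extern void __xtensa_libgcc_window_spill (void);

    static void
    inspect_parent_frames (void)
    {
      __xtensa_libgcc_window_spill ();
      /* ... walk the stack and read the save areas directly ... */
    }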
+
+
+/* __xtensa_nonlocal_goto: This code does all the hard work of a
+ nonlocal goto on Xtensa. It is here in the library to avoid the
+ code size bloat of generating it in-line. There are two
+ arguments:
+
+ a2 = frame pointer for the procedure containing the label
+ a3 = goto handler address
+
+ This function never returns to its caller but instead goes directly
+ to the address of the specified goto handler. */
+
+ .align 4
+ .global __xtensa_nonlocal_goto
+ .type __xtensa_nonlocal_goto,@function
+__xtensa_nonlocal_goto:
+ entry sp, 32
+
+ /* Flush registers. */
+ mov a5, a2
+ movi a2, 0
+ syscall
+ mov a2, a5
+
+ /* Because the save area for a0-a3 is stored one frame below
+ the one identified by a2, the only way to restore those
+ registers is to unwind the stack. If alloca() were never
+ called, we could just unwind until finding the sp value
+ matching a2. However, a2 is a frame pointer, not a stack
+ pointer, and may not be encountered during the unwinding.
+ The solution is to unwind until going _past_ the value
+ given by a2. This involves keeping three stack pointer
+ values during the unwinding:
+
+ next = sp of frame N-1
+ cur = sp of frame N
+ prev = sp of frame N+1
+
+ When next > a2, the desired save area is stored relative
+ to prev. At this point, cur will be the same as a2
+ except in the alloca() case.
+
+ Besides finding the values to be restored to a0-a3, we also
+ need to find the current window size for the target
+ function. This can be extracted from the high bits of the
+ return address, initially in a0. As the unwinding
+ proceeds, the window size is taken from the value of a0
+ saved _two_ frames below the current frame. */
+
+ addi a5, sp, -16 /* a5 = prev - save area */
+ l32i a6, a5, 4
+ addi a6, a6, -16 /* a6 = cur - save area */
+ mov a8, a0 /* a8 = return address (for window size) */
+ j .Lfirstframe
+
+.Lnextframe:
+ l32i a8, a5, 0 /* next return address (for window size) */
+ mov a5, a6 /* advance prev */
+ addi a6, a7, -16 /* advance cur */
+.Lfirstframe:
+ l32i a7, a6, 4 /* a7 = next */
+ bgeu a2, a7, .Lnextframe
+
+ /* At this point, prev (a5) points to the save area with the saved
+ values of a0-a3. Copy those values into the save area at the
+ current sp so they will be reloaded when the return from this
+ function underflows. We don't have to worry about exceptions
+ while updating the current save area, because the windows have
+ already been flushed. */
+
+ addi a4, sp, -16 /* a4 = save area of this function */
+ l32i a6, a5, 0
+ l32i a7, a5, 4
+ s32i a6, a4, 0
+ s32i a7, a4, 4
+ l32i a6, a5, 8
+ l32i a7, a5, 12
+ s32i a6, a4, 8
+ s32i a7, a4, 12
+
+ /* Set return address to goto handler. Use the window size bits
+ from the return address two frames below the target. */
+ extui a8, a8, 30, 2 /* get window size from return addr. */
+ slli a3, a3, 2 /* get goto handler addr. << 2 */
+ ssai 2
+ src a0, a8, a3 /* combine them with a funnel shift */
+
+ retw
+ .size __xtensa_nonlocal_goto, .-__xtensa_nonlocal_goto
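
The three-pointer walk described in the comment translates into roughly the following C. This is a conceptual model only: find_save_area is a hypothetical name, addresses are assumed to be 32-bit as on Xtensa, and the parallel tracking of the window-size bits from the saved return addresses is omitted:

    #include <stdint.h>

    /* SP is this frame's stack pointer, FP the target frame pointer (a2).
       Returns the save area holding the target's saved a0-a3.  */
    static uint32_t *
    find_save_area (uint32_t *sp, uint32_t fp)
    {
      uint32_t *prev = sp - 4;                               /* save area below sp */
      uint32_t *cur = (uint32_t *) (uintptr_t) prev[1] - 4;  /* one frame up       */
      uint32_t next;

      while ((next = cur[1]) <= fp)                          /* unwind until past fp */
        {
          prev = cur;
          cur = (uint32_t *) (uintptr_t) next - 4;
        }
      return prev;
    }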
+
+
+/* __xtensa_sync_caches: This function is called after writing a trampoline
+ on the stack to force all the data writes to memory and invalidate the
+ instruction cache. a2 is the address of the new trampoline.
+
+ After the trampoline data is written out, it must be flushed out of
+ the data cache into memory. We use DHWB in case we have a writeback
+ cache. At least one DHWB instruction is needed for each data cache
+ line which may be touched by the trampoline. An ISYNC instruction
+ must follow the DHWBs.
+
+ We have to flush the i-cache to make sure that the new values get used.
+ At least one IHI instruction is needed for each i-cache line which may
+ be touched by the trampoline. An ISYNC instruction is also needed to
+ make sure that the modified instructions are loaded into the instruction
+ fetch buffer. */
+
+/* Use the maximum trampoline size. Flushing a bit extra is OK. */
+#define TRAMPOLINE_SIZE 60
+
+ .text
+ .align 4
+ .global __xtensa_sync_caches
+ .type __xtensa_sync_caches,@function
+__xtensa_sync_caches:
+ entry sp, 32
+#if XCHAL_DCACHE_SIZE > 0
+ /* Flush the trampoline from the data cache. */
+ extui a4, a2, 0, XCHAL_DCACHE_LINEWIDTH
+ addi a4, a4, TRAMPOLINE_SIZE
+ addi a4, a4, (1 << XCHAL_DCACHE_LINEWIDTH) - 1
+ srli a4, a4, XCHAL_DCACHE_LINEWIDTH
+ mov a3, a2
+.Ldcache_loop:
+ dhwb a3, 0
+ addi a3, a3, (1 << XCHAL_DCACHE_LINEWIDTH)
+ addi a4, a4, -1
+ bnez a4, .Ldcache_loop
+ isync
+#endif
+#if XCHAL_ICACHE_SIZE > 0
+ /* Invalidate the corresponding lines in the instruction cache. */
+ extui a4, a2, 0, XCHAL_ICACHE_LINEWIDTH
+ addi a4, a4, TRAMPOLINE_SIZE
+ addi a4, a4, (1 << XCHAL_ICACHE_LINEWIDTH) - 1
+ srli a4, a4, XCHAL_ICACHE_LINEWIDTH
+.Licache_loop:
+ ihi a2, 0
+ addi a2, a2, (1 << XCHAL_ICACHE_LINEWIDTH)
+ addi a4, a4, -1
+ bnez a4, .Licache_loop
+#endif
+ isync
+ retw
+ .size __xtensa_sync_caches, .-__xtensa_sync_caches
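
The line-count arithmetic used by both loops corresponds to this C helper (illustrative; lines_to_touch is a made-up name, and the DHWB/IHI/ISYNC cache operations themselves have no portable C equivalent):

    #include <stdint.h>

    /* linewidth is log2 of the cache line size (XCHAL_*CACHE_LINEWIDTH).  */
    static unsigned
    lines_to_touch (uintptr_t addr, unsigned size, unsigned linewidth)
    {
      unsigned linesize = 1u << linewidth;
      unsigned offset = addr & (linesize - 1);   /* extui: offset within a line */
      return (offset + size + linesize - 1) >> linewidth;
    }

With size equal to TRAMPOLINE_SIZE this gives the number of dhwb or ihi instructions issued, which is why flushing a bit extra for a smaller actual trampoline is harmless.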
diff --git a/gcc-4.9/libgcc/config/xtensa/libgcc-glibc.ver b/gcc-4.9/libgcc/config/xtensa/libgcc-glibc.ver
new file mode 100644
index 000000000..43e7d4fc7
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/libgcc-glibc.ver
@@ -0,0 +1,3 @@
+GCC_4.3.0 {
+ __umulsidi3
+}
diff --git a/gcc-4.9/libgcc/config/xtensa/linux-unwind.h b/gcc-4.9/libgcc/config/xtensa/linux-unwind.h
new file mode 100644
index 000000000..6832d0b48
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/linux-unwind.h
@@ -0,0 +1,97 @@
+/* DWARF2 EH unwinding support for Xtensa.
+ Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2-xtensa.c for the structs.
+ Don't use this at all if inhibit_libc is used. */
+
+#ifndef inhibit_libc
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+/* Encoded bytes for Xtensa instructions:
+ movi a2, __NR_rt_sigreturn
+ syscall
+ entry (first byte only)
+ Some of the bytes are endian-dependent. */
+
+#define MOVI_BYTE0 0x22
+#define MOVI_BYTE2 225 /* __NR_rt_sigreturn */
+#define SYSC_BYTE0 0
+#define SYSC_BYTE2 0
+
+#ifdef __XTENSA_EB__
+#define MOVI_BYTE1 0x0a
+#define SYSC_BYTE1 0x05
+#define ENTRY_BYTE 0x6c
+#else
+#define MOVI_BYTE1 0xa0
+#define SYSC_BYTE1 0x50
+#define ENTRY_BYTE 0x36
+#endif
+
+#define MD_FALLBACK_FRAME_STATE_FOR xtensa_fallback_frame_state
+
+static _Unwind_Reason_Code
+xtensa_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+
+ struct rt_sigframe {
+ siginfo_t info;
+ struct ucontext uc;
+ } *rt_;
+
+ /* movi a2, __NR_rt_sigreturn; syscall */
+ if (pc[0] != MOVI_BYTE0
+ || pc[1] != MOVI_BYTE1
+ || pc[2] != MOVI_BYTE2
+ || pc[3] != SYSC_BYTE0
+ || pc[4] != SYSC_BYTE1
+ || pc[5] != SYSC_BYTE2)
+ return _URC_END_OF_STACK;
+
+ rt_ = context->sp;
+ sc = &rt_->uc.uc_mcontext;
+ fs->signal_regs = (_Unwind_Word *) sc->sc_a;
+
+ /* If the signal arrived just before an ENTRY instruction, find the return
+ address and pretend the signal arrived before executing the CALL. */
+ if (*(unsigned char *) sc->sc_pc == ENTRY_BYTE)
+ {
+ unsigned callinc = (sc->sc_ps >> 16) & 3;
+ fs->signal_ra = ((sc->sc_a[callinc << 2] & XTENSA_RA_FIELD_MASK)
+ | context->ra_high_bits) - 3;
+ }
+ else
+ fs->signal_ra = sc->sc_pc;
+
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+
+#endif /* ifndef inhibit_libc */
diff --git a/gcc-4.9/libgcc/config/xtensa/t-elf b/gcc-4.9/libgcc/config/xtensa/t-elf
new file mode 100644
index 000000000..59d51210b
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/t-elf
@@ -0,0 +1,5 @@
+# Build CRT files and libgcc with the "longcalls" option
+CRTSTUFF_T_CFLAGS += -mlongcalls
+CRTSTUFF_T_CFLAGS_S += -mlongcalls
+
+HOST_LIBGCC2_CFLAGS += -mlongcalls
diff --git a/gcc-4.9/libgcc/config/xtensa/t-linux b/gcc-4.9/libgcc/config/xtensa/t-linux
new file mode 100644
index 000000000..6f4ae8934
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/t-linux
@@ -0,0 +1 @@
+SHLIB_MAPFILES += $(srcdir)/config/xtensa/libgcc-glibc.ver
diff --git a/gcc-4.9/libgcc/config/xtensa/t-xtensa b/gcc-4.9/libgcc/config/xtensa/t-xtensa
new file mode 100644
index 000000000..27399e67f
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/t-xtensa
@@ -0,0 +1,16 @@
+LIB1ASMSRC = xtensa/lib1funcs.S
+LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
+ _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
+ _ashldi3 _ashrdi3 _lshrdi3 \
+ _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
+ _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
+ _floatdisf _floatundisf \
+ _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
+ _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
+ _floatdidf _floatundidf \
+ _truncdfsf2 _extendsfdf2
+
+LIB2ADD = $(srcdir)/config/xtensa/lib2funcs.S
+
+LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \
+ $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
diff --git a/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.c b/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.c
new file mode 100644
index 000000000..35f7797d4
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.c
@@ -0,0 +1,543 @@
+/* DWARF2 exception handling and frame unwinding for Xtensa.
+ Copyright (C) 1997-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "libgcc_tm.h"
+#include "dwarf2.h"
+#include "unwind.h"
+#ifdef __USING_SJLJ_EXCEPTIONS__
+# define NO_SIZE_OF_ENCODED_VALUE
+#endif
+#include "unwind-pe.h"
+#include "unwind-dw2-fde.h"
+#include "unwind-dw2-xtensa.h"
+
+#ifndef __USING_SJLJ_EXCEPTIONS__
+
+/* The standard CIE and FDE structures work fine for Xtensa but the
+ variable-size register window save areas are not a good fit for the rest
+ of the standard DWARF unwinding mechanism. Nor is that mechanism
+ necessary, since the register save areas are always in fixed locations
+ in each stack frame. This file is a stripped down and customized version
+ of the standard DWARF unwinding code. It needs to be customized to have
+ builtin logic for finding the save areas and also to track the stack
+ pointer value (besides the CFA) while unwinding since the primary save
+ area is located below the stack pointer. It is stripped down to reduce
+ code size and ease the maintenance burden of tracking changes in the
+ standard version of the code. */
+
+#ifndef DWARF_REG_TO_UNWIND_COLUMN
+#define DWARF_REG_TO_UNWIND_COLUMN(REGNO) (REGNO)
+#endif
+
+#define XTENSA_RA_FIELD_MASK 0x3FFFFFFF
+
+/* This is the register and unwind state for a particular frame. This
+ provides the information necessary to unwind up past a frame and return
+ to its caller. */
+struct _Unwind_Context
+{
+ /* Track register window save areas of 4 registers each, instead of
+ keeping separate addresses for the individual registers. */
+ _Unwind_Word *reg[4];
+
+ void *cfa;
+ void *sp;
+ void *ra;
+
+ /* Cache the 2 high bits to replace the window size in return addresses. */
+ _Unwind_Word ra_high_bits;
+
+ void *lsda;
+ struct dwarf_eh_bases bases;
+ /* Signal frame context. */
+#define SIGNAL_FRAME_BIT ((~(_Unwind_Word) 0 >> 1) + 1)
+ _Unwind_Word flags;
+ /* 0 for now, can be increased when further fields are added to
+ struct _Unwind_Context. */
+ _Unwind_Word version;
+};
+
+
+/* Read unaligned data from the instruction buffer. */
+
+union unaligned
+{
+ void *p;
+} __attribute__ ((packed));
+
+static void uw_update_context (struct _Unwind_Context *, _Unwind_FrameState *);
+static _Unwind_Reason_Code uw_frame_state_for (struct _Unwind_Context *,
+ _Unwind_FrameState *);
+
+static inline void *
+read_pointer (const void *p) { const union unaligned *up = p; return up->p; }
+
+static inline _Unwind_Word
+_Unwind_IsSignalFrame (struct _Unwind_Context *context)
+{
+ return (context->flags & SIGNAL_FRAME_BIT) ? 1 : 0;
+}
+
+static inline void
+_Unwind_SetSignalFrame (struct _Unwind_Context *context, int val)
+{
+ if (val)
+ context->flags |= SIGNAL_FRAME_BIT;
+ else
+ context->flags &= ~SIGNAL_FRAME_BIT;
+}
+
+/* Get the value of register INDEX as saved in CONTEXT. */
+
+inline _Unwind_Word
+_Unwind_GetGR (struct _Unwind_Context *context, int index)
+{
+ _Unwind_Word *ptr;
+
+ index = DWARF_REG_TO_UNWIND_COLUMN (index);
+ ptr = context->reg[index >> 2] + (index & 3);
+
+ return *ptr;
+}
+
+/* Get the value of the CFA as saved in CONTEXT. */
+
+_Unwind_Word
+_Unwind_GetCFA (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->cfa;
+}
+
+/* Overwrite the saved value for register INDEX in CONTEXT with VAL. */
+
+inline void
+_Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val)
+{
+ _Unwind_Word *ptr;
+
+ index = DWARF_REG_TO_UNWIND_COLUMN (index);
+ ptr = context->reg[index >> 2] + (index & 3);
+
+ *ptr = val;
+}
+
+/* Retrieve the return address for CONTEXT. */
+
+inline _Unwind_Ptr
+_Unwind_GetIP (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->ra;
+}
+
+/* Retrieve the return address and flag whether that IP is before
+ or after the first not-yet-fully-executed instruction. */
+
+inline _Unwind_Ptr
+_Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn)
+{
+ *ip_before_insn = _Unwind_IsSignalFrame (context);
+ return (_Unwind_Ptr) context->ra;
+}
+
+/* Overwrite the return address for CONTEXT with VAL. */
+
+inline void
+_Unwind_SetIP (struct _Unwind_Context *context, _Unwind_Ptr val)
+{
+ context->ra = (void *) val;
+}
+
+void *
+_Unwind_GetLanguageSpecificData (struct _Unwind_Context *context)
+{
+ return context->lsda;
+}
+
+_Unwind_Ptr
+_Unwind_GetRegionStart (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.func;
+}
+
+void *
+_Unwind_FindEnclosingFunction (void *pc)
+{
+ struct dwarf_eh_bases bases;
+ const struct dwarf_fde *fde = _Unwind_Find_FDE (pc-1, &bases);
+ if (fde)
+ return bases.func;
+ else
+ return NULL;
+}
+
+_Unwind_Ptr
+_Unwind_GetDataRelBase (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.dbase;
+}
+
+_Unwind_Ptr
+_Unwind_GetTextRelBase (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.tbase;
+}
+
+#include "md-unwind-support.h"
+
+/* Extract any interesting information from the CIE for the translation
+ unit that the FDE belongs to. Return a pointer to the byte after the
+ augmentation, or NULL if we encountered an undecipherable augmentation. */
+
+static const unsigned char *
+extract_cie_info (const struct dwarf_cie *cie, struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ const unsigned char *aug = cie->augmentation;
+ const unsigned char *p = aug + strlen ((const char *)aug) + 1;
+ const unsigned char *ret = NULL;
+ _uleb128_t utmp;
+ _sleb128_t stmp;
+
+ /* g++ v2 "eh" has pointer immediately following augmentation string,
+ so it must be handled first. */
+ if (aug[0] == 'e' && aug[1] == 'h')
+ {
+ fs->eh_ptr = read_pointer (p);
+ p += sizeof (void *);
+ aug += 2;
+ }
+
+ /* Immediately following the augmentation are the code and
+ data alignment and return address column. */
+ p = read_uleb128 (p, &utmp);
+ p = read_sleb128 (p, &stmp);
+ if (cie->version == 1)
+ fs->retaddr_column = *p++;
+ else
+ {
+ p = read_uleb128 (p, &utmp);
+ fs->retaddr_column = (_Unwind_Word)utmp;
+ }
+ fs->lsda_encoding = DW_EH_PE_omit;
+
+ /* If the augmentation starts with 'z', then a uleb128 immediately
+ follows containing the length of the augmentation field following
+ the size. */
+ if (*aug == 'z')
+ {
+ p = read_uleb128 (p, &utmp);
+ ret = p + utmp;
+
+ fs->saw_z = 1;
+ ++aug;
+ }
+
+ /* Iterate over recognized augmentation subsequences. */
+ while (*aug != '\0')
+ {
+ /* "L" indicates a byte showing how the LSDA pointer is encoded. */
+ if (aug[0] == 'L')
+ {
+ fs->lsda_encoding = *p++;
+ aug += 1;
+ }
+
+ /* "R" indicates a byte indicating how FDE addresses are encoded. */
+ else if (aug[0] == 'R')
+ {
+ fs->fde_encoding = *p++;
+ aug += 1;
+ }
+
+ /* "P" indicates a personality routine in the CIE augmentation. */
+ else if (aug[0] == 'P')
+ {
+ _Unwind_Ptr personality;
+
+ p = read_encoded_value (context, *p, p + 1, &personality);
+ fs->personality = (_Unwind_Personality_Fn) personality;
+ aug += 1;
+ }
+
+ /* "S" indicates a signal frame. */
+ else if (aug[0] == 'S')
+ {
+ fs->signal_frame = 1;
+ aug += 1;
+ }
+
+ /* Otherwise we have an unknown augmentation string.
+ Bail unless we saw a 'z' prefix. */
+ else
+ return ret;
+ }
+
+ return ret ? ret : p;
+}
+
+/* Given the _Unwind_Context CONTEXT for a stack frame, look up the FDE for
+ its caller and decode it into FS. This function also sets the
+ lsda member of CONTEXT, as it is really information
+ about the caller's frame. */
+
+static _Unwind_Reason_Code
+uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ const struct dwarf_fde *fde;
+ const struct dwarf_cie *cie;
+ const unsigned char *aug;
+ int window_size;
+ _Unwind_Word *ra_ptr;
+
+ memset (fs, 0, sizeof (*fs));
+ context->lsda = 0;
+
+ fde = _Unwind_Find_FDE (context->ra + _Unwind_IsSignalFrame (context) - 1,
+ &context->bases);
+ if (fde == NULL)
+ {
+#ifdef MD_FALLBACK_FRAME_STATE_FOR
+ _Unwind_Reason_Code reason;
+ /* Couldn't find frame unwind info for this function. Try a
+ target-specific fallback mechanism. This will necessarily
+ not provide a personality routine or LSDA. */
+ reason = MD_FALLBACK_FRAME_STATE_FOR (context, fs);
+ if (reason != _URC_END_OF_STACK)
+ return reason;
+#endif
+ /* The frame was not recognized and handled by the fallback function,
+ but it is not really the end of the stack. Fall through here and
+ unwind it anyway. */
+ }
+ else
+ {
+ cie = get_cie (fde);
+ if (extract_cie_info (cie, context, fs) == NULL)
+ /* CIE contained unknown augmentation. */
+ return _URC_FATAL_PHASE1_ERROR;
+
+ /* Locate augmentation for the fde. */
+ aug = (const unsigned char *) fde + sizeof (*fde);
+ aug += 2 * size_of_encoded_value (fs->fde_encoding);
+ if (fs->saw_z)
+ {
+ _uleb128_t i;
+ aug = read_uleb128 (aug, &i);
+ }
+ if (fs->lsda_encoding != DW_EH_PE_omit)
+ {
+ _Unwind_Ptr lsda;
+
+ aug = read_encoded_value (context, fs->lsda_encoding, aug, &lsda);
+ context->lsda = (void *) lsda;
+ }
+ }
+
+ /* Check for the end of the stack. This needs to be checked after
+ the MD_FALLBACK_FRAME_STATE_FOR check for signal frames because
+ the contents of context->reg[0] are undefined at a signal frame,
+ and register a0 may appear to be zero. (The return address in
+ context->ra comes from register a4 or a8). */
+ ra_ptr = context->reg[0];
+ if (ra_ptr && *ra_ptr == 0)
+ return _URC_END_OF_STACK;
+
+ /* Find the window size from the high bits of the return address. */
+ if (ra_ptr)
+ window_size = (*ra_ptr >> 30) * 4;
+ else
+ window_size = 8;
+
+ fs->retaddr_column = window_size;
+
+ return _URC_NO_REASON;
+}
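
The last step above leans on the windowed ABI encoding: CALL4/CALL8/CALL12 leave the call increment (1, 2 or 3) in the top two bits of the return address, so the caller's window rotation is four registers per increment. As a standalone helper (illustrative only):

    #include <stdint.h>

    static int
    window_size_from_ra (uint32_t ra)
    {
      return (int) (ra >> 30) * 4;   /* 4, 8 or 12 -- also used as retaddr_column */
    }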
+
+static void
+uw_update_context_1 (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ struct _Unwind_Context orig_context = *context;
+ _Unwind_Word *sp, *cfa, *next_cfa;
+ int i;
+
+ if (fs->signal_regs)
+ {
+ cfa = (_Unwind_Word *) fs->signal_regs[1];
+ next_cfa = (_Unwind_Word *) cfa[-3];
+
+ for (i = 0; i < 4; i++)
+ context->reg[i] = fs->signal_regs + (i << 2);
+ }
+ else
+ {
+ int window_size = fs->retaddr_column >> 2;
+
+ sp = (_Unwind_Word *) orig_context.sp;
+ cfa = (_Unwind_Word *) orig_context.cfa;
+ next_cfa = (_Unwind_Word *) cfa[-3];
+
+ /* Registers a0-a3 are in the save area below sp. */
+ context->reg[0] = sp - 4;
+
+ /* Find the extra save area below next_cfa. */
+ for (i = 1; i < window_size; i++)
+ context->reg[i] = next_cfa - 4 * (1 + window_size - i);
+
+ /* Remaining registers rotate from previous save areas. */
+ for (i = window_size; i < 4; i++)
+ context->reg[i] = orig_context.reg[i - window_size];
+ }
+
+ context->sp = cfa;
+ context->cfa = next_cfa;
+
+ _Unwind_SetSignalFrame (context, fs->signal_frame);
+}
+
+/* CONTEXT describes the unwind state for a frame, and FS describes the FDE
+ of its caller. Update CONTEXT to refer to the caller as well. Note
+ that the lsda member is not updated here, but later in
+ uw_frame_state_for. */
+
+static void
+uw_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ uw_update_context_1 (context, fs);
+
+ /* Compute the return address now, since the return address column
+ can change from frame to frame. */
+ if (fs->signal_ra != 0)
+ context->ra = (void *) fs->signal_ra;
+ else
+ context->ra = (void *) ((_Unwind_GetGR (context, fs->retaddr_column)
+ & XTENSA_RA_FIELD_MASK) | context->ra_high_bits);
+}
+
+static void
+uw_advance_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ uw_update_context (context, fs);
+}
+
+/* Fill in CONTEXT for top-of-stack. The only valid registers at this
+ level will be the return address and the CFA. */
+
+#define uw_init_context(CONTEXT) \
+ do \
+ { \
+ __builtin_unwind_init (); \
+ uw_init_context_1 (CONTEXT, __builtin_dwarf_cfa (), \
+ __builtin_return_address (0)); \
+ } \
+ while (0)
+
+static void __attribute__((noinline))
+uw_init_context_1 (struct _Unwind_Context *context, void *outer_cfa,
+ void *outer_ra)
+{
+ void *ra = __builtin_return_address (0);
+ void *cfa = __builtin_dwarf_cfa ();
+ _Unwind_FrameState fs;
+
+ memset (context, 0, sizeof (struct _Unwind_Context));
+ context->ra = ra;
+
+ memset (&fs, 0, sizeof (fs));
+ fs.retaddr_column = 8;
+ context->sp = cfa;
+ context->cfa = outer_cfa;
+ context->ra_high_bits =
+ ((_Unwind_Word) uw_init_context_1) & ~XTENSA_RA_FIELD_MASK;
+ uw_update_context_1 (context, &fs);
+
+ context->ra = outer_ra;
+}
+
+
+/* Install TARGET into CURRENT so that we can return to it. This is a
+ macro because __builtin_eh_return must be invoked in the context of
+ our caller. */
+
+#define uw_install_context(CURRENT, TARGET) \
+ do \
+ { \
+ long offset = uw_install_context_1 ((CURRENT), (TARGET)); \
+ void *handler = __builtin_frob_return_addr ((TARGET)->ra); \
+ __builtin_eh_return (offset, handler); \
+ } \
+ while (0)
+
+static long
+uw_install_context_1 (struct _Unwind_Context *current,
+ struct _Unwind_Context *target)
+{
+ long i;
+
+ /* The eh_return insn assumes a window size of 8, so don't bother copying
+ the save areas for registers a8-a15 since they won't be reloaded. */
+ for (i = 0; i < 2; ++i)
+ {
+ void *c = current->reg[i];
+ void *t = target->reg[i];
+
+ if (t && c && t != c)
+ memcpy (c, t, 4 * sizeof (_Unwind_Word));
+ }
+
+ return 0;
+}
+
+static inline _Unwind_Ptr
+uw_identify_context (struct _Unwind_Context *context)
+{
+ return _Unwind_GetCFA (context);
+}
+
+
+#include "unwind.inc"
+
+#if defined (USE_GAS_SYMVER) && defined (SHARED) && defined (USE_LIBUNWIND_EXCEPTIONS)
+alias (_Unwind_Backtrace);
+alias (_Unwind_DeleteException);
+alias (_Unwind_FindEnclosingFunction);
+alias (_Unwind_ForcedUnwind);
+alias (_Unwind_GetDataRelBase);
+alias (_Unwind_GetTextRelBase);
+alias (_Unwind_GetCFA);
+alias (_Unwind_GetGR);
+alias (_Unwind_GetIP);
+alias (_Unwind_GetLanguageSpecificData);
+alias (_Unwind_GetRegionStart);
+alias (_Unwind_RaiseException);
+alias (_Unwind_Resume);
+alias (_Unwind_Resume_or_Rethrow);
+alias (_Unwind_SetGR);
+alias (_Unwind_SetIP);
+#endif
+
+#endif /* !__USING_SJLJ_EXCEPTIONS__ */
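
For reference, unwind.inc instantiates the generic entry points (_Unwind_RaiseException, _Unwind_Backtrace, and so on) on top of the uw_* routines above. A minimal consumer of that API, written against the standard unwind.h declarations (a hypothetical example, not part of this file), looks like:

    #include <stdio.h>
    #include <unwind.h>

    static _Unwind_Reason_Code
    print_frame (struct _Unwind_Context *ctx, void *arg)
    {
      int *count = arg;
      printf ("#%d pc=%p\n", (*count)++, (void *) _Unwind_GetIP (ctx));
      return _URC_NO_REASON;        /* keep unwinding */
    }

    static void
    print_backtrace (void)
    {
      int count = 0;
      _Unwind_Backtrace (print_frame, &count);
    }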
diff --git a/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.h b/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.h
new file mode 100644
index 000000000..c962ebb13
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.h
@@ -0,0 +1,49 @@
+/* DWARF2 frame unwind data structure for Xtensa.
+ Copyright (C) 1997-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* A target can override (perhaps for backward compatibility) how
+ many dwarf2 columns are unwound. */
+#ifndef DWARF_FRAME_REGISTERS
+#define DWARF_FRAME_REGISTERS FIRST_PSEUDO_REGISTER
+#endif
+
+/* Xtensa's variable-size register window save areas can be unwound without
+ any unwind info. This is a stripped down version of the standard DWARF
+ _Unwind_FrameState. */
+typedef struct
+{
+ /* The information we care about from the CIE/FDE. */
+ _Unwind_Personality_Fn personality;
+ _Unwind_Word retaddr_column;
+ unsigned char fde_encoding;
+ unsigned char lsda_encoding;
+ unsigned char saw_z;
+ unsigned char signal_frame;
+ void *eh_ptr;
+
+ /* Saved registers for a signal frame. */
+ _Unwind_Word *signal_regs;
+ _Unwind_Word signal_ra;
+} _Unwind_FrameState;
+